# Setup and Dependencies

In [15]:
# Import dependencies
import random
import re
from io import StringIO

import pandas as pd
import pymongo
from bs4 import BeautifulSoup as bs
from splinter import Browser

In [2]:
# Set up the MongoDB connection: create a client for the server at
# localhost:27017 and select the `mars_db` database.
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(host=conn)
db = client['mars_db']

In [3]:
# Start a splinter-driven Chrome session (visible window, not headless);
# it is used below to load each target page and read its HTML.
executable_path = dict(executable_path='chromedriver.exe', headless=False)
browser = Browser('chrome', **executable_path)

# NASA Mars News

In [4]:
# Load the NASA Mars news page in the browser and parse the rendered HTML
# (lxml parser) so the latest title and teaser can be extracted.
url = 'https://mars.nasa.gov/news/'
browser.visit(url)
soup = bs(browser.html, features='lxml')

In [5]:
# Pull the title and teaser text out of the first news slide on the page.
# Any piece that cannot be found is reported as None.
article = soup.find('li', class_='slide')
header = article.find('div', class_='content_title') if article else None
body = article.find('div', class_='article_teaser_body') if article else None

news_title = header.text.strip() if header else None
news_text = body.text.strip() if body else None

print(f'The latest news title: {news_title}')
print(f'The latest news text: {news_text}')

The latest news title: NASA to Host Briefing on November Mars InSight Landing
The latest news text: A briefing on NASA's upcoming InSight Mars landing will air on Wed. Oct. 31 at 1:30 p.m. EDT (10:30 a.m. PDT) on NASA TV, the agency's website and NASA InSight Facebook Page.


# JPL Mars Space Images - Featured Image

In [6]:
# Open the Jet Propulsion Laboratory space-images page filtered to the Mars
# category and parse its HTML; `url_base` is kept to build absolute image URLs.
url_base = 'https://www.jpl.nasa.gov'
url = url_base + '/spaceimages/?search=&category=Mars'
browser.visit(url)
soup = bs(browser.html, features='lxml')

In [9]:
# Extract the featured image URL and title from the parsed JPL page.
# The image path is read from the carousel item's inline `style` attribute,
# where it appears as url('<path>'). Both results stay None when the expected
# markup is missing, so the cell never raises on a changed page layout.
featured_img_url = None
featured_img_title = None

section = soup.find('section', class_='main_feature')
if section:
    article = section.find('article', class_='carousel_item')
    if article:
        # Raw-string pattern avoids invalid "\(" escapes; the capture group
        # returns just the quoted path, and [^']+ stops at the first closing
        # quote instead of greedily running to the last one in the style.
        # .get() tolerates an <article> that has no style attribute at all.
        match = re.search(r"url\('([^']+)'\)", article.get('style', ''))
        if match:
            featured_img_url = f'{url_base}{match.group(1)}'
        title = article.h1
        if title:
            featured_img_title = title.text.strip()
print(f'High-res featured image "{featured_img_title}" is found at {featured_img_url}')

High-res featured image "Black Holes Shine for NuSTAR" is found at https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA17440-1920x1200.jpg


# Mars Weather

In [10]:
# Load the Mars weather report Twitter timeline and parse its HTML so the
# most recent tweet can be read.
url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(url)
soup = bs(browser.html, features='lxml')

In [11]:
# Take the text of the first tweet on the page as the latest weather report;
# the result is None when no tweet (or no tweet text) is found.
tweet = soup.find('div', class_='tweet')
tweet_text = tweet.find('p', class_='tweet-text') if tweet else None
mars_weather = tweet_text.text.strip() if tweet_text else None
print(f'Latest Mars weather report: {mars_weather}')

Latest Mars weather report: Sol 2208 (2018-10-22), high -18C/0F, low -75C/-102F, pressure at 8.80 hPa, daylight 06:08-18:26


# Mars Facts

In [14]:
# Load the Space Facts page about Mars and parse its HTML; the facts table
# is read from the browser's HTML in the next cell.
url = 'http://space-facts.com/mars/'
browser.visit(url)
soup = bs(browser.html, features='lxml')

In [23]:
# Read the first HTML table on the page into a DataFrame and label its two
# unnamed columns. The markup is wrapped in StringIO because pandas 2.1+
# deprecates passing a literal HTML string to read_html; file-like input is
# accepted by older pandas versions as well.
facts_df = (
    pd.read_html(StringIO(browser.html))[0]
    .rename(columns={0: 'Fact', 1: 'Details'})
)

# Column-oriented dict: {'Fact': [...], 'Details': [...]}
facts = facts_df.to_dict(orient='list')
facts

{'Fact': ['Equatorial Diameter:',
  'Polar Diameter:',
  'Mass:',
  'Moons:',
  'Orbit Distance:',
  'Orbit Period:',
  'Surface Temperature:',
  'First Record:',
  'Recorded By:'],
 'Details': ['6,792 km',
  '6,752 km',
  '6.42 x 10^23 kg (10.7% Earth)',
  '2 (Phobos & Deimos)',
  '227,943,824 km (1.52 AU)',
  '687 days (1.9 years)',
  '-153 to 20 °C',
  '2nd millennium BC',
  'Egyptian astronomers']}