In [9]:
# Import dependencies
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist
from bs4 import BeautifulSoup
import pandas as pd

In [10]:
# Tell splinter where ChromeDriver lives (keep chromedriver.exe in the same folder as this notebook)
executable_path = dict(executable_path='chromedriver.exe')

# Launch a visible (non-headless) Chrome window that the scraping cells below will drive
browser = Browser('chrome', headless=False, **executable_path)

## Step 1 - Scraping

### NASA Mars News

In [3]:
# Open the NASA Mars News page in the automated browser
news_url = 'https://mars.nasa.gov/news/'
browser.visit(news_url)

# The page may not be fully rendered when visit() returns, in which case browser.html
# would be captured without any articles. Poll for up to 10 seconds until the first
# headline element exists instead of relying on a fixed delay.
browser.is_element_present_by_css('ul.item_list div.content_title', wait_time=10)

# Snapshot the page source as currently rendered by the browser
news_html = browser.html

# Parse the snapshot with Python's built-in HTML parser; later cells query news_soup
news_soup = BeautifulSoup(news_html, 'html.parser')

In [4]:
# Walk down from the article list to the first headline block, then to its link
news_list = news_soup.find('ul', class_='item_list')
headline_block = news_list.find('div', class_='content_title')

# The link text is the title of the latest news article
title = headline_block.find('a').get_text()

# Display the title so it can be checked by eye
title

'NASA Establishes Board to Initially Review Mars Sample Return Plans'

In [5]:
# Locate the article list, then the first teaser paragraph inside it
# (the teaser sits right below the article title on the page)
teaser_list = news_soup.find('ul', class_='item_list')
teaser_block = teaser_list.find('div', class_='article_teaser_body')
teaser = teaser_block.get_text()

# Display the teaser so it can be checked by eye
teaser

'The board will assist with analysis of current plans and goals for one of the most difficult missions humanity has ever undertaken.'

### JPL Mars Space Images - Featured Image

In [6]:
# Keep the site root separately: the featured image is referenced by a site-relative
# route, which is appended to home_url later to build the full link
home_url = 'https://www.jpl.nasa.gov'
image_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

# Open the JPL Mars space images page in the automated browser
browser.visit(image_url)

# The page may not be fully rendered when visit() returns, in which case browser.html
# would be captured without the carousel. Poll for up to 10 seconds until the featured
# article element exists instead of relying on a fixed delay.
browser.is_element_present_by_css(
    'section.primary_media_feature div.carousel_items article', wait_time=10
)

# Snapshot the page source as currently rendered by the browser
image_html = browser.html

# Parse the snapshot with Python's built-in HTML parser; later cells query image_soup
image_soup = BeautifulSoup(image_html, 'html.parser')

In [7]:
# Drill down to the first carousel article; its inline style attribute carries the image route
feature_section = image_soup.find('section', class_='primary_media_feature')
carousel = feature_section.find('div', class_='carousel_items')
featured = carousel.find('article')['style']

# The route is the text between the first pair of single quotes in the style string
style_parts = featured.split("'")
featured_image = style_parts[1]

# Prefix the route with the site root to form the complete image link
featured_image_url = home_url + featured_image

# Display the link so it can be checked by eye
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA18899-1920x1200.jpg'

### Mars Facts

In [13]:
# Page holding the Mars facts table
facts_url = 'https://space-facts.com/mars/'

# Load the page in the browser and parse it so the table's title can be reused as a column header
browser.visit(facts_url)
table_html = browser.html
table_soup = BeautifulSoup(table_html, 'html.parser')
widget_area = table_soup.find('section', class_='widget-area')
widget_header = widget_area.find('div', class_='widget-header')
table_name = widget_header.find('h3').text

# Let pandas fetch the same URL and parse every HTML table on it into a list of DataFrames
tables = pd.read_html(facts_url)

# The first table on the page is the one with the facts about Mars
facts_table = tables[0]
facts_table

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [16]:
# Use the scraped table title as the header of the label column and leave the value column unlabeled.
# The assignment relabels facts_table in place.
column_labels = [table_name, '']
facts_table.columns = column_labels
facts_table

Unnamed: 0,Mars Planet Profile,Unnamed: 2
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [17]:
# Render the facts table as an HTML <table> string, omitting the DataFrame index column.
# Uses facts_table, the DataFrame built and relabeled in the cells above; the previous name
# (facts_df) is not defined anywhere in this notebook and raises NameError on a fresh kernel.
html_facts_table = facts_table.to_html(index=False)