In [20]:
# Import dependencies
import time
from urllib.parse import urljoin

import pandas as pd
from bs4 import BeautifulSoup
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist

In [21]:
# Start a visible (non-headless) Chrome session through splinter.
# The driver is referenced by the relative path 'chromedriver.exe', so keep
# chromedriver.exe in the same folder as this notebook.
executable_path = dict(executable_path='chromedriver.exe')
browser = Browser('chrome', headless=False, **executable_path)

## Step 1 - Scraping

### NASA Mars News

In [22]:
# Point the browser at the NASA Mars News listing page
news_url = 'https://mars.nasa.gov/news/'
browser.visit(news_url)

# Pause for 5 seconds: reading the HTML before the page has finished loading
# may produce an empty HTML object
time.sleep(5)

# Snapshot the page markup as currently held by the browser
news_html = browser.html

# Parse the snapshot with BeautifulSoup using the built-in 'html.parser' backend
news_soup = BeautifulSoup(markup=news_html, features='html.parser')

In [23]:
# Drill down one level at a time: article list -> first matching title block
# -> the text of the link inside it
news_items = news_soup.find('ul', class_='item_list')
first_title_block = news_items.find('div', class_='content_title')
title = first_title_block.find('a').text

# Echo the value so the scraped headline can be checked by eye
title

"Follow NASA's Perseverance Rover in Real Time on Its Way to Mars"

In [24]:
# The teaser paragraph lives in the same article list as the title; take the
# text of the first 'article_teaser_body' block found inside it
news_items = news_soup.find('ul', class_='item_list')
teaser = news_items.find('div', class_='article_teaser_body').text

# Echo the value so the scraped teaser can be checked by eye
teaser

"A crisply rendered web application can show you where the agency's Mars 2020 mission is right now as it makes its way to the Red Planet for a Feb. 18, 2021, landing."

### JPL Mars Space Images - Featured Image

In [6]:
# home_url is kept separately because the featured image route scraped later
# is appended to it to build the full link
home_url = 'https://www.jpl.nasa.gov'

# Open the JPL space images page filtered to the Mars category
image_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(image_url)

# Pause for 5 seconds: reading the HTML before the page has finished loading
# may produce an empty HTML object
time.sleep(5)

# Snapshot the page markup as currently held by the browser
image_html = browser.html

# Parse the snapshot with BeautifulSoup using the built-in 'html.parser' backend
image_soup = BeautifulSoup(markup=image_html, features='html.parser')

In [7]:
# Walk down to the first <article> of the featured carousel and read its
# inline CSS "style" attribute, which carries the route to the full sized image
feature_section = image_soup.find('section', class_='primary_media_feature')
carousel = feature_section.find('div', class_='carousel_items')
featured = carousel.find('article')['style']

# The route is the text between the first pair of single quotes in the style value
featured_image = featured.split("'")[1]

# Prefix the route with the site's home URL to form the complete link
featured_image_url = home_url + featured_image

# Echo the value so the link can be checked by eye
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA14293-1920x1200.jpg'

### Mars Facts

In [8]:
# Page holding the Mars facts table; used both by the browser and by pandas
facts_url = 'https://space-facts.com/mars/'

# Load the page in the browser first: the table's heading is scraped from it
# so it can later be used as a column header
browser.visit(facts_url)

# Pause for 5 seconds: reading the HTML before the page has finished loading
# may produce an empty HTML object
time.sleep(5)

# Parse the loaded page and step down to the <h3> text of the widget header
table_html = browser.html
table_soup = BeautifulSoup(markup=table_html, features='html.parser')
widget_area = table_soup.find('section', class_='widget-area')
widget_header = widget_area.find('div', class_='widget-header')
table_name = widget_header.find('h3').text

# Let pandas fetch the same URL and parse every HTML table it finds
tables = pd.read_html(facts_url)

# The first parsed table is the one with the facts about Mars; show it
facts_table = tables[0]
facts_table

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [9]:
# Label the first column with the scraped table heading and leave the second
# column's label blank, then show the relabelled table
new_column_labels = [table_name, '']
facts_table.columns = new_column_labels
facts_table

Unnamed: 0,Mars Planet Profile,Unnamed: 2
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [10]:
# Render the facts table as an HTML <table> string; buf=None makes to_html
# return the markup, and index=False leaves the row index out of it
html_facts_table = facts_table.to_html(buf=None, index=False)

### Mars Hemispheres

In [11]:
# Scrape the title and image URL of every Mars hemisphere listed in the USGS
# Astrogeology search results. The outcome is hemisphere_list: one dictionary
# per hemisphere with the keys 'title' and 'img_url'.
base_url = 'https://astrogeology.usgs.gov/'
hemisphere_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(hemisphere_url)

# Wait 5 seconds so the browser can fully load, otherwise an empty HTML object may be created
# (same wait the other scraping cells use after browser.visit)
time.sleep(5)

hemisphere_html = browser.html
hemisphere_soup = BeautifulSoup(hemisphere_html, 'html.parser')

# Each H3 tag inside the results container holds the title of one hemisphere
title_location = hemisphere_soup.find('div', class_='collapsible results')
titles = title_location.find_all('h3')

# Trailing word to drop from every hemisphere title
title_suffix = 'Enhanced'

hemisphere_list = []

# The loop variable is deliberately not called `title`, so the news article
# title stored under that name by the NASA Mars News cell is not overwritten
for heading in titles:

    # New dictionary for each entry
    hemisphere_dict = {}

    # Get the title from the h3 tag, then remove the trailing word and any
    # leading or trailing whitespace. The suffix is sliced off explicitly
    # because str.strip('Enhanced') removes any of those *characters* from both
    # ends, which would also eat the first letter of a title starting with 'E'.
    name = heading.text
    hemisphere_name = name.strip()
    if hemisphere_name.endswith(title_suffix):
        hemisphere_name = hemisphere_name[:-len(title_suffix)].strip()
    hemisphere_dict['title'] = hemisphere_name

    # Follow the link whose text matches the full (unstripped) title
    # NOTE(review): newer splinter releases flag click_link_by_partial_text as
    # deprecated in favour of browser.links.find_by_partial_text(...).click()
    # - confirm against the installed version before switching
    browser.click_link_by_partial_text(name)

    # Parse the detail page and read the src of the image in the wide wrapper
    img_link_html = browser.html
    img_link_soup = BeautifulSoup(img_link_html, 'html.parser')
    img_link_route = img_link_soup.find('div', class_='wide-image-wrapper').find('img')['src']

    # urljoin resolves the route against the site root without producing a
    # double slash when the route itself starts with '/'
    img_link = urljoin(base_url, img_link_route)

    hemisphere_dict['img_url'] = img_link

    hemisphere_list.append(hemisphere_dict)

    # Return to the search results so the next title link can be clicked
    browser.back()
