# Part 1: Scraping

### Import dependencies

In [None]:
from splinter import Browser
from bs4 import BeautifulSoup
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
import requests

In [None]:
# Set up Splinter: start a visible (non-headless) Chrome session.
# The value returned by ChromeDriverManager().install() is handed to Browser
# through the 'executable_path' keyword argument.

driver_path = ChromeDriverManager().install()
executable_path = {'executable_path': driver_path}
browser = Browser('chrome', headless=False, **executable_path)

## Scraping code

### NASA Mars News — Latest News Title and Paragraph Text

In [None]:
# Address of the Mars news site scraped in this section
url_news = "https://redplanetscience.com/"

In [None]:
# Use Splinter to visit the website: load the news page (url_news) in the
# browser session

browser.visit(url_news)

In [None]:
# Take the HTML currently loaded in the Splinter browser, parse it with
# Beautiful Soup's 'html.parser' backend, and print the tree for inspection
html_news = browser.html
soup_news = BeautifulSoup(html_news, features='html.parser')
print(soup_news.prettify())

In [None]:
# In the soup object the headline uses the class 'content_title' and the
# description uses the class 'article_teaser_body'; preview the first
# description match
results = soup_news.find('div', attrs={'class': 'article_teaser_body'})
print(results)

In [None]:
# Keep the first description and first headline matches in variables.
# NOTE(review): find() returns the matching element (or None), not a plain
# string — confirm whether downstream code expects text instead.
description = soup_news.find('div', attrs={'class': 'article_teaser_body'})
headline = soup_news.find('div', attrs={'class': 'content_title'})

In [None]:
# Do not quit the browser here: the same Splinter session is used again by
# browser.visit() in the sections that follow, and a session that has been
# quit cannot load further pages. The final cell of the notebook closes it.

### JPL Mars Space Images — Featured Image

In [None]:
# Address of the featured-image site scraped in this section
url_images = "https://spaceimages-mars.com/"

In [None]:
# Use Splinter to open the featured-image site (this cell only loads the
# page; it does not click the "Full Image" button)

browser.visit(url_images)

In [None]:
# Parse the page currently loaded in the browser with Beautiful Soup and
# print the tree to explore the site's structure
html_images = browser.html
soup_images = BeautifulSoup(html_images, features='html.parser')
print(soup_images.prettify())

In [None]:
# Locate the featured <img> element (class attribute 'headerimage fade-in')
# and print it; its 'src' attribute is read in the next cell
featured_image = soup_images.find('img', attrs={'class': 'headerimage fade-in'})
print(featured_image)

In [None]:
# Read the 'src' attribute of the featured image element
# (.get() yields None when the attribute is absent)
image_extension = featured_image.get('src')

In [None]:
# Prefix the site root to the 'src' value to form the full image address,
# then print it
site_root = 'https://spaceimages-mars.com/'
image_full_url = site_root + image_extension
print(image_full_url)

In [None]:
# Do not quit the browser here: the same Splinter session is used again by
# browser.visit() in the sections that follow, and a session that has been
# quit cannot load further pages. The final cell of the notebook closes it.

### Mars Facts

In [None]:
# Address of the Mars facts site; open it in the Splinter browser
url_facts = "https://galaxyfacts-mars.com/"
browser.visit(url_facts)

In [None]:
# Parse the facts page currently loaded in the browser and print the tree
# so the facts table can be found
html_facts = browser.html
soup_facts = BeautifulSoup(html_facts, features='html.parser')
print(soup_facts.prettify())

In [None]:
# Save data from facts table using Pandas: read_html is given the page URL
# (url_facts) directly rather than the HTML captured through Splinter

tables = pd.read_html(url_facts)

# Display everything that was parsed
tables

In [None]:
# Save the table I actually want: the second entry (index 1) of tables

mars_table = tables[1]

In [None]:
# Construct an HTML table string, part 1: wrap the selected table in a DataFrame
# NOTE(review): pd.read_html is documented to return DataFrames, so mars_table
# is presumably already a DataFrame rather than a list — confirm this step is needed

mars_df = pd.DataFrame(mars_table)

# Display the DataFrame
mars_df

In [None]:
# Construct an HTML table string, part 2: render the DataFrame as an HTML
# table string with to_html()

mars_table_html = mars_df.to_html()

### Mars Hemispheres

In [None]:
# Create a list to store the image information (the per-hemisphere
# dictionaries are appended to it further down)

hemispheres = []

In [None]:
# Address of the Mars hemispheres site scraped in this section
url_hemispheres = "https://marshemispheres.com/"

In [None]:
# Use Splinter to open the URL: load the hemispheres page (url_hemispheres)
# in the browser session

browser.visit(url_hemispheres)

In [None]:
# Parse the hemispheres page currently loaded in the browser and print the
# tree to inspect the site structure
hemispheres_html = browser.html
soup_hemi = BeautifulSoup(hemispheres_html, features='html.parser')
print(soup_hemi.prettify())

In [None]:
# Use Splinter to click on Cerberus picture: find the link whose text
# contains 'Cerberus Hemisphere Enhanced' and click it

browser.links.find_by_partial_text('Cerberus Hemisphere Enhanced').click()

In [None]:
# Record the Cerberus entry: a fixed title plus the address the browser is
# currently on.
# NOTE(review): browser.url is the current page address — confirm this is the
# intended value for "image_url".
cerberus_dict = {
    "title": "Cerberus Hemisphere Enhanced",
    "image_url": browser.url,
}

cerberus_dict

In [None]:
# Navigate back to the main page (browser history back) so the next
# hemisphere link can be clicked

browser.back()

In [None]:
# Use Splinter to click on Schiaparelli picture: find the link whose text
# contains 'Schiaparelli Hemisphere Enhanced' and click it

browser.links.find_by_partial_text('Schiaparelli Hemisphere Enhanced').click()

In [None]:
# Record the Schiaparelli entry: a fixed title plus the address the browser
# is currently on.
# NOTE(review): browser.url is the current page address — confirm this is the
# intended value for "image_url".
schiap_dict = {
    "title": "Schiaparelli Hemisphere Enhanced",
    "image_url": browser.url,
}

schiap_dict

In [None]:
# Navigate back to the main page (browser history back) so the next
# hemisphere link can be clicked

browser.back()

In [None]:
# Use Splinter to click on Syrtis Major picture: find the link whose text
# contains 'Syrtis Major Hemisphere Enhanced' and click it

browser.links.find_by_partial_text('Syrtis Major Hemisphere Enhanced').click()

In [None]:
# Record the Syrtis Major entry: a fixed title plus the address the browser
# is currently on.
# NOTE(review): browser.url is the current page address — confirm this is the
# intended value for "image_url".
syrtis_dict = {
    "title": "Syrtis Major Hemisphere Enhanced",
    "image_url": browser.url,
}

syrtis_dict

In [None]:
# Navigate back to the main page (browser history back) so the next
# hemisphere link can be clicked

browser.back()

In [None]:
# Use Splinter to click on Valles Marineris picture: find the link whose text
# contains 'Valles Marineris Hemisphere Enhanced' and click it

browser.links.find_by_partial_text('Valles Marineris Hemisphere Enhanced').click()

In [None]:
# Record the Valles Marineris entry: a fixed title plus the address the
# browser is currently on.
# NOTE(review): browser.url is the current page address — confirm this is the
# intended value for "image_url".
valles_dict = {
    "title": "Valles Marineris Hemisphere Enhanced",
    "image_url": browser.url,
}

valles_dict

In [None]:
# Add the four hemisphere dictionaries to the hemispheres list in a single
# call (same order as before), then display the list
hemispheres.extend([cerberus_dict, schiap_dict, syrtis_dict, valles_dict])
hemispheres

In [None]:
# Quit the browser: end the Splinter session

browser.quit()