# Mission to Mars | <em>Aaron Wollman</em>

## Setup

In [1]:
from splinter import Browser
from bs4 import BeautifulSoup
import pandas as pd
from pprint import pprint
import time

In [2]:
from config import chromedriver_path

In [3]:
def init_chrome_browser(exe_path, *, headless=False):
    """
    Opens a chrome session in splinter.
    @ params:
        exe_path - The path to a chrome browser driver, like chromedriver.
        headless - Keyword-only. When True, chrome is started without a
            visible window. Defaults to False (visible window), which is
            what this function always did before the option existed.
    @ returns:
        The splinter Browser object for the new chrome session.
    """
    return Browser("chrome", executable_path=exe_path, headless=headless)

In [4]:
def get_soup_for_current(browser):
    """
    Returns a BeautifulSoup object for the page the browser has open.
    @ params:
        browser - A splinter browser.
    """
    return BeautifulSoup(browser.html, "html.parser")

def get_soup(browser, url, wait_seconds=0):
    """
    Opens a url and returns its HTML BeautifulSoup object.
    @ params:
        browser - A splinter browser.
        url - The url to get the HTML from.
        wait_seconds - Optional pause, in seconds, between opening the url
            and reading the page's HTML. Defaults to 0 (no pause).
    @ returns:
        The BeautifulSoup object built from the browser's current HTML.
    """
    browser.visit(url)
    if wait_seconds > 0:
        # The pause has to happen before the HTML is read; waiting after the
        # soup is built cannot change what was captured.
        time.sleep(wait_seconds)
    return get_soup_for_current(browser)

In [5]:
# Start the chrome session used by the scraping cells below; the driver path
# comes from the local config module.
browser = init_chrome_browser(chromedriver_path)

## NASA Mars News

In [None]:
# Get the HTML
nasa_news_url = "https://mars.nasa.gov/news/"
browser.visit(nasa_news_url)
# Give the page time to finish loading *before* its HTML is captured.
# Sleeping after the soup has been built would not change what was parsed.
time.sleep(3)
nasa_soup = get_soup_for_current(browser)
print(nasa_soup.prettify())

In [None]:
# Keep only the first "list_text" div on the page (find() stops at the first
# match); presumably that is the newest article - verify against the page.
latest_news_soup = nasa_soup.find("div", attrs={"class": "list_text"})
print(latest_news_soup.prettify())

In [None]:
# Text of the "content_title" div inside the selected news item.
latest_title_soup = latest_news_soup.find("div", attrs={"class": "content_title"})
latest_title = latest_title_soup.text
print(latest_title)

In [None]:
# Text of the "article_teaser_body" div inside the selected news item.
latest_paragraph_soup = latest_news_soup.find(
    "div", attrs={"class": "article_teaser_body"}
)
latest_paragraph = latest_paragraph_soup.text
print(latest_paragraph)

In [None]:
# Bundle the scraped headline and teaser text into one record.
nasa_news_latest = {"title": latest_title, "paragraph": latest_paragraph}

## JPL Mars Space Images

In [None]:
# The base url is kept separate because it is reused later to turn the
# image's relative path into a full url.
jpl_base_url = "https://www.jpl.nasa.gov"
jpl_url = f"{jpl_base_url}/spaceimages/?search=&category=Mars"
# Open the Mars image search page and parse its HTML.
jpl_soup = get_soup(browser, jpl_url)
print(jpl_soup.prettify())

In [None]:
# First <article> element on the page; the featured image link is looked up
# inside it.
jpl_header_soup = jpl_soup.article
print(jpl_header_soup.prettify())

In [None]:
# The article's first link holds the image path in its "data-fancybox-href"
# attribute; prefix it with the JPL base url to get the complete address.
partial_url = jpl_header_soup.a.get("data-fancybox-href")
featured_image_url = f"{jpl_base_url}{partial_url}"
print(featured_image_url)

## Mars Facts

In [None]:
# pandas downloads the page and returns one DataFrame per HTML table found.
mars_facts_url = "https://space-facts.com/mars/"
mars_facts_tables = pd.read_html(mars_facts_url)
# The first table on the page is used as the planet profile.
mars_planet_profile = mars_facts_tables[0]
mars_planet_profile  # bare expression so the notebook displays the table

In [None]:
# Convert the profile to an HTML <table> string, omitting the row index and
# the column header row.
mars_planet_profile_html = mars_planet_profile.to_html(index=False, header=False)
pprint(mars_planet_profile_html)

## Mars Hemispheres

In [None]:
# The site root is kept separate because the per-hemisphere links found on
# the results page are joined onto it later.
hemisphere_root = "https://astrogeology.usgs.gov"
hemisphere_url = f"{hemisphere_root}/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
# Open the search results page and parse its HTML.
hemisphere_soup = get_soup(browser, hemisphere_url)
print(hemisphere_soup.prettify())

In [None]:
# Narrow the page down to the first "results" div, which holds the result
# items.
hemisphere_list_soup = hemisphere_soup.find("div", attrs={"class": "results"})
print(hemisphere_list_soup.prettify())

In [None]:
# Collect one {"title", "img_url"} record per search result by opening each
# result's own page in the browser.
hemisphere_image_urls = []
items = hemisphere_list_soup.find_all("div", attrs={"class": "item"})
for item_soup in items:
    # The first link in the item is a site-relative path to its detail page.
    item_link = item_soup.find("a").get("href")
    image_page_soup = get_soup(browser, f"{hemisphere_root}{item_link}")

    # Title comes from the page's first <h2>.
    image_title = image_page_soup.find("h2").text

    # The image url is the first link inside the first "downloads" div.
    image_download_soup = image_page_soup.find_all(
        "div", attrs={"class": "downloads"}
    )[0]
    image_url = image_download_soup.find("a").get("href")

    hemisphere_image_url = {"title": image_title, "img_url": image_url}
    hemisphere_image_urls.append(hemisphere_image_url)
pprint(hemisphere_image_urls)

## Cleanup

In [None]:
# Close the browser session now that scraping is finished.
browser.quit()