### Establishing dependencies

In [19]:
from splinter import Browser
from bs4 import BeautifulSoup as bs
from selenium import webdriver
import pandas as pd
import time

#put this here since I'll use it more than once
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

In [20]:
#assigning variables to websites I'll use later
nasa_web = "https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest"
jpl_web = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
nasa_twitter = "https://twitter.com/marswxreport?lang=en"
facts_web = "https://space-facts.com/mars/"
usgu_web = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

### NASA Mars News
- save title and paragraph as text

In [3]:
browser.visit(nasa_web)

# title of first article
nasa_title = (bs(browser.html, 'html.parser').find('div', class_="content_title")).text.strip()

# first paragraph of first article
nasa_para = (bs(browser.html, 'html.parser').find('div', class_="article_teaser_body")).text.strip()

# para_button = browser.find_by_name("content_title").click()
# nasa_para = para_button.find('div', class_="wysiwg_content")

In [4]:
print(nasa_title, "---", nasa_para)

MarCO Makes Space for Small Explorers --- A pair of NASA CubeSats flying to Mars are opening a new frontier for small spacecraft.


### JPL Mars Space Images - Featured Image
- grab src text from full sized image

In [5]:
browser.visit(jpl_web)
browser.find_by_id("full_image").click()

# the browser tried to click too quickly so I had to add a sleep function
time.sleep(5)
browser.click_link_by_partial_text('more info')

jpl_end = (bs(browser.html, 'html.parser').find('img', class_='main_image')['src'])

full_jpl = (jpl_web + jpl_end)

In [6]:
print(full_jpl)

https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars/spaceimages/images/largesize/PIA14924_hires.jpg


### Mars Weather Twitter
- scrape content of most recent tweet

In [7]:
browser.visit(nasa_twitter)

tweet = (bs(browser.html, 'html.parser').find('div', class_="js-tweet-text-container")).text.strip()

In [8]:
print(tweet)

Sol 2171 (2018-09-14), high -12C/10F, low -65C/-84F, pressure at 8.79 hPa, daylight 05:43-17:59


### Mars Facts Website DataFrame Scrape
- Use Pandas to convert the data to a HTML table string

In [9]:
mars_facts = pd.DataFrame(pd.read_html(facts_web)[0])

facts_html = mars_facts.to_html()

In [10]:
facts_html

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>0</th>\n      <th>1</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Mass:</td>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>Surface Temperature:</td>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>First Record:</td>\n      <td>2nd millennium BC</td>\n    

### Mars Hemispheres Images
- obtain high resolution images for each of Mars' hemispheres
- Save image src text for full resolution hemisphere image, as well as the Hemisphere title containing the hemisphere name

In [23]:
# empty list for the string
hemisphere_img = []

# open the page
browser.visit(usgu_web)
links = browser.find_by_css("a.product-item h3")

# loop through, grab the title and url and store it in a dictionary
for link in range(len(links)):
    images= {}
    browser.find_by_css("a.product-item h3")[link].click()
    time.sleep(5)
    image_url = browser.find_link_by_text('Sample')
    images['img_url'] = image_url['href']
    browser.find_by_css('h2.title').text
    image_title = browser.find_by_css('h2.title').text
    images['title'] = image_title
    hemisphere_img.append({"title": image_title, "image_url": image_url})
    browser.back()

In [24]:
print(hemisphere_img)

[{'title': 'Cerberus Hemisphere Enhanced', 'image_url': [<splinter.driver.webdriver.WebDriverElement object at 0x0000021FBB31E4E0>]}, {'title': 'Schiaparelli Hemisphere Enhanced', 'image_url': [<splinter.driver.webdriver.WebDriverElement object at 0x0000021FBB31E5F8>]}, {'title': 'Syrtis Major Hemisphere Enhanced', 'image_url': [<splinter.driver.webdriver.WebDriverElement object at 0x0000021FBB35B2E8>]}, {'title': 'Valles Marineris Hemisphere Enhanced', 'image_url': [<splinter.driver.webdriver.WebDriverElement object at 0x0000021FBB34E400>]}]
