### Import Dependencies

In [1]:
import time
from splinter import Browser
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
from selenium import webdriver

In [2]:
# print out the location of the driver
!which chromedriver

/usr/local/bin/chromedriver


In [3]:
# identify location of chromedriver and store it as a variable
driverPath = !which chromedriver

# Setup configuration variables to enable Splinter to interact with browser
executable_path = {'executable_path': driverPath[0]}
browser = Browser('chrome', **executable_path, headless=False)

### Define URL to scrape and inform the browser to visit the page

In [4]:
url = "https://mars.nasa.gov/news/"
browser.visit(url)

### Scrape content from the website (latest news title and paragraph text)

In [5]:

# Splinter can capture a page's underlying html and use pass it to BeautifulSoup to allow us to scrape the content
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# Using BS, we can execute standard functions to capture the page's content
quotes = soup.find_all('li', class_='slide')

news_title = quotes[0].h3.text
news_p = quotes[0].a.text
print(news_title)
print(news_p)

InSight Lands Praise and a Proclamation from LA County
Several members of the Mars InSight team accepted a proclamation on behalf of the mission from L.A. County Board of Supervisors on Tuesday, Feb. 19.InSight Lands Praise and a Proclamation from LA County


### Define JPL URL to scrape and inform the browser to visit the page

In [6]:
image_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(image_url)

### Find the largesize image URL 

In [7]:

# Splinter can capture a page's underlying html and use pass it to BeautifulSoup to allow us to scrape the content
html = browser.html
soup = BeautifulSoup(html, 'html.parser')


### Go to next page with FULL IMAGE link

In [8]:
browser.click_link_by_partial_text('FULL IMAGE')


### Go to large size image link 

In [9]:
browser.click_link_by_partial_text('more info')

### Scrape the image url from the new website

In [10]:
new_html = browser.html
new_soup = BeautifulSoup(new_html, 'html.parser')
image = new_soup.find('img', class_='main_image')
image_url = image.get('src')


### Assign the url string to a variable called 'featured_image_url'

In [11]:
featured_image_url = 'https://www.jpl.nasa.gov'+image_url
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA19323_hires.jpg


### Mars Weather

In [12]:
# Define the weather url and inform the browser to visit the page
weather_url = "https://twitter.com/marswxreport?lang=en"
browser.visit(weather_url)

# Using bs to capture the page's content
weather_html = browser.html
soup = BeautifulSoup(weather_html, 'html.parser')
# Scrape the latest (first) news about the mars weather
mars_weather = soup.find("p", class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text").text
print(mars_weather)

InSight sol 89 (2019-02-26) low -95.1ºC (-139.2ºF) high -14.4ºC (6.1ºF)
winds from the SW at 4.3 m/s (9.6 mph) gusting to 12.4 m/s (27.8 mph)
pressure at 7.20 hPapic.twitter.com/h8gODY5bfk


### Mars Facts

In [13]:
facts_url = 'http://space-facts.com/mars/'

### Scrape the table information from the defined url and save as pandas

In [14]:
table = pd.read_html(facts_url)
table[0]

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [15]:
df = table[0]
df.columns = ['Parameters', 'Facts']
df

Unnamed: 0,Parameters,Facts
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [16]:
html_table = df.to_html()
html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Parameters</th>\n      <th>Facts</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Mass:</td>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>Surface Temperature:</td>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>First Record:</td>\n      <td>2nd millennium 

### Mars Hemispheres

### Define the USGS url and inform the browser to visit the page

In [63]:
USGS_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(USGS_url)

In [64]:

# Splinter can capture a page's underlying html and use pass it to BeautifulSoup to allow us to scrape the content
hemis_html = browser.html
soup = BeautifulSoup(hemis_html, 'html.parser')

# Using BS, we can execute standard functions to capture the page's content
titles = soup.find_all('div', class_='item')
# define an empty dictionary to store the title and image_url data
hemisphere_image_urls = []


    

### Iterate each title and scrape the title and image url string, store the data into the dictionary

In [65]:
for title in titles:
    # scrape the hemisphere title
    image_title = title.h3.text
    # go to the title's link
    browser.click_link_by_partial_text(image_title)
    time.sleep(5)
    # scrape the high resolution hemisphere image url 
    full_html = browser.html
    full_soup = BeautifulSoup(full_html, 'html.parser')
    full_quote = full_soup.find('div',class_ = 'downloads')
    full_url = full_quote.a['href']
    # store the hemisphere title and image url string into the dictionary
    hemisphere_image_urls.append({"title": image_title, "image_url": full_url})
    browser.back()
    
    
print(hemisphere_image_urls)

[{'title': 'Cerberus Hemisphere Enhanced', 'image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]
