<h1>Web Scraping Section</h1>

In [28]:
#import dependencies

from splinter import Browser
import requests
import pandas as pd
from bs4 import BeautifulSoup

In [29]:
#Which chromedriver? This chromedriver.

!which chromedriver

/usr/local/bin/chromedriver


In [30]:
#Connect chromedriver with splinter's Browser object.

executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

<h2>Article Scrape</h2>

In [5]:
#Use chromedriver to open the NASA website
url = 'https://mars.nasa.gov/news/'
browser.visit(url)

In [6]:
#Latest article title.
html = browser.html
soup = BeautifulSoup(html, "html.parser")

article_title = soup.find_all("div", class_="content_title")[0].text.strip()
browser.click_link_by_partial_text(article_title)

article_title

"Curiosity Tastes First Sample in 'Clay-Bearing Unit'"

In [7]:
#Latest article paragraph.

html = browser.html 
soup = BeautifulSoup(html, "html.parser")

paragraph = soup.find('div', class_='wysiwyg_content').find_all("p")[0].text.strip()
paragraph

'Scientists working with NASA\'s Curiosity Mars rover have been excited to explore a region called "the clay-bearing unit" since before the spacecraft launched. Now, the rover has finally tasted its first sample from this part of Mount Sharp. Curiosity drilled a piece of bedrock nicknamed "Aberlady" on Saturday, April 6 (the 2,370th Martian day, or sol, of the mission), and delivered the sample to its internal mineralogy lab on Wednesday, April 10 (Sol 2374).'

<h2>Featured Image Scrape</h2>

In [8]:
#Featured Image URL

url2 = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(url2)

In [176]:
#Reinitialize the url for every click-through
#On main page, click thorugh full image button
html2 = browser.html
soup2 = BeautifulSoup(html2, "html.parser")
full_image = soup2.find("footer").find("a", class_="button fancybox").text.strip()

browser.click_link_by_partial_text(full_image)

In [179]:
#On carosel page, click through more info
html3 = browser.html
soup3 = BeautifulSoup(html3, "html.parser")
more_info = soup3.find("div", class_="fancybox-title fancybox-title-outside-wrap")\
            .find("a", class_="button").text.strip()

browser.click_link_by_partial_text(more_info)

In [186]:
#Full-sized image link
html4 = browser.html
soup4 = BeautifulSoup(html4, "html.parser")

for i in soup4.find_all('figure'):
    featured_image = (i.a['href'])

featured_image

'/spaceimages/images/largesize/PIA20318_hires.jpg'

<h2>Mars Weather Scrape</h2>

In [190]:
#Connect to Mars Weather's Twitter account
url3 = "https://twitter.com/marswxreport?lang=en"
browser.visit(url3)

In [194]:
#The tweet
html5 = browser.html
soup5 = BeautifulSoup(html5, "html.parser")

mars_weather = soup5.find("ol", class_="stream-items js-navigable-stream")\
            .find_all("li", class_="js-stream-item stream-item stream-item ")[0]\
            .find("p", class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text").text.strip()
mars_weather

'InSight sol 135 (2019-04-13) low -96.5ºC (-141.8ºF) high -16.6ºC (2.2ºF)\nwinds from the SW at 4.2 m/s (9.4 mph) gusting to 11.3 m/s (25.3 mph)\npressure at 7.30 hPapic.twitter.com/bRsLlzn4M0'

<h2>Mars Facts Scrape</h2>

In [201]:
#Connect to the Mars Facts web pages
url4 = "https://space-facts.com/mars/"

In [202]:
#Use pandas to scrape the table of facts about Mars.
table = pd.read_html(url4)
table

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [203]:
#Mars is cool. This table says so.
df = table[0]
df.columns = ["Planet Profile", "Stats"]
df

Unnamed: 0,Planet Profile,Stats
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [204]:
#Pandas turns the table into html for later use
table_data_html = df.to_html()
table_data_html

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Planet Profile</th>\n      <th>Stats</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Mass:</td>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>Surface Temperature:</td>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>First Record:</td>\n      <td>2nd millenn

<h2>Mars Hemisphere</h2>

In [37]:
#Connect to the USGS website
url5 = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(url5)

In [41]:
#Cook the soup
html6 = browser.html
soup6 = BeautifulSoup(html6, "html.parser")

link_titles = []

for el in soup6.findAll("div", class_="description"):
    link_titles.append(el.find("h3").text.strip())


In [42]:
links = browser.find_by_css('a.product-item h3')
links

[<splinter.driver.webdriver.WebDriverElement at 0x12262a518>,
 <splinter.driver.webdriver.WebDriverElement at 0x12262a470>,
 <splinter.driver.webdriver.WebDriverElement at 0x12262a3c8>,
 <splinter.driver.webdriver.WebDriverElement at 0x12262a710>]

In [43]:
link_titles

['Cerberus Hemisphere Enhanced',
 'Schiaparelli Hemisphere Enhanced',
 'Syrtis Major Hemisphere Enhanced',
 'Valles Marineris Hemisphere Enhanced']

In [55]:
#Connect to the USGS website
url5 = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(url5)

mars_hemispheres = []
base_url = "https://astrogeology.usgs.gov"

for i in range(len(links)):
    #empty dictionary
    hemispheres = {}
    
    #click to the hemisphere
    browser.find_by_css('a.product-item h3')[i].click()
    
    #reinitialize browser and cook the soup
    html7 = browser.html
    soup7 = BeautifulSoup(html7, "html.parser")
    
    #get the image url
    full_url = base_url + soup7.find('img', class_="wide-image")['src']
    
    #fill dictionary
    hemispheres['title'] = link_titles[i]
    hemispheres['image_url'] = full_url
    
    #push to mars_hemispheres list
    mars_hemispheres.append(hemispheres)
    
    #click back
    browser.back()