## Step 1 - Scraping

#### Dependencies

In [1]:
import numpy as np                                                           # numpy library

In [2]:
import pandas as pd                                                          # pandas library

In [3]:
from splinter import Browser                                                 # browser module from splinter library

In [4]:
from selenium import webdriver                                               # webdriver module from selenium library

In [5]:
from bs4 import BeautifulSoup as bs                                          # BeautifulSoup module from bs4 library

In [6]:
import requests as req                                                       # requests library

#### NASA Mars News - Scrape the NASA Mars News Site and collect the latest News Title and Paragraph Text. Assign the text to variables that you can reference later.

In [7]:
# Fetches the NASA Mars news page and extracts the first headline and its teaser text.
nasa_url = "https://mars.nasa.gov/news/"                                     # URL to scrape
response = req.get(nasa_url, timeout=30)                                     # bounded wait so a stalled server cannot hang the kernel
response.raise_for_status()                                                  # fail on 4xx/5xx here instead of parsing an error page

nasa_soup = bs(response.text, 'html.parser')                                 # sends response to beautiful soup

# find() returns only the first matching element in the returned HTML.
# The text is kept exactly as scraped (surrounding newlines are not stripped).
news_title = nasa_soup.find("div", class_="content_title").text
paragraph_text = nasa_soup.find("div", class_="rollover_description_inner").text

In [8]:
news_title                                                                   # displays the scraped headline (raw text, whitespace not stripped)

'\n\nMars Reconnaissance Orbiter Preparing for Years Ahead\n\n'

In [9]:
paragraph_text                                                               # displays the scraped teaser paragraph (raw text, whitespace not stripped)

"\nNASA's Mars Reconnaissance Orbiter (MRO) has begun extra stargazing to help the space agency accomplish advances in Mars exploration over the next decade.\n"

#### JPL Mars Space Images - Visit the url for JPL's Featured Space Image. Use splinter to navigate the site and find the image url for the current Featured Mars Image and assign the url string to a variable called featured_image_url.

In [10]:
# Opens a visible (non-headless) Chrome session; it is closed by a later browser.quit() cell.
browser = Browser('chrome', headless=False)                                  # splinter-driven Chrome window
jpl_base_url = 'https://www.jpl.nasa.gov'                                    # prefix joined onto the links scraped further down
jpl_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"      # space-images search page filtered to category=Mars

In [11]:
browser.visit(jpl_url)                                                       # loads the search page in the automated browser
jpl_html = browser.html                                                      # HTML of the page the browser is currently on
jpl_soup = bs(jpl_html, 'html.parser')                                       # parse tree searched by the extraction cell that follows

In [12]:
# Follows the featured carousel item to its detail page and builds the full image URL.
# The detail page is parsed into its own variables so `jpl_soup` keeps holding the
# search page; this cell can then be re-run without first re-running the previous one.
feature_img = jpl_soup.find('article', class_='carousel_item')               # first carousel entry on the search page
feature_href = feature_img.a['data-link']                                    # link path that gets appended to the base URL
link = jpl_base_url + feature_href
browser.visit(link)                                                          # visits found URL with automated browser
jpl_detail_html = browser.html                                               # HTML of the detail page
jpl_detail_soup = bs(jpl_detail_html, 'html.parser')                         # separate parse tree for the detail page
image = jpl_detail_soup.find('figure', class_='lede')                        # figure wrapping the image link
img_link = image.a['href']
featured_image_url = jpl_base_url + img_link                                 # concatenates URL

In [13]:
featured_image_url                                                           # displays the assembled image URL

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA18289_hires.jpg'

In [14]:
browser.quit()                                                               # closes the browser session used for the JPL scrape

#### Mars Weather - Visit the Mars Weather twitter account and scrape the latest Mars weather tweet from the page. Save the tweet text for the weather report as a variable called mars_weather

In [15]:
browser = Browser('chrome', headless=False)                                  # new visible Chrome session for this section
tweet_url = 'https://twitter.com/marswxreport?lang=en'                       # marswxreport account page (lang=en)
browser.visit(tweet_url)                                                     # loads the account page in the automated browser

In [16]:
tweet_html = browser.html                                                    # HTML of the page the browser is currently on
tweet_soup = bs(tweet_html, 'html.parser')                                   # parse tree used for tweet extraction

In [17]:
# Takes the first <p> matching this class string; find() returns the first match only.
# NOTE(review): tied to the page markup at scrape time - if the classes change, find()
# returns None and the following `tweet.text` cell raises AttributeError.
tweet = tweet_soup.find('p', class_='TweetTextSize TweetTextSize--normal js-tweet-text tweet-text')

In [18]:
mars_weather = tweet.text.strip()                                            # tweet text with leading/trailing whitespace removed

In [19]:
mars_weather                                                                 # displays the stored weather text

'Sol 1955 (Feb 04, 2018), Sunny, high -21C/-5F, low -77C/-106F, pressure at 7.45 hPa, daylight 05:41-17:27'

In [20]:
browser.quit()                                                               # closes the browser session used for the Twitter scrape

#### Mars Facts - Visit the Mars Facts webpage here and use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.

In [21]:
facts_url = 'https://space-facts.com/mars/'                                  # page whose table is read by pandas below

In [22]:
fact_list = pd.read_html(facts_url)                                          # list of DataFrames, one per table pandas finds on the page

In [23]:
facts_df = fact_list[0]                                                      # selects the first table found (same object as fact_list[0], not a copy)

In [24]:
# Labels the two scraped columns and indexes the table by the (unnamed) fact-label column.
# Working on a copy and rebinding, rather than mutating in place, leaves `fact_list`
# untouched and lets this cell be re-run: the in-place version raised a length-mismatch
# ValueError on a second run because the frame then had only one column left.
facts_df = fact_list[0].copy()
facts_df.columns = ['', 'Mars Facts']                                        # cleans dataframe
facts_df = facts_df.set_index('')                                            # returns a new frame indexed by the first column

In [25]:
facts_df                                                                     # displays the re-labelled, re-indexed facts table

Unnamed: 0,Mars Facts
,
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [26]:
facts_table = facts_df.to_html()                                             # renders the DataFrame as an HTML <table> string
print(facts_table)                                                           # print() shows the markup itself rather than the escaped string repr

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>Mars Facts</th>
    </tr>
    <tr>
      <th></th>
      <th></th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>Equatorial Diameter:</th>
      <td>6,792 km</td>
    </tr>
    <tr>
      <th>Polar Diameter:</th>
      <td>6,752 km</td>
    </tr>
    <tr>
      <th>Mass:</th>
      <td>6.42 x 10^23 kg (10.7% Earth)</td>
    </tr>
    <tr>
      <th>Moons:</th>
      <td>2 (Phobos &amp; Deimos)</td>
    </tr>
    <tr>
      <th>Orbit Distance:</th>
      <td>227,943,824 km (1.52 AU)</td>
    </tr>
    <tr>
      <th>Orbit Period:</th>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <th>Surface Temperature:</th>
      <td>-153 to 20 °C</td>
    </tr>
    <tr>
      <th>First Record:</th>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <th>Recorded By:</th>
      <td>Egyptian astronomers</td>
    </tr>
  </tbody>
</table>


#### Mars Hemispheres - Visit the USGS Astrogeology site to obtain high resolution images for each of Mars' hemispheres.

In [27]:
browser = Browser('chrome', headless=False)                                  # new visible Chrome session for this section

# USGS Astrogeology search URL (query "hemisphere enhanced", target Mars)
usgs_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(usgs_url)                                                      # loads the results page in the automated browser

In [28]:
usgs_html = browser.html                                                     # HTML of the results page the browser is currently on
usgs_soup = bs(usgs_html, 'html.parser')                                     # parse tree of the search results

In [29]:
# Visits each hemisphere's detail page from the search results and records its title
# together with the first link listed in the page's 'downloads' section.
hemisphere_image_urls = []                                                   # one {'title', 'img_url'} dict per hemisphere

products = usgs_soup.find('div', class_='result-list')                       # container holding the search results
hemispheres = products.find_all('div', class_='item')                        # one entry per result

for hemisphere in hemispheres:                                               # iterates through hemispheres
    title = hemisphere.find('div', class_='description')

    title_text = title.a.text
    title_text = title_text.replace(' Enhanced', '')                         # removes ' Enhanced' from the link text
    browser.click_link_by_partial_text(title_text)                           # (automated) click through to the detail page

    # The detail page gets its own names: overwriting `usgs_soup` here would discard
    # the search-results tree and make this cell fail when it is run a second time.
    detail_html = browser.html
    detail_soup = bs(detail_html, 'html.parser')

    # first <li> inside the 'downloads' block carries the image link
    download_item = detail_soup.find('div', class_='downloads').find('ul').find('li')
    img_url = download_item.a['href']

    hemisphere_image_urls.append({'title': title_text, 'img_url': img_url})  # adds dictionary to list

    browser.click_link_by_partial_text('Back')                               # (automated) click back to the results

In [30]:
hemisphere_image_urls                                                        # displays the collected title / image-URL dictionaries

[{'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
  'title': 'Cerberus Hemisphere'},
 {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
  'title': 'Schiaparelli Hemisphere'},
 {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg',
  'title': 'Syrtis Major Hemisphere'},
 {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg',
  'title': 'Valles Marineris Hemisphere'}]

In [31]:
browser.quit()                                                               # closes the browser session used for the USGS scrape