# Mission to Mars: Web Scraping
## Dependencies:

In [1]:
from io import StringIO
from urllib.parse import urljoin

import pandas as pd
from bs4 import BeautifulSoup as bs
from splinter import Browser

### Splinter Browser Instantiation

In [2]:
# NOTE(review): absolute, machine-specific chromedriver location -- adjust it
# to wherever chromedriver is installed on the machine running this notebook.
executable_path = dict(executable_path='/usr/local/bin/chromedriver')

# One headless Chrome session, shared by every scrape further down.
browser = Browser('chrome', headless=True, **executable_path)

### Scrape and parse function definition

In [3]:
def give_me_soup(url):
    """Load ``url`` in the shared Splinter browser and parse the page.

    Uses the module-level ``browser`` object, so the browser is left on
    ``url`` after the call.

    Parameters
    ----------
    url
        Address handed to ``browser.visit``.

    Returns
    -------
    BeautifulSoup
        ``browser.html`` parsed with the ``'html.parser'`` backend.
    """
    browser.visit(url)
    return bs(browser.html, 'html.parser')

## Article Scrape:

### Scrape HTML and parse

In [4]:
# The query string is split across adjacent literals for readability; Python
# joins them back into the single original URL.
soup = give_me_soup(
    'https://mars.nasa.gov/news/'
    '?page=0&per_page=15'
    '&order=publish_date+desc%2Ccreated_at+desc'
    '&search=&category=19%2C165%2C184%2C204'
    '&blank_scope=Latest'
)

### Extract top article title and summary

In [5]:
# find() returns only the first match, i.e. the entry at the top of the list.
article_title = (
    soup.find('div', class_='content_title')
    .get_text()
    .strip()
)
article_summary = (
    soup.find('div', class_='article_teaser_body')
    .get_text()
    .strip()
)

In [6]:
# Sanity check: show the scraped headline, then the teaser on the next line.
print(article_title, ":\n", article_summary)

Nearly a Decade After Mars Phoenix Landed, Another Look :
 A recent view from Mars orbit of the site where NASA's Phoenix Mars mission landed on far-northern Mars nearly a decade ago captures changes.



## Feature Image Scrape

### Scrape HTML and parse

In [7]:
# Rebinds `soup`: from here on it holds the JPL space-images page.
soup = give_me_soup(url='https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars')

### Extract featured image tag

In [8]:
# First <article> whose class is "carousel_item"; its inline style attribute
# is read in the next cell.
imageloc = soup.find('article', attrs={'class': 'carousel_item'})

### Extract image URL from style attribute

In [9]:
# The style attribute has the form  background-image: url('/path/to/file.jpg');
# Take whatever sits between "url(" and ")" instead of slicing at fixed
# character offsets, which silently truncates or overruns as soon as the
# image file name has a different length.
style_attr = imageloc['style']
if 'url(' not in style_attr:
    raise ValueError(
        'no url(...) found in style attribute: {!r}'.format(style_attr)
    )
endurl = (
    style_attr.split('url(', 1)[1]   # text after the opening "url("
    .split(')', 1)[0]                # ...up to the closing parenthesis
    .strip(' \'"')                   # drop optional quotes / padding
    .lstrip('/')                     # begurl already ends with a slash
)
begurl = 'https://www.jpl.nasa.gov/'
imgurl = begurl + endurl
print(imgurl)

https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA16606-1920x1200.jpg


## Tweet Scrape

### Scrape HTML and parse

In [10]:
# Rebinds `soup`: from here on it holds the Mars weather Twitter page.
soup = give_me_soup(url='https://twitter.com/marswxreport?lang=en')

### Extract most recent tweet

In [11]:
# find() returns the first <p class="tweet-text"> on the page; its text is
# kept with the surrounding whitespace removed.
tweet = (
    soup.find('p', class_='tweet-text')
    .get_text()
    .strip()
)
print(tweet)

Sol 1970 (Feb 20, 2018), Sunny, high -16C/3F, low -77C/-106F, pressure at 7.32 hPa, daylight 05:38-17:26


## Table Scrape

### Scrape HTML and parse

In [12]:
# Rebinds `soup`: from here on it holds the space-facts Mars page.
soup = give_me_soup(url='https://space-facts.com/mars/')

### Extract table tag

In [13]:
# A multi-word class string is matched against the class attribute as a whole.
table_tag = soup.find('table', attrs={'class': 'tablepress tablepress-id-mars'})

### Create dataframe from table tag

In [14]:
# Fail with a clear message if the previous cell did not find the table;
# otherwise str(None) would be handed to the parser as the text 'None'.
if table_tag is None:
    raise ValueError('facts table not found on the page; cannot build dataframe')

# Recent pandas releases deprecate passing a literal HTML string to
# read_html, so the markup is wrapped in a file-like StringIO object.
# read_html returns a list of frames; the tag holds one table, so take [0].
df = pd.read_html(StringIO(str(table_tag)))[0]

### Convert dataframe back to HTML

In [15]:
# Render the frame without the index column or header row, then drop the
# newlines so the markup is a single line.
html_output = (
    df.to_html(header=False, index=False)
    .replace('\n', '')
)

## Hemisphere Image Scrape

### Scrape each url for the high-res image url

#### Note: this is only a partial scrape. The four hemisphere page URLs are hard-coded; only the high-resolution image URL is scraped from each page.

In [16]:
def mars_imgurl_gen(url):
    """Return the absolute URL of the ``wide-image`` picture on a page.

    The page at ``url`` is loaded through ``give_me_soup`` and the first
    ``<img class="wide-image">`` is looked up. Its ``src`` is resolved
    against the astrogeology.usgs.gov site root.

    Parameters
    ----------
    url
        Address of the hemisphere detail page to scrape.

    Returns
    -------
    str
        Absolute image URL.

    Raises
    ------
    ValueError
        If the page has no ``<img class="wide-image">`` or the tag has no
        ``src`` value.
    """
    soup = give_me_soup(url)
    image_tag = soup.find('img', class_='wide-image')
    # Without this check a missing tag surfaces as an opaque
    # "'NoneType' object is not subscriptable" error.
    if image_tag is None or not image_tag.get('src'):
        raise ValueError(
            'no <img class="wide-image"> with a src found at {!r}'.format(url)
        )
    base_url = 'https://astrogeology.usgs.gov'
    # urljoin gives the same result as plain concatenation for a
    # root-relative src ("/cache/...") and also copes with a src that has no
    # leading slash or is already absolute.
    return urljoin(base_url, image_tag['src'])

In [17]:
# Scrape the four hemisphere pages in order; map() calls mars_imgurl_gen on
# each page URL and the results are unpacked into one name per hemisphere.
(cerberus,
 schiaparelli,
 syrtis_major,
 valles_marineris) = map(mars_imgurl_gen, (
    'https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced',
    'https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced',
    'https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced',
    'https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced',
))

### Construct list of hemisphere image dictionaries

In [18]:
# One {"title", "img_url"} record per hemisphere, in display order.
hemisphere_image_urls = [
    dict(title=title, img_url=img_url)
    for title, img_url in (
        ("Valles Marineris Hemisphere", valles_marineris),
        ("Cerberus Hemisphere", cerberus),
        ("Schiaparelli Hemisphere", schiaparelli),
        ("Syrtis Major Hemisphere", syrtis_major),
    )
]

## Construct output dictionary

In [19]:
# Gather every scraped value from the cells above into a single dictionary.
output = dict(
    article_title=article_title,
    article_summary=article_summary,
    feature_image_url=imgurl,
    tweet=tweet,
    table_html=html_output,
    hemisphere_list=hemisphere_image_urls,
)

### Extracting elements from output

In [21]:
# Look up a single top-level value by its key.
output['article_title']

'Nearly a Decade After Mars Phoenix Landed, Another Look'

In [24]:
# 'hemisphere_list' is a list of dicts; index 0 is its first record.
output['hemisphere_list'][0]

{'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg',
 'title': 'Valles Marineris Hemisphere'}

In [31]:
# Display the complete result dictionary.
output

{'article_summary': "A recent view from Mars orbit of the site where NASA's Phoenix Mars mission landed on far-northern Mars nearly a decade ago captures changes.",
 'article_title': 'Nearly a Decade After Mars Phoenix Landed, Another Look',
 'feature_image_url': 'https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA16606-1920x1200.jpg',
 'hemisphere_list': [{'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg',
   'title': 'Valles Marineris Hemisphere'},
  {'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg',
   'title': 'Cerberus Hemisphere'},
  {'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg',
   'title': 'Schiaparelli Hemisphere'},
  {'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg',
   't