# Mission To Mars
* UofMN Data Visualization and Analytics Bootcamp
* Homework 10 / Week 12 - Web Scraping and MongoDB
* Created by: Chris Howard
* 05/17/2019

In [1]:
# Dependencies
from bs4 import BeautifulSoup
from splinter import Browser
import pandas as pd
import requests
import pymongo

In [2]:
# Location of the local ChromeDriver binary. A raw string keeps the Windows
# backslashes literal; in a normal string '\c' is an invalid escape sequence.
executable_path = {'executable_path': r'C:\chromedrv\chromedriver.exe'}
# Show browser here for testing, change headless to 'True' for app.py
browser = Browser('chrome', **executable_path, headless=False)

In [3]:
# Mars News
# Download the NASA Mars news page and read the title and summary out of the
# first <div class="slide"> element found in the markup.
news_url = 'https://mars.nasa.gov/news/'
news_resp = requests.get(news_url, timeout=30)  # bound the wait so a stalled connection can't hang the kernel
news_resp.raise_for_status()  # stop on an HTTP error instead of parsing an error page
news_soup = BeautifulSoup(news_resp.text, 'lxml')

headline = news_soup.find('div', class_="slide")
if headline is None:
    raise ValueError('No <div class="slide"> found at ' + news_url)

title_div = headline.find('div', class_="content_title")
summary_div = headline.find('div', class_="rollover_description_inner")
if title_div is None or summary_div is None:
    raise ValueError('News slide is missing its title or summary at ' + news_url)

# Only leading/trailing newlines are stripped; other whitespace is kept as-is.
news_title = title_div.text.strip('\n')
news_summary = summary_div.text.strip('\n')
print(news_title)
print(news_summary)

Why This Martian Full Moon Looks Like Candy
For the first time, NASA's Mars Odyssey orbiter has caught the Martian moon Phobos during a full moon phase. Each color in this new image represents a temperature range detected by Odyssey's infrared camera.


In [4]:
# JPL Mars Space Images - Featured
# Download the JPL space-images page and build the absolute URL of the image
# linked from the first <li class="slide"> element.
image_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars#submit'
image_resp = requests.get(image_url, timeout=30)  # bound the wait so a stalled connection can't hang the kernel
image_resp.raise_for_status()  # stop on an HTTP error instead of parsing an error page
image_soup = BeautifulSoup(image_resp.text, 'lxml')

image = image_soup.find('li', class_='slide')
if image is None or image.a is None:
    raise ValueError('No featured image slide/link found at ' + image_url)

# The attribute holds a site-relative path, so prefix the host to get a full URL.
featured_image_url = "https://www.jpl.nasa.gov" + image.a['data-fancybox-href']
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA23221_hires.jpg


In [5]:
# Twitter weather report
# Download the MarsWxReport timeline and keep the text of the first tweet
# whose leading link points at the account itself.
twitter_url = 'https://twitter.com/marswxreport?lang=en'
twitter_resp = requests.get(twitter_url, timeout=30)  # bound the wait so a stalled connection can't hang the kernel
twitter_resp.raise_for_status()  # stop on an HTTP error instead of parsing an error page
twitter_soup = BeautifulSoup(twitter_resp.text, 'lxml')
tweets = twitter_soup.find_all('div', class_='content')

# Start from None so a run with no matching tweet fails loudly below instead of
# raising NameError or silently reusing a value left over from an earlier run.
recent_tweet = None
for tweet in tweets:
    # Find first tweet that is not a re-tweet, this will be the most recent weather report
    link = tweet.a
    if link is not None and link.get('href') == '/MarsWxReport':
        recent_tweet = tweet
        break
if recent_tweet is None:
    raise ValueError('No tweet authored by /MarsWxReport found at ' + twitter_url)

mars_weather = recent_tweet.p.text
mars_weather = mars_weather.partition(' hPapic')[0] #Remove pic URL that can't be used in Flask App (404 error)
print(mars_weather)

InSight sol 167 (2019-05-17) low -100.5ºC (-148.9ºF) high -20.4ºC (-4.6ºF)
winds from the SW at 4.7 m/s (10.6 mph) gusting to 13.5 m/s (30.3 mph)
pressure at 7.50


In [6]:
# Mars facts table
# Read every HTML table on the page, then turn the first one into an HTML
# string: label the two columns, index by description, render with to_html().
facts_url = 'https://space-facts.com/mars/'
mars_table = pd.read_html(facts_url)
mars_df = (
    mars_table[0]
    .rename(columns={0: 'description', 1: 'values'})
    .set_index('description')
    .to_html()
)


In [7]:
# Mars Hemispheres
hemi_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
hemi_list = ['Cerberus', 'Schiaparelli', 'Syrtis', 'Valles']
hemisphere_image_urls = []
browser.visit(hemi_url)

for hemi in hemi_list:
    browser.click_link_by_partial_text(hemi)
    hemi_html = browser.html
    hemi_soup = BeautifulSoup(hemi_html, 'html.parser')
    url = hemi_soup.find('div', class_='downloads').ul.li.a['href']
    name = hemi_soup.title.text.partition(' Enhanced')[0]
    hemisphere_image_urls.append({'title':name, 'img_url':url})
    browser.back()

print(hemisphere_image_urls)


[{'title': 'Cerberus Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}, {'title': 'Schiaparelli Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}, {'title': 'Syrtis Major Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}, {'title': 'Valles Marineris Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]


In [8]:
# Open a client against the MongoDB server at the URI below
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(host=conn)

In [9]:
# Bundle everything scraped above into a single document for the DB,
# then print it as a sanity check.
post = dict(
    news=dict(title=news_title, summary=news_summary),
    feat_img=featured_image_url,
    weather=mars_weather,
    facts=mars_df,
    hemi_img=hemisphere_image_urls,
)
print(post)

{'news': {'title': 'Why This Martian Full Moon Looks Like Candy', 'summary': "For the first time, NASA's Mars Odyssey orbiter has caught the Martian moon Phobos during a full moon phase. Each color in this new image represents a temperature range detected by Odyssey's infrared camera."}, 'feat_img': 'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA23221_hires.jpg', 'weather': 'InSight sol 167 (2019-05-17) low -100.5ºC (-148.9ºF) high -20.4ºC (-4.6ºF)\nwinds from the SW at 4.7 m/s (10.6 mph) gusting to 13.5 m/s (30.3 mph)\npressure at 7.50', 'facts': '<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>values</th>\n    </tr>\n    <tr>\n      <th>description</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.42 

In [10]:
# Store 'post' in mars_db.new_info: replace a document matched by the empty
# filter, or insert 'post' when nothing matches (upsert).
db = client['mars_db']
collection = db['new_info']

collection.replace_one(filter={}, replacement=post, upsert=True)

<pymongo.results.UpdateResult at 0x1dfb386f3c8>