# Mission to Mars Datascraping Challenge

# Setup

In [1]:
# Declare Dependencies 
import pandas as pd
from bs4 import BeautifulSoup as bs4
import requests
import os
from splinter import Browser
import pymongo

In [2]:
!pip install splinter



In [3]:
# The default port used by MongoDB is 27017
# https://docs.mongodb.com/manual/reference/default-mongodb-port/
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

In [4]:
# https://splinter.readthedocs.io/en/latest/drivers/chrome.html
!which chromedriver

# might have to update mac privacy settings to allow chromium to work

/usr/local/bin/chromedriver


In [7]:
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

# NASA Mars News (splinter)

* Scrape the NASA Mars News Site and collect the latest News Title and Paragraph Text. Assign the text to variables that you can reference later.

In [10]:
# URL of page to be scraped
mars_url = 'https://mars.nasa.gov/news/'
browser.visit(mars_url)


In [11]:
# Create HTML object
html = browser.html

# Create BeautifulSoup object; parse with 'html.parser'
soup = bs4(html, 'html.parser')
# type(soup)

# Assign news_title and news_paragraph variables to reference later 
news_title = soup.find('div', class_='content_title').find('a').text
news_p = soup.find('div', class_='article_teaser_body').text

# Display scrapped data 
print(news_title)
print(news_p)

Mars InSight Lander to Push on Top of the 'Mole'
Engineers have a plan for pushing down on the heat probe, which has been stuck at the Martian surface for a year.


# JPL Mars Space Images - Featured Image (splinter)

* Visit the url for JPL Featured Space Image here.

* Use splinter to navigate the site and find the image url for the current Featured Mars Image and assign the url string to a variable called featured_image_url.

* Make sure to find the image url to the full size .jpg image.

* Make sure to save a complete url string for this image.

In [12]:
# Visit https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars
image_url_featured = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(image_url_featured)
print(image_url_featured)

https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars


In [13]:
# Use splinter to navigate the site and find the image url for the current Featured Mars Image 
# and assign the url string to a variable called featured_image_url

# Create HTML object
image_html = browser.html

# Create BeautifulSoup object; parse with 'html.parser'
soup = bs4(image_html, 'html.parser')
        
# Retrieve background_image
featured_image_url = soup.find('article')['style'].replace('background-image: url(','').replace(');', '')[1:-1]
  
# Define anchor url 
anchor_url = 'https://www.jpl.nasa.gov'

# Create featured_image_url
featured_image_url = anchor_url + featured_image_url

# Print featured_image_url link
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA19343-1920x1200.jpg'

# Mars Weather (splinter)

* Visit the Mars Weather twitter account here and scrape the latest Mars weather tweet from the page. Save the tweet text for the weather report as a variable called mars_weather.

In [16]:
# Visit the latest Mars weather tweet from the page
twitter_weather_url = 'https://twitter.com/marswxreport?lang=en'
result = requests.get(twitter_weather_url)

In [17]:
# Scrape the latest Mars weather tweet from the page

# Create HTML object
twitter_weather_html = result.text

# Create BeautifulSoup object; parse with 'html.parser'
twitter_weather_soup = bs4(twitter_weather_html, 'html.parser')

In [18]:
#Save the tweet text for the weather report as a variable called mars_weather.
twitter_mars_weather = twitter_weather_soup.find(class_='tweet-text').get_text()
twitter_mars_weather

'InSight sol 439 (2020-02-20) low -94.7ºC (-138.4ºF) high -9.3ºC (15.2ºF)\nwinds from the SSE at 6.5 m/s (14.6 mph) gusting to 23.2 m/s (51.9 mph)\npressure at 6.30 hPapic.twitter.com/VRiv3fQH9p'

# Mars Facts (pandas)

* Visit the Mars Facts webpage here and use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.

* Use Pandas to convert the data to a HTML table string.

In [19]:
# Visit the Mars Facts webpage
mars_facts_url = 'http://space-facts.com/mars/'

# Use Pandas to scrape the table containing facts about the planet
mars_facts = pd.read_html(mars_facts_url)
#print(mars_facts)

# Create dataframe
mars_space_facts_df = mars_facts[0]
#print(mars_space_facts_df)

# Assign columns
mars_space_facts_df.columns = ['Description','Value']

# Remove indexing
mars_space_facts_df.set_index('Description', inplace=True)

# Display mars_df
mars_space_facts_df

Unnamed: 0_level_0,Value
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [20]:
# convert the data to HTML
mars_space_facts_df.to_html()

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Value</th>\n    </tr>\n    <tr>\n      <th>Description</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr>\

# Mars Hemispheres (splinter)

* Visit the USGS Astrogeology site here to obtain high resolution images for each of Mar’s hemispheres.

* You will need to click each of the links to the hemispheres in order to find the image url to the full resolution image.

* Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing the hemisphere name. Use a Python dictionary to store the data using the keys img_url and title.

* Append the dictionary with the image url string and the hemisphere title to a list. This list will contain one dictionary for each hemisphere.

In [79]:
# Visit https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars
hemi_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(hemi_url)
print(hemi_url)

https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars


In [80]:
# Create HTML object
hemi_html = browser.html

# Create BeautifulSoup object; parse with 'html.parser'
soup = bs4(hemi_html, 'html.parser')

In [82]:
# Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing the hemisphere name. 
# Use a Python dictionary to store the data using the keys img_url and title.
# Append the dictionary with the image url string and the hemisphere title to a list. 
# This list will contain one dictionary for each hemisphere.

# Create HTML Object
html_hemispheres = browser.html

# Parse HTML with Beautiful Soup
soup = bs4(html_hemispheres, 'html.parser')

# Retrieve items that contain hemisphere info
items = soup.find_all('div', class_='item')

# Create empty list for hemisphere urls
hemisphere_image_urls = []

# Define anchor url 
hemisphere_anchor_url = 'https://astrogeology.usgs.gov'

# Loop through the items previously stored
for i in items: 
    # Store title
    title = i.find('h3').text
    
    # Store link appendage for full image link
    append_img_url = i.find('a', class_='itemLink product-item')['href']
    
    # Visit full image website 
    browser.visit(hemisphere_anchor_url + append_img_url)
    
    # Create HTML object of individual hemisphere information website 
    append_img_html = browser.html
    
    # Parse HTML with Beautiful Soup for every individual hemisphere information website 
    soup = bs4(append_img_html, 'html.parser')
    
    # Build full image url 
    img_url = hemisphere_anchor_url + soup.find('img', class_='wide-image')['src']
    
    # Append to a list of dictionaries 
    hemisphere_image_urls.append({"title" : title, "img_url" : img_url})
    

# Display hemisphere_image_urls
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]