In [1]:
# !pip install splinter
# !pip install selenium
# !pip install pymongo
# !pip install flask_pymongo

In [2]:
# Dependencies
import os
import pandas as pd
from bs4 import BeautifulSoup as bs
import requests 
import pymongo
import flask_pymongo
from splinter import Browser #Choose the executable path to the driver when we use the splinter

WINDOWS USER or MAC USER (choose the appropriate executable path to the driver)

In [3]:
## Mac Users:
## https://splinter.readthedocs.io/en/latest/drivers/chrome.html
# !which chromedriver
# executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
# browser = Browser('chrome', **executable_path, headless=False)

In [4]:
## Windows Users: 
## https://splinter.readthedocs.io/en/latest/drivers/chrome.html
## executable_path = {'executable_path': 'chromedriver.exe'}
## okay, the executable path in the environment path is not even necessary, I needed to include the 
## chromedriver.exe in the same folder as this Mission_to_Mars.ipynb file or use the executable path below for me personally. 
executable_path = {'executable_path': r'''C:\Users\cfole\OneDrive\Documents\chromedriver.exe'''}
browser = Browser('chrome', **executable_path, headless=False)

MARS NEWS (NASA)

In [5]:
#Visit NASA Page
news_url = "https://mars.nasa.gov/news/"
browser.visit(news_url)

# Use BS to convert to HTML
# Create Beautiful Soup Object
news_html = browser.html

# Parse HTML with Beautiful Soup
news_soup = bs(news_html, 'html.parser')

# Retrieve the title and news_paragraph
news_title = news_soup.find('div', class_='content_title').text
news_p = news_soup.find('div', class_='article_teaser_body').text

# View scraped data 
print(f'NASA News Title: {news_title}')
print(f'NASA News Paragraph: {news_p}')

NASA News Title: After a Reset, Curiosity Is Operating Normally
NASA News Paragraph: NASA's Mars rover Curiosity is in good health but takes a short break while engineers diagnose why it reset its computer. 


MARS SPACE IMAGES (JPL)

In [6]:
# Visit JPL for Mars Space Image
image_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(image_url)

# Use BS to convert to HTML
# Create Beautiful Soup Object 
image_html = browser.html

# Parse HTML with Beautiful Soup
image_soup = bs(image_html, 'html.parser')

# Retrieve background-image url
wallpaper_image_url  = image_soup.find('article')['style'].replace('background-image: url(','').replace(');', '')[1:-1]

# Website Url 
base_url = 'https://www.jpl.nasa.gov'

# Concatenated Website Url
featured_image_url = base_url + wallpaper_image_url

# View link to mars image
print(f'The Wallpaper Image on JPL is: {featured_image_url}')

The Wallpaper Image on JPL is: https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA17900-1920x1200.jpg


MARS WEATHER (TWITTER)

In [7]:
# Visit Twitter for Weather
weather_url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(weather_url)

# Use BS to convert to HTML
# Create Beautiful Soup Object 
weather_html = browser.html

# Parse HTML with Beautiful Soup
weather_soup = bs(weather_html, "html.parser")

# Retrieve Mars Weather
weather = weather_soup.find("p", class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text").text

# View the Weather Report
print(f'Mars Weather Report: {weather}')

Mars Weather Report: Curiosity is again operating normally following a boot problem first experienced last Friday. Look for more Gale Crater weather conditions soon.
https://www.jpl.nasa.gov/news/news.php?feature=7339 …pic.twitter.com/gFMfXyeWDa


MARS FACTS (theplanets.org)

In [22]:
# Visit Mars profile (facts) url
# The assignment suggested we scrape the table from this https://space-facts.com/mars/,
profile_url = 'https://space-facts.com/mars/'
# however the site was intermittently down, so utilized a profile table from another site.
# profile_url = 'https://theplanets.org/mars/'

# Get the table from the url
profileTable = pd.read_html(profile_url)

# Select the table from the url
profile_df = profileTable[0]

# Column Names
profile_df.columns = ['Feature','Value']

# View the dataFrame
profile_df

Unnamed: 0,Feature,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [9]:
# Convert the data to a HTML table string
htmlProfileTable = profile_df.to_html(classes = 'table table-striped')
htmlProfileTable

# Clean Up Unwanted New Lines
htmlProfileTable.replace('\n', '')

# View the Table Code
print(htmlProfileTable)

<table border="1" class="dataframe table table-striped">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>Value</th>
    </tr>
    <tr>
      <th>Key</th>
      <th></th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>Equatorial Diameter:</th>
      <td>6,792 km</td>
    </tr>
    <tr>
      <th>Polar Diameter:</th>
      <td>6,752 km</td>
    </tr>
    <tr>
      <th>Mass:</th>
      <td>6.42 x 10^23 kg (10.7% Earth)</td>
    </tr>
    <tr>
      <th>Moons:</th>
      <td>2 (Phobos &amp; Deimos)</td>
    </tr>
    <tr>
      <th>Orbit Distance:</th>
      <td>227,943,824 km (1.52 AU)</td>
    </tr>
    <tr>
      <th>Orbit Period:</th>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <th>Surface Temperature:</th>
      <td>-153 to 20 °C</td>
    </tr>
    <tr>
      <th>First Record:</th>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <th>Recorded By:</th>
      <td>Egyptian astronomers</td>
    </tr>
  </tbody>
</table>


MARS HEMISPHERE IMAGES (USGS Astrogeology)

In [10]:
# Visit Astrogeology website for the hemispheres 
hemispheres_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(hemispheres_url)

# Use BS to convert to HTML
# Create Beautiful Soup Object
hemispheres_html = browser.html

# Parse HTML with Beautiful Soup
hemispheres_soup = bs(hemispheres_html, 'html.parser')

# Create empty list for hemisphere urls 
hemispheres_image_urls = []

# Create Variable for the hemisphere links
#hemispheres_links = browser.find_by_css('a.product-item h3')
hemispheres_links = browser.find_by_css('h3')

# Create For Loop
# i iterates through the length of hemispheres, 4
for i in range(len(hemispheres_links)):
    # Create empty Dictionary
    hemispheres_dict = {}
    # Find the image on the hemispheres_url with class h3 and select it
    browser.find_by_css('h3')[i].click()
    
    # For the sample, select the title and image url
    sample_link = browser.find_link_by_text('Sample').first
    # Find the title on the specific hemisphere selected url with class h2 and select it 
    hemispheres_dict['title'] = browser.find_by_css('h2').text
    hemispheres_dict['img_url'] = sample_link['href']
    
    # Append the Value to the hemispheres_image_urls list
    hemispheres_image_urls.append(hemispheres_dict)
    
    # Go back so we can select the next sample hemisphere
    browser.back()

# View the Hemispheres Dictionary    
print(f'Hemispheres Dictionary (Title, Image URL): {hemispheres_image_urls}')

Hemispheres Dictionary (Title, Image URL): [{'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]


In [11]:
browser.quit()