In [1]:
# Imports
import pandas as pd
import requests
import re
import time
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist
from bs4 import BeautifulSoup as bs
from selenium import webdriver
import pymongo

# Scraping

In [2]:
# Scrape the NASA Mars News site
url = "https://mars.nasa.gov/news/" 
response = requests.get(url)

## Mars News

In [3]:
# Collect news title and paragrapgh text
soup = bs(response.text, 'html5lib')
news_title = soup.find("div", class_="content_title").text
news_p = soup.find("div", class_ = "rollover_description_inner").text
print(news_title)
print(news_p)



NASA Invests in Visionary Technology 



NASA is investing in technology concepts, including several from JPL, that may one day be used for future space exploration missions.



## Featured Image

In [4]:
#JPL Mars Space Images - Featured Image
# visit the JPL website. Use splinter to navigate the site.
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)
jpl_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(jpl_url)

# Click each of the links to the hemispheres 
# in order to find the image url to the full resolution image.
time.sleep(10)
browser.click_link_by_partial_text('FULL IMAGE')
time.sleep(10)
browser.click_link_by_partial_text('more info')
time.sleep(10)

# Create BeautifulSoup object; parse with 'html.parser'
html = browser.html
soup = bs(html, 'html.parser')

# Scrape the featured image.
results = soup.find('article')
second_link = results.find('figure', 'lede').a['href']
jpl_link = "https://www.jpl.nasa.gov"
featured_image_url = jpl_link + second_link

In [5]:
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA18049_hires.jpg


## Mars Weather

In [6]:
#Mars Weather from Twitter
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)
weather_url='https://twitter.com/marswxreport?lang=en'
browser.visit(weather_url)
time.sleep(5)
weather_html = browser.html
weather_soup = bs(weather_html, 'html5lib')
MarsWxReport = weather_soup.find('ol', class_='stream-items')
mars_weather = MarsWxReport.find('p', class_="TweetTextSize").text

In [7]:
print(mars_weather)

Sol 2054 (May 17, 2018), Sunny, high 4C/39F, low -72C/-97F, pressure at 7.40 hPa, daylight 05:21-17:20


## Mars Facts

In [8]:
#Mars Facts
#Visit the Mars Facts webpage here and use Pandas to scrape the table containing facts about the planet including
#Diameter, Mass, etc.
url = 'https://space-facts.com/mars/'
table = pd.read_html(url)
table

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [9]:
df = table[0]
df.columns = ['0','1']
df.rename(columns={'0':'Parameters', '1':'Values'}, inplace=True)
df.set_index('Parameters', inplace=True)
df

Unnamed: 0_level_0,Values
Parameters,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [10]:
#Use Pandas to convert the data to a HTML table string.
mars_facts_html = df.to_html()
mars_facts_html.replace('\n', '')
print(mars_facts_html)

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>Values</th>
    </tr>
    <tr>
      <th>Parameters</th>
      <th></th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>Equatorial Diameter:</th>
      <td>6,792 km</td>
    </tr>
    <tr>
      <th>Polar Diameter:</th>
      <td>6,752 km</td>
    </tr>
    <tr>
      <th>Mass:</th>
      <td>6.42 x 10^23 kg (10.7% Earth)</td>
    </tr>
    <tr>
      <th>Moons:</th>
      <td>2 (Phobos &amp; Deimos)</td>
    </tr>
    <tr>
      <th>Orbit Distance:</th>
      <td>227,943,824 km (1.52 AU)</td>
    </tr>
    <tr>
      <th>Orbit Period:</th>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <th>Surface Temperature:</th>
      <td>-153 to 20 °C</td>
    </tr>
    <tr>
      <th>First Record:</th>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <th>Recorded By:</th>
      <td>Egyptian astronomers</td>
    </tr>
  </tbody>
</table>


In [17]:
df.to_html('facts_table.html')

## Mars Hemispheres

In [12]:
# Mars Hemispheres
#Visit the USGS Astrogeology site here to obtain high resolution images for each of Mar's hemispheres.

url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url)
html = browser.html
soup = bs(html, 'html.parser')

# Empty list of image urls
hemisphere_image_urls = []

products = soup.find("div", class_ = "result-list" )
hemispheres = products.find_all("div", class_="item")

for hemisphere in hemispheres:
    title = hemisphere.find("h3").text
#get rid of "Enhanced" in the titles    
    title = title.replace("Enhanced", "")
    second_link = hemisphere.find("a")["href"]
    whole_link = "https://astrogeology.usgs.gov/" + second_link    
    browser.visit(whole_link)
    html = browser.html
    soup=bs(html, "html.parser")
    downloads = soup.find("div", class_="downloads")
    image_url = downloads.find("a")["href"]
    hemisphere_image_urls.append({"title": title, "img_url": image_url})
   

In [13]:
 hemisphere_image_urls

[{'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
  'title': 'Cerberus Hemisphere '},
 {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
  'title': 'Schiaparelli Hemisphere '},
 {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg',
  'title': 'Syrtis Major Hemisphere '},
 {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg',
  'title': 'Valles Marineris Hemisphere '}]