# Mission to Mars Web Scraping and Analysis Notebook

In [1]:
# import dependencies
import numpy as np
import pandas as pd
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist
from bs4 import BeautifulSoup as bs
import time
import requests
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
# Setup splinter
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

[WDM] - Current google-chrome version is 87.0.4280
[WDM] - Get LATEST driver version for 87.0.4280
[WDM] - Driver [C:\Users\jfors\.wdm\drivers\chromedriver\win32\87.0.4280.20\chromedriver.exe] found in cache


 


## NASA Mars News

In [3]:
# Scrape the NASA Mars News Site and collect the latest News Titles and Paragraph Text. Assign the text to variables that you can reference later
# first set url to look up
mars_news_url = 'https://mars.nasa.gov/news/'
browser.visit(mars_news_url)

In [4]:
html = browser.html
soup = bs(html, 'html.parser')

# Retriece all elements that contain news title
news = soup.find_all('div', class_='list_text')
news

[<div class="list_text"><div class="list_date">November 18, 2020</div><div class="content_title"><a href="/news/8801/hear-audio-from-nasas-perseverance-as-it-travels-through-deep-space/" target="_self">Hear Audio From NASA's Perseverance As It Travels Through Deep Space</a></div><div class="article_teaser_body">The first to be rigged with microphones, the agency's latest Mars rover picked up the subtle sounds of its own inner workings during interplanetary flight.</div></div>,
 <div class="list_text"><div class="list_date">November 13, 2020</div><div class="content_title"><a href="/news/8798/mars-is-getting-a-new-robotic-meteorologist/" target="_self">Mars Is Getting a New Robotic Meteorologist</a></div><div class="article_teaser_body">Sensors on NASA's Perseverance will help prepare for future human exploration by taking weather measurements and studying dust particles.</div></div>,
 <div class="list_text"><div class="list_date">November 13, 2020</div><div class="content_title"><a hre

In [5]:
# use news variable to get the latest news
latest_news = news [0]
latest_news

<div class="list_text"><div class="list_date">November 18, 2020</div><div class="content_title"><a href="/news/8801/hear-audio-from-nasas-perseverance-as-it-travels-through-deep-space/" target="_self">Hear Audio From NASA's Perseverance As It Travels Through Deep Space</a></div><div class="article_teaser_body">The first to be rigged with microphones, the agency's latest Mars rover picked up the subtle sounds of its own inner workings during interplanetary flight.</div></div>

In [6]:
# grab title and paragraph text
latest_news_title = latest_news.find('div', class_='content_title').text
latest_news_paragraph = latest_news.find('div', class_="article_teaser_body").text
print(latest_news_title)
print(latest_news_paragraph)

Hear Audio From NASA's Perseverance As It Travels Through Deep Space
The first to be rigged with microphones, the agency's latest Mars rover picked up the subtle sounds of its own inner workings during interplanetary flight.


## JPL Mars Space Images

In [7]:
# Visit the url for JPL Featured Space Image, use Splinter to navigate the site and find the image url for the current Featured mars Image and assign the url string to a variable called feature_image_url
# Make sure to find the image url to the full size .jpg image
# Make sure to save a complete url string for this image

In [8]:
jpl_img_url ='https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(jpl_img_url)

In [9]:
html = browser.html
soup = bs(html, 'html.parser')

image_url = soup.find('section', class_='main_feature').a['data-fancybox-href']
featured_image_url = 'https://www.jpl.nasa.gov/' + image_url
featured_image_url

'https://www.jpl.nasa.gov//spaceimages/images/mediumsize/PIA14884_ip.jpg'

## Mars Facts

In [10]:
# Visit the Mars Facts webpage and use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.
# Use Pandas to convert the data to a HTML table string

In [11]:
# read table with pandas
mars_facts_url = 'https://space-facts.com/mars/'
mars_facts_table = pd.read_html(mars_facts_url)
mars_facts_table

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers,
   Mars - Earth Comparison             Mars            Earth
 0               Diameter:         6,779 km        12,742 km
 1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 2                  Moons:                2                1
 3      Distance from Sun:   227,943,824 km   149,598,262 km
 4         Length of Year:   687 Earth days      365.24 days
 5            Temperature:     -87 to -5 °C      -88 to 58°C,
           

In [12]:
# convert to dataframe
mars_facts_df = mars_facts_table[0]
mars_facts_df

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [13]:
# rename columns
mars_facts_df = mars_facts_df.rename(columns={0:"Mars Profile", 1 : " "})
mars_facts_df

Unnamed: 0,Mars Profile,Unnamed: 2
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [14]:
# reset index to first column
mars_facts_df = mars_facts_df.set_index('Mars Profile')
mars_facts_df

Mars Profile,Unnamed: 1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [15]:
# Convert df to html
mars_facts_html = mars_facts_df.to_html()
mars_facts_html

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th></th>\n    </tr>\n    <tr>\n      <th>Mars Profile</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr>\n  <

In [16]:
# Strip unwanted newlines to cleanup table
mars_facts_html.replace('\n', '')

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th></th>    </tr>    <tr>      <th>Mars Profile</th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter:</th>      <td>6,792 km</td>    </tr>    <tr>      <th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.39 × 10^23 kg (0.11 Earths)</td>    </tr>    <tr>      <th>Moons:</th>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>Orbit Distance:</th>      <td>227,943,824 km (1.38 AU)</td>    </tr>    <tr>      <th>Orbit Period:</th>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>Surface Temperature:</th>      <td>-87 to -5 °C</td>    </tr>    <tr>      <th>First Record:</th>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>Recorded By:</th>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>'

In [17]:
# save to file
mars_facts_df.to_html('mars_facts_table.html')

## Mars Hemispheres

In [18]:
# Visit USGS Astrology site here to obtain high resolution images for each of Mar's hemispheres
# Save both the image url for the full resolution hemisphere image and the Hemisphere title containing the hemisphere name. Use a Python dictionary to stor the data using using the keys img_url and title.

In [19]:
mars_hemis_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(mars_hemis_url)

In [20]:
html = browser.html
soup = bs(html, 'html.parser')

In [21]:
# find hemishperes image and title
imgs_titles = soup.find_all('div', class_="item")
imgs_titles

[<div class="item"><a class="itemLink product-item" href="/search/map/Mars/Viking/cerberus_enhanced"><img alt="Cerberus Hemisphere Enhanced thumbnail" class="thumb" src="/cache/images/39d3266553462198bd2fbc4d18fbed17_cerberus_enhanced.tif_thumb.png"/></a><div class="description"><a class="itemLink product-item" href="/search/map/Mars/Viking/cerberus_enhanced"><h3>Cerberus Hemisphere Enhanced</h3></a><span class="subtitle" style="float:left">image/tiff 21 MB</span><span class="pubDate" style="float:right"></span><br/><p>Mosaic of the Cerberus hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. This mosaic is composed of 104 Viking Orbiter images acquired…</p></div> <!-- end description --></div>,
 <div class="item"><a class="itemLink product-item" href="/search/map/Mars/Viking/schiaparelli_enhanced"><img alt="Schiaparelli Hemisphere Enhanced thumbnail" class="thumb" src="/cache/images/08eac6e22c07fb1fe72223a79252de20_schiapa

In [25]:
# loop through results to get each hemisphere image url and title
# create empty list for hemisphere image Urls
hem_image_urls = []
main_url = 'https://astrogeology.usgs.gov'
# https://www.w3schools.com/python/trypython.asp?filename=demo_ref_string_replace
for item in imgs_titles:
    #get titles
    title = item.find('h3').text
    title = title.replace('Enhanced', "")
    
    # get link that leads to full res image
    start_url=item.find('a', class_='itemLink product-item')['href']
    
    #go to full res url
    browser.visit(main_url + start_url)
    
    #now at individual hemisphere website to grap full res image
    start_url = browser.html
    
    #parse with BeautifulSoup
    soup = bs(start_url,'html.parser')
    
    #get full res image source
    fullRes_url = main_url + soup.find('img', class_='wide-image')['src']

    #append into a list of dictionaries
    hem_image_urls = ({'title': title}, {"img_url": fullRes_url})
    print(f"----------------------------------------------------------")

#display list of dictionaries
hem_image_urls    

----------------------------------------------------------
----------------------------------------------------------
----------------------------------------------------------
----------------------------------------------------------


({'title': 'Valles Marineris Hemisphere '},
 {'img_url': 'https://astrogeology.usgs.gov/cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'})