# Step 1 - Scraping

In [3]:
import pandas as pd
from bs4 import BeautifulSoup
import requests
from splinter import Browser

## NASA Mars News

In [4]:
# NASA page to be scraped
url = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'

In [5]:
# Retrieve page with the requests module
html = requests.get(url).text

# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(html, 'html.parser')


print(soup.prettify())

<!DOCTYPE html>
<html lang="en" xml:lang="en" xmlns="http://www.w3.org/1999/xhtml">
 <head>
  <meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
  <!-- Always force latest IE rendering engine or request Chrome Frame -->
  <meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/>
  <script type="text/javascript">
   window.NREUM||(NREUM={});NREUM.info={"beacon":"bam.nr-data.net","errorBeacon":"bam.nr-data.net","licenseKey":"5e33925808","applicationID":"59562082","transactionName":"JVcPR0MLWApSRU1eAQVVEhxSC1oSUlkWbBMHXwRAHhdcCUA=","queueTime":0,"applicationTime":395,"agent":""}
  </script>
  <script type="text/javascript">
   (window.NREUM||(NREUM={})).loader_config={xpid:"VQcPUlZTDxAFXVRUBQEPVA==",licenseKey:"5e33925808",applicationID:"59562082"};window.NREUM||(NREUM={}),__nr_require=function(t,n,e){function r(e){if(!n[e]){var o=n[e]={exports:{}};t[e][0].call(o.exports,function(n){var o=t[e][1][n];return r(o||n)},o,o.exports)}return n[e].exports}if("function"==ty

In [17]:
# Extracting the results we want (News Title)
results_titles = soup.find_all('div', class_="content_title")
print(results_titles)

[<div class="content_title">
<a href="/news/8641/nasas-perseverance-mars-rover-gets-its-wheels-and-air-brakes/">
NASA's Perseverance Mars Rover Gets Its Wheels and Air Brakes
</a>
</div>, <div class="content_title">
<a href="/news/8634/109-million-names-now-aboard-nasas-perseverance-mars-rover/">
10.9 Million Names Now Aboard NASA's Perseverance Mars Rover
</a>
</div>, <div class="content_title">
<a href="/news/8622/virginia-middle-school-student-earns-honor-of-naming-nasas-next-mars-rover/">
Virginia Middle School Student Earns Honor of Naming NASA's Next Mars Rover
</a>
</div>, <div class="content_title">
<a href="/news/8606/nasa-prepares-for-moon-and-mars-with-new-addition-to-its-deep-space-network/">
NASA Prepares for Moon and Mars With New Addition to Its Deep Space Network
</a>
</div>, <div class="content_title">
<a href="/news/8603/nasa-administrator-statement-on-moon-to-mars-initiative-fy-2021-budget/">
NASA Administrator Statement on Moon to Mars Initiative, FY 2021 Budget
</a

In [18]:
# Extracting the results we want (Paragraph Text)
results_paragraphs = soup.find_all('div', class_="rollover_description_inner")
print(results_paragraphs)
print('---------------------------------------------')

[<div class="rollover_description_inner">
After the rover was shipped from JPL to Kennedy Space Center, the team is getting closer to finalizing the spacecraft for launch later this summer.
</div>, <div class="rollover_description_inner">
As part of NASA's 'Send Your Name to Mars' campaign, they've been stenciled onto three microchips along with essays from NASA's 'Name the Rover' contest. Next stop: Mars.
</div>, <div class="rollover_description_inner">
NASA chose a seventh-grader from Virginia as winner of the agency's "Name the Rover" essay contest. Alexander Mather's entry for "Perseverance" was voted tops among 28,000 entries. 
</div>, <div class="rollover_description_inner">
Robotic spacecraft will be able to communicate with the dish using radio waves and lasers.
</div>, <div class="rollover_description_inner">
Jim Bridenstine addresses NASA's ambitious plans for the coming years, including Mars Sample Return.
</div>, <div class="rollover_description_inner">
155 students from ac

In [19]:
# Loop and extract the titles
titles = []
for result in results_titles:
    try:
        title = result.find('a')
        titles.append(result.text.strip())        
        print(result.text.strip())        
        print('---------------------------------------------')
    except AttributeError as e:
        print(e)

NASA's Perseverance Mars Rover Gets Its Wheels and Air Brakes
---------------------------------------------
10.9 Million Names Now Aboard NASA's Perseverance Mars Rover
---------------------------------------------
Virginia Middle School Student Earns Honor of Naming NASA's Next Mars Rover
---------------------------------------------
NASA Prepares for Moon and Mars With New Addition to Its Deep Space Network
---------------------------------------------
NASA Administrator Statement on Moon to Mars Initiative, FY 2021 Budget
---------------------------------------------
NASA's Mars 2020 Rover Closer to Getting Its Name
---------------------------------------------


In [22]:
# Loop and extract the paragraphs
paragraphs = []
for result in results_paragraphs:
    try:
        paragraphs.append(result.text.strip())        
        print(result.text.strip())        
        print('---------------------------------------------')
    except AttributeError as e:
        print(e)

After the rover was shipped from JPL to Kennedy Space Center, the team is getting closer to finalizing the spacecraft for launch later this summer.
---------------------------------------------
As part of NASA's 'Send Your Name to Mars' campaign, they've been stenciled onto three microchips along with essays from NASA's 'Name the Rover' contest. Next stop: Mars.
---------------------------------------------
NASA chose a seventh-grader from Virginia as winner of the agency's "Name the Rover" essay contest. Alexander Mather's entry for "Perseverance" was voted tops among 28,000 entries.
---------------------------------------------
Robotic spacecraft will be able to communicate with the dish using radio waves and lasers.
---------------------------------------------
Jim Bridenstine addresses NASA's ambitious plans for the coming years, including Mars Sample Return.
---------------------------------------------
155 students from across the U.S. have been chosen as semifinalists in NASA's 

## JPL Mars Space Images - Featured Image

In [25]:
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

In [26]:
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)

In [27]:
html = browser.html
soup = BeautifulSoup(html, 'html.parser')
browser.links.find_by_partial_text('FULL IMAGE')


<splinter.element_list.ElementList at 0x7d25763550>

In [28]:
src=soup('a', class_='button fancybox')[0]['data-fancybox-href']


In [31]:
featured_image_url = f'https://www.jpl.nasa.gov' + src

print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA22892_ip.jpg


## Mars Weather

In [34]:
# Visit the Mars Weather twitter account here and scrape the latest Mars weather tweet from the page.

mars_twitter_url = 'https://twitter.com/marswxreport?lang=en'

html2 = requests.get(mars_twitter_url).text

soup2 = BeautifulSoup(html2, 'html.parser')

print(soup2.prettify())

<!DOCTYPE html>
<html data-scribe-reduced-action-queue="true" lang="en">
 <head>
  <meta charset="utf-8"/>
  <script nonce="SPx+W7XTG/TpYQR3gpaU5w==">
   !function(){window.initErrorstack||(window.initErrorstack=[]),window.onerror=function(r,i,n,o,t){r.indexOf("Script error.")>-1||window.initErrorstack.push({errorMsg:r,url:i,lineNumber:n,column:o,errorObj:t})}}();
  </script>
  <script id="bouncer_terminate_iframe" nonce="SPx+W7XTG/TpYQR3gpaU5w==">
   if (window.top != window) {
  window.top.postMessage({'bouncer': true, 'event': 'complete'}, '*');
}
  </script>
  <script id="swift_action_queue" nonce="SPx+W7XTG/TpYQR3gpaU5w==">
   !function(){function e(e){if(e||(e=window.event),!e)return!1;if(e.timestamp=(new Date).getTime(),!e.target&&e.srcElement&&(e.target=e.srcElement),document.documentElement.getAttribute("data-scribe-reduced-action-queue"))for(var t=e.target;t&&t!=document.body;){if("A"==t.tagName)return;t=t.parentNode}return i("all",o(e)),a(e)?(document.addEventListener||(e=o(

In [35]:
# extracting results (the latest Mars weather tweet)
results2 = soup2.find_all('p', class_="TweetTextSize")
results2

[<p class="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text" data-aria-label-part="0" lang="en">InSight sol 481 (2020-04-03) low -93.9ºC (-137.0ºF) high -5.9ºC (21.4ºF)
 winds from the WNW at 5.3 m/s (11.8 mph) gusting to 18.3 m/s (40.9 mph)
 pressure at 6.50 hPa<a class="twitter-timeline-link u-hidden" data-pre-embedded="true" dir="ltr" href="https://t.co/1cWBpQA4iZ">pic.twitter.com/1cWBpQA4iZ</a></p>,
 <p class="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text" data-aria-label-part="0" lang="en">InSight sol 480 (2020-04-02) low -93.0ºC (-135.5ºF) high -6.7ºC (19.9ºF)
 winds from the SW at 5.8 m/s (13.1 mph) gusting to 17.2 m/s (38.5 mph)
 pressure at 6.50 hPa<a class="twitter-timeline-link u-hidden" data-pre-embedded="true" dir="ltr" href="https://t.co/8oUTHBmcXp">pic.twitter.com/8oUTHBmcXp</a></p>,
 <p class="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text" data-aria-label-part="0" lang="en">InSight sol 479 (2020-04-01) low -93.8ºC (-136.8ºF) hig

In [36]:
tweets = []
for result in results2:
    tweets.append(result.get_text())    
    print(result.get_text())
    print('-----------------')

InSight sol 481 (2020-04-03) low -93.9ºC (-137.0ºF) high -5.9ºC (21.4ºF)
winds from the WNW at 5.3 m/s (11.8 mph) gusting to 18.3 m/s (40.9 mph)
pressure at 6.50 hPapic.twitter.com/1cWBpQA4iZ
-----------------
InSight sol 480 (2020-04-02) low -93.0ºC (-135.5ºF) high -6.7ºC (19.9ºF)
winds from the SW at 5.8 m/s (13.1 mph) gusting to 17.2 m/s (38.5 mph)
pressure at 6.50 hPapic.twitter.com/8oUTHBmcXp
-----------------
InSight sol 479 (2020-04-01) low -93.8ºC (-136.8ºF) high -8.0ºC (17.6ºF)
winds from the SW at 5.3 m/s (11.8 mph) gusting to 18.3 m/s (41.0 mph)
pressure at 6.50 hPapic.twitter.com/SWhhe9vMHL
-----------------
InSight sol 478 (2020-03-31) low -93.3ºC (-135.9ºF) high -8.5ºC (16.7ºF)
winds from the S at 6.1 m/s (13.6 mph) gusting to 18.1 m/s (40.5 mph)
pressure at 6.50 hPapic.twitter.com/hlo9Yr5bKo
-----------------
InSight sol 477 (2020-03-30) low -93.0ºC (-135.4ºF) high -6.5ºC (20.4ºF)
winds from the SW at 5.7 m/s (12.7 mph) gusting to 18.8 m/s (42.1 mph)
pressure at 6.50 hPa

In [37]:
# Saving the latest Mars weather tweet text as a variable called mars_weather
mars_weather = tweets[0]
mars_weather

'InSight sol 481 (2020-04-03) low -93.9ºC (-137.0ºF) high -5.9ºC (21.4ºF)\nwinds from the WNW at 5.3 m/s (11.8 mph) gusting to 18.3 m/s (40.9 mph)\npressure at 6.50 hPapic.twitter.com/1cWBpQA4iZ'

## Mars Facts

In [43]:
#Visit the Mars Facts webpage, use Pandas to scrape the table containing facts about the planet 
mars_fact_url = 'https://space-facts.com/mars/'
mars_table = pd.read_html(mars_fact_url)
mars_table

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers,
   Mars - Earth Comparison             Mars            Earth
 0               Diameter:         6,779 km        12,742 km
 1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 2                  Moons:                2                1
 3      Distance from Sun:   227,943,824 km   149,598,262 km
 4         Length of Year:   687 Earth days      365.24 days
 5            Temperature:    -153 to 20 °C      -88 to 58°C,
           

In [53]:
#Use Pandas to convert the data to a HTML table string.
df = mars_table[0]
df.columns = ['Fact', 'Values']
df

Unnamed: 0,Fact,Values
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [54]:
html_table = df.to_html()
html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Fact</th>\n      <th>Values</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>Surface Temperature:</td>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>First Record:</td>\n      <td>2nd millennium BC</td

In [55]:
df.to_html('mars_facts_table.html')

## Mars Hemispheres

In [59]:
#Visit the USGS Astrogeology site here to obtain high resolution images for each of Mar's hemispheres.

mars_hemispheres_images = [
    {'title': 'Cerberus Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif'},
    {'title': 'Schiaparelli Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif'},
    {'title': 'Syrtis Major Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif'},
    {'title': 'Valles Marineris Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif'}
]

In [60]:
mars_hemispheres_images

[{'title': 'Cerberus Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif'},
 {'title': 'Schiaparelli Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif'},
 {'title': 'Syrtis Major Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif'},
 {'title': 'Valles Marineris Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif'}]