In [1]:
import pandas as pd
import requests

from selenium import webdriver
from splinter import Browser
from bs4 import BeautifulSoup

### NASA Mars News

In [2]:
#Mars News url
news_url = 'https://mars.nasa.gov/news'

#browser setup
executable_path = {'executable_path':'C:\\Users\\monic\\Desktop\\Mars7\\chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)
browser.visit(news_url)

#create BeautifulSoup object and parse
soup = BeautifulSoup(browser.html, 'html.parser')

In [3]:
#find the latest news title and remove newline characters
news_title = soup.find(class_='content_title').get_text(strip=True)

news_title

'Watch NASA Build Its Next Mars Rover'

In [4]:
#find the latest news paragraph text and remove newline characters
news_p = soup.find(class_='rollover_description_inner').get_text(strip=True)

news_p

"A newly installed webcam offers the public a live, bird's-eye view of NASA's Mars 2020 rover as it takes shape at NASA's Jet Propulsion Laboratory."

### JPL Mars Space Images - Featured Image

In [5]:
#JPL Mars images url
img_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
img_base_url = 'https://www.jpl.nasa.gov'

#retrieve
result = requests.get(img_url)

#check if okay
result.ok

True

In [6]:
#make it text
html = result.text

#create BeautifulSoup object and parse
soup = BeautifulSoup(html, 'html.parser')
print(soup.prettify())

<!DOCTYPE html>
<!--[if IE 9]> <html class="no-js ie ie9" lang="en"> <![endif]-->
<!--[if IE 8]> <html class="no-js ie ie8" lang="en"> <![endif]-->
<html>
 <!-- START HEADER: "DEFAULT" -->
 <head>
  <meta charset="utf-8"/>
  <!-- Always force latest IE rendering engine or request Chrome Frame -->
  <meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/>
  <meta content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no" name="viewport"/>
  <title>
   Space Images
  </title>
  <link href="/assets/stylesheets/manifest.css" media="all" rel="stylesheet" type="text/css"/>
  <link href="/assets/stylesheets/print.css" media="print" rel="stylesheet" type="text/css"/>
  <script src="/assets/javascripts/public_manifest.js" type="text/javascript">
  </script>
  <script src="/assets/javascripts/vendor/jquery.fancybox.js" type="text/javascript">
  </script>
  <script src="/assets/javascripts/vendor/jquery.fancybox-thumbs.js" type="text/javascript">
  </script>
 </he

In [7]:
#select the part that contains the image urls
fancy_box = soup.select('li.slide a.fancybox')

#make a list of just the data-fancybox-hrefs
img_list = [i.get('data-fancybox-href') for i in fancy_box]

#combine the base url with the first img url
featured_image_url = img_base_url + img_list[0]   

print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA23259_hires.jpg


### Mars Weather




In [8]:
#scrape the latest Mars weather tweet from the Mars Weather twitter account
twit_url = 'https://twitter.com/marswxreport?lang=en'
result = requests.get(twit_url)
result.ok

True

In [9]:
#make it text
html = result.text

#create BeautifulSoup object and parse
soup = BeautifulSoup(html, 'html.parser')

In [10]:
print(soup.prettify())

<!DOCTYPE html>
<html data-scribe-reduced-action-queue="true" lang="en">
 <head>
  <meta charset="utf-8"/>
  <script nonce="MBAYZO/AH7/g5GKOvSr+XA==">
   !function(){window.initErrorstack||(window.initErrorstack=[]),window.onerror=function(r,i,n,o,t){r.indexOf("Script error.")>-1||window.initErrorstack.push({errorMsg:r,url:i,lineNumber:n,column:o,errorObj:t})}}();
  </script>
  <script id="bouncer_terminate_iframe" nonce="MBAYZO/AH7/g5GKOvSr+XA==">
   if (window.top != window) {
  window.top.postMessage({'bouncer': true, 'event': 'complete'}, '*');
}
  </script>
  <script id="swift_action_queue" nonce="MBAYZO/AH7/g5GKOvSr+XA==">
   !function(){function e(e){if(e||(e=window.event),!e)return!1;if(e.timestamp=(new Date).getTime(),!e.target&&e.srcElement&&(e.target=e.srcElement),document.documentElement.getAttribute("data-scribe-reduced-action-queue"))for(var t=e.target;t&&t!=document.body;){if("A"==t.tagName)return;t=t.parentNode}return i("all",o(e)),a(e)?(document.addEventListener||(e=o(

In [11]:
#get the weather from the newest tweet
mars_weather = soup.find(class_='tweet-text').get_text()
mars_weather

'Watch the #Mars2020 rover being built live from @NASAJPLhttps://youtu.be/PaNiYPglK58\xa0'

### Mars Facts

In [12]:
#Mars space facts url
facts_url = 'https://space-facts.com/mars/'

#use Pandas to scrape the planet profile
profile = pd.read_html(facts_url)
profile               

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [13]:
#make a df
profile_df = profile[0]

#set index to the 0 column
profile_df.set_index(0, inplace=True)

#delete the index name
profile_df.index.names = [None]

#delete the column name
profile_df.columns = ['']

profile_df

Unnamed: 0,Unnamed: 1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [14]:
#convert the data to a HTML table string
html_table = profile_df.to_html()

#clean it up
html_table = html_table.replace('\n', '')

html_table

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter:</th>      <td>6,792 km</td>    </tr>    <tr>      <th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>Moons:</th>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>Orbit Distance:</th>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <th>Orbit Period:</th>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>Surface Temperature:</th>      <td>-153 to 20 °C</td>    </tr>    <tr>      <th>First Record:</th>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>Recorded By:</th>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>'

### Mars Hemispheres

In [15]:
#visit the USGS Astrogeology site here to obtain high resolution images for each of Mar's hemispheres

#USGS url
usgs_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

#visit
browser.visit(usgs_url)

#create BeautifulSoup object and parse
soup = BeautifulSoup(browser.html, 'html.parser')

In [16]:
print(soup.prettify())


<html lang="en">
 <head>
  <link href="//ajax.googleapis.com/ajax/libs/jqueryui/1.11.3/themes/smoothness/jquery-ui.css" rel="stylesheet" type="text/css"/>
  <title>
   Astropedia Search Results | USGS Astrogeology Science Center
  </title>
  <meta content="USGS Astrogeology Science Center Astropedia search results." name="description"/>
  <meta content="USGS,Astrogeology Science Center,Cartography,Geology,Space,Geological Survey,Mapping" name="keywords"/>
  <meta content="IE=edge" http-equiv="X-UA-Compatible"/>
  <meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
  <meta content="width=device-width, initial-scale=1, maximum-scale=1" name="viewport"/>
  <meta content="x61hXXVj7wtfBSNOPnTftajMsZ5yB2W-qRoyr7GtOKM" name="google-site-verification"/>
  <!--<link rel="stylesheet" href="http://fonts.googleapis.com/css?family=Open+Sans:400italic,400,bold"/>-->
  <link href="/css/main.css" media="screen" rel="stylesheet"/>
  <link href="/css/print.css" media="print" rel="styles

In [17]:
#get the 4 hemispheres (class of 'item')
hemispheres = soup.select('div.item')

hemispheres

[<div class="item"><a class="itemLink product-item" href="/search/map/Mars/Viking/cerberus_enhanced"><img alt="Cerberus Hemisphere Enhanced thumbnail" class="thumb" src="/cache/images/dfaf3849e74bf973b59eb50dab52b583_cerberus_enhanced.tif_thumb.png"/></a><div class="description"><a class="itemLink product-item" href="/search/map/Mars/Viking/cerberus_enhanced"><h3>Cerberus Hemisphere Enhanced</h3></a><span class="subtitle" style="float:left">image/tiff 21 MB</span><span class="pubDate" style="float:right"></span><br/><p>Mosaic of the Cerberus hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. This mosaic is composed of 104 Viking Orbiter images acquired…</p></div> <!-- end description --></div>,
 <div class="item"><a class="itemLink product-item" href="/search/map/Mars/Viking/schiaparelli_enhanced"><img alt="Schiaparelli Hemisphere Enhanced thumbnail" class="thumb" src="/cache/images/7677c0a006b83871b5a2f66985ab5857_schiapa

In [18]:
#Loop through each hemisphere

hemisphere_image_urls = []

for h in hemispheres:
    title = (h.find('h3').text).replace(' Enhanced', '')
        
    #click the hemisphere
    browser.click_link_by_partial_text(title)
    
    #make new soup of that page
    soup = BeautifulSoup(browser.html, 'html.parser')
    
    #find the full image
    full = soup.find('a', text='Sample')
    
    #get the img url
    img_url = full['href']
    
    #make a dict and append to the list
    hemisphere_image_urls.append({'title': title, 'img_url': img_url})
    
    #go back 
    browser.back()

#close browser
browser.quit()    

hemisphere_image_urls

[{'title': 'Cerberus Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]