In [1]:
# import dependencies
from bs4 import BeautifulSoup
from splinter import Browser
import requests
import pandas as pd
from pprint import pprint

### Step 1 - Scraping

In [2]:
#set path and browser variables for chromedriver
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

In [5]:
# URL of page to be scraped
url_text = "https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest"

In [6]:
#open chromedriver browser
browser.visit(url_text)

In [4]:
#create a Beautiful Soup object 
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

### NASA Mars News

In [55]:
# results = soup.find("div", class_="content_title").get_text(strip=True)
# results

'Mars Now'

In [61]:
#find the div class with the headline and article text 
results = soup.find("div", class_="list_text")

<div class="list_text"><div class="list_date">November 18, 2020</div><div class="content_title"><a href="/news/8801/hear-audio-from-nasas-perseverance-as-it-travels-through-deep-space/" target="_self">Hear Audio From NASA's Perseverance As It Travels Through Deep Space</a></div><div class="article_teaser_body">The first to be rigged with microphones, the agency's latest Mars rover picked up the subtle sounds of its own inner workings during interplanetary flight.</div></div>

In [75]:
#separate out the latest headline
news_title = results.find("div", class_="content_title").text
news_title

"Hear Audio From NASA's Perseverance As It Travels Through Deep Space"

In [76]:
#separate out the latest article text 
news_p = results.find("div", class_="article_teaser_body").text
news_p

"The first to be rigged with microphones, the agency's latest Mars rover picked up the subtle sounds of its own inner workings during interplanetary flight."

### JPL Mars Space Images - Featured Image

In [7]:
#url for finding featured image to scrape
image_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"

In [29]:
#open chromedriver browser
browser.visit(image_url)

In [30]:
#navigate to large size image using splinter
browser.links.find_by_partial_text("FULL IMAGE").click()

In [31]:
#click on "more info" to get to large size image
browser.links.find_by_partial_text("more info").click()

In [32]:
#retrieve the image url for the full-size image
img_results = soup.find("figure", class_="lede") 
img_jpg = img_results.find("a")
img_link = img_jpg["href"]
#check results for img_link
#print(img_link)

#create complete link 
featured_image_url = f"https:www.jpl.nasa.gov{img_link}"
featured_image_url

'https:www.jpl.nasa.gov/spaceimages/images/largesize/PIA16837_hires.jpg'

### Mars Facts

In [4]:
#url for mars facts table data to scrape
table_url = "https://space-facts.com/mars/"

In [5]:
#open chromedriver browser
browser.visit(table_url)

In [8]:
#read_html for bringing in table with Mars Facts
mars_facts = pd.read_html(table_url)[0]
mars_facts

#change column names and set index
mars_facts.columns = ["Description", "Mars"]
mars_facts.set_index("Description", inplace=True)

mars_facts

Unnamed: 0_level_0,Mars
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [9]:
#export DataFrame to html
html_mars_table = mars_facts.to_html()
html_mars_table
pprint(html_mars_table)

('<table border="1" class="dataframe">\n'
 '  <thead>\n'
 '    <tr style="text-align: right;">\n'
 '      <th></th>\n'
 '      <th>Mars</th>\n'
 '    </tr>\n'
 '    <tr>\n'
 '      <th>Description</th>\n'
 '      <th></th>\n'
 '    </tr>\n'
 '  </thead>\n'
 '  <tbody>\n'
 '    <tr>\n'
 '      <th>Equatorial Diameter:</th>\n'
 '      <td>6,792 km</td>\n'
 '    </tr>\n'
 '    <tr>\n'
 '      <th>Polar Diameter:</th>\n'
 '      <td>6,752 km</td>\n'
 '    </tr>\n'
 '    <tr>\n'
 '      <th>Mass:</th>\n'
 '      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n'
 '    </tr>\n'
 '    <tr>\n'
 '      <th>Moons:</th>\n'
 '      <td>2 (Phobos &amp; Deimos)</td>\n'
 '    </tr>\n'
 '    <tr>\n'
 '      <th>Orbit Distance:</th>\n'
 '      <td>227,943,824 km (1.38 AU)</td>\n'
 '    </tr>\n'
 '    <tr>\n'
 '      <th>Orbit Period:</th>\n'
 '      <td>687 days (1.9 years)</td>\n'
 '    </tr>\n'
 '    <tr>\n'
 '      <th>Surface Temperature:</th>\n'
 '      <td>-87 to -5 °C</td>\n'
 '    </tr>\n'
 '    <tr>\n'

### Mars Hemispheres

In [3]:
#url for hemisphere images to scrape
hemi_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

In [56]:
#open chromedriver browser
browser.visit(hemi_url)

In [57]:
browser.find_by_tag("h3").click()

In [58]:
#create a Beautiful Soup object 
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

#scrape the title
title = soup.select_one("div.content h2").text
print(title)

#scrape the image url
image_url = soup.select_one("div.downloads a")["href"]
print(image_url)

Cerberus Hemisphere Enhanced
https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg


In [4]:
#return to base url
browser.visit(hemi_url)

html = browser.html
soup = BeautifulSoup(html, 'html.parser')

hemisphere_urls=[]

hemis = browser.find_by_tag("h3")


for items in range(len(hemis)):
    hemispheres = {}
    
    browser.find_by_css("h3")[items].click()
    
    hemispheres['title'] = browser.find_by_css("h2.title").text
    image = browser.links.find_by_text('Sample').first
    hemispheres['image_url'] = image["href"]
    
    hemisphere_urls.append(hemispheres)
    browser.back()           

In [60]:
print(hemisphere_urls)

[{'title': 'Cerberus Hemisphere Enhanced', 'image_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'image_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'image_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'image_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]
