In [1]:
# Import Splinter and BeautifulSoup
import time
from urllib.parse import urljoin

import pandas as pd
from bs4 import BeautifulSoup
from splinter import Browser

In [2]:
# Tell Splinter where the chromedriver binary lives, then launch a Chrome session
executable_path = dict(executable_path='chromedriver.exe')
browser = Browser('chrome', **executable_path)

## Visit the NASA Mars News Site

In [3]:
# !pip install splinter  -- `Browser` is imported from the splinter package (see the import cell)

In [4]:
# Open the NASA Mars news listing in the automated browser
url = "https://mars.nasa.gov/news/"
browser.visit(url)

In [5]:
# Parse the page currently loaded in the browser into a BeautifulSoup tree
html = browser.html
news_soup = BeautifulSoup(html, 'html.parser')

# Collect every <div> whose class is content_title
articles = news_soup.find_all("div", class_='content_title')

# Report how many title elements were scraped
print(len(articles))

# List the whitespace-trimmed text of each title element, one per line
titles = [tag.text.strip() for tag in articles]
for title in titles:
    print(title)


51
Mars Now
NASA's Curiosity Mars Rover Explores a Changing Landscape
NASA's Ingenuity Mars Helicopter Spots Perseverance From Above
Aviation Week Awards NASA's Ingenuity Mars Helicopter With Laureate
NASA's Perseverance Team Assessing First Mars Sampling Attempt
Building a Mars Treasure Chest, One Test Tube at a Time
My Favorite Martian Image: Helicopter Scouts Ridge Area for Perseverance
Clays, Not Water, Are Likely Source of Mars 'Lakes'
NASA's InSight Reveals the Deep Interior of Mars
NASA Perseverance Mars Rover to Acquire First Sample
Signs of Life on Mars? NASA's Perseverance Rover Begins the Hunt
Journey to the Center of Mars With the InSight Lander Team
NASA to Brief Early Science From Perseverance Mars Rover
NASA's Mars Helicopter Reveals Intriguing Terrain for Rover Team
NASA's Curiosity Rover Finds Patches of Rock Record Erased, Revealing Clues
Meet the Open-Source Software Powering NASA's Ingenuity Mars Helicopter
NASA's Self-Driving Perseverance Mars Rover 'Takes the Whee

In [6]:
# Wait (up to `wait_time` seconds) for the first news slide to be present
# *before* parsing. Previously the wait ran after the HTML had already been
# captured, so it could not influence what `slide_elem` contained.
slide_present = browser.is_element_present_by_css("ul.item_list li.slide", wait_time=1)

# Parse the page as it stands after the wait and take the first matching slide.
# A fresh soup is used so the `news_soup` built earlier is left untouched.
slide_elem = BeautifulSoup(browser.html, 'html.parser').select_one('ul.item_list li.slide')

# Display whether the slide was found (the same value this cell showed before)
slide_present

True

In [7]:
# Locate the slide's content_title <div> and keep its text as `news_title`
title_div = slide_elem.find("div", class_='content_title')
news_title = title_div.get_text()
news_title

"NASA's Curiosity Mars Rover Explores a Changing Landscape"

In [8]:
# Locate the slide's article_teaser_body <div> and keep its text as `news_p`
teaser_div = slide_elem.find('div', class_="article_teaser_body")
news_p = teaser_div.get_text()
news_p

'A new video rings in the rover’s ninth year on Mars, letting viewers tour Curiosity’s location on a Martian mountain.'

## JPL Space Images Featured Image

In [9]:
# Visit the space-images site
url = "https://spaceimages-mars.com/"
browser.visit(url)

# Wait (up to `wait_time` seconds) for the header image to be present instead
# of always sleeping for a fixed 5 seconds.
browser.is_element_present_by_css("img.headerimage", wait_time=5)

# Parse the loaded page and collect the <img class="headerimage"> elements
soup = BeautifulSoup(browser.html, 'html.parser')
images = soup.find_all('img', class_="headerimage")

# Fail with a descriptive message (still an IndexError, as before) when the
# page has no header image, rather than a bare "list index out of range".
if not images:
    raise IndexError(f"no <img class='headerimage'> element found on {url}")
image = images[0]

# urljoin resolves the image's `src` against the page URL, so relative paths,
# paths with a leading slash, and absolute URLs all produce a valid address.
featured_image_url = urljoin(url, image["src"])
featured_image_url

'https://spaceimages-mars.com/image/featured/mars1.jpg'

In [10]:
# No install needed: `time` is part of the Python standard library

## Mars Hemispheres

In [11]:
# Open the USGS Astrogeology search results for enhanced Mars hemisphere products
url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(url)

In [12]:
# Collected results: one {'img_url': ..., 'title': ...} dict per hemisphere
hemisphere_image_urls = []

# First, get a list of all of the hemisphere headings (only its length is used
# below, to know how many result pages to visit)
links = browser.find_by_css("a.product-item h3")

# `find_link_by_text` is deprecated in newer Splinter releases in favour of the
# `browser.links` namespace. Use the namespace when this Splinter version
# provides it and fall back to the legacy finder otherwise.
if hasattr(browser, "links"):
    find_link_by_text = browser.links.find_by_text
else:
    find_link_by_text = browser.find_link_by_text

# Visit each hemisphere page in turn and record its sample-image URL and title
for i in range(len(links)):
    # Re-query the headings on every pass to avoid a stale element exception
    # after navigating away from and back to the results page
    browser.find_by_css("a.product-item h3")[i].click()

    # The anchor labelled "Sample" carries the image URL in its href
    sample_elem = find_link_by_text('Sample').first

    # Read both values while still on the hemisphere's own page
    hemisphere = {
        'img_url': sample_elem['href'],
        'title': browser.find_by_css("h2.title").text,
    }
    hemisphere_image_urls.append(hemisphere)

    # Return to the results page for the next iteration
    browser.back()
    



In [13]:
hemisphere_image_urls

[{'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
  'title': 'Cerberus Hemisphere Enhanced'},
 {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
  'title': 'Schiaparelli Hemisphere Enhanced'},
 {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg',
  'title': 'Syrtis Major Hemisphere Enhanced'},
 {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg',
  'title': 'Valles Marineris Hemisphere Enhanced'}]

## Mars Facts

In [14]:
# Read the first HTML table found on the Mars facts page into a DataFrame.
# (pandas is already imported as `pd` in the notebook's import cell.)
df = pd.read_html('https://space-facts.com/mars/')[0]

# Label the columns, then index the table by its description column.
# Reassigning instead of using inplace=True keeps each step an explicit
# "new frame from old frame" operation.
df.columns = ['description', 'value']
df = df.set_index('description')
df

Unnamed: 0_level_0,value
description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [15]:
# Render the DataFrame as an HTML table string and display it
mars_facts_html = df.to_html()
mars_facts_html

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>value</th>\n    </tr>\n    <tr>\n      <th>description</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr>\

In [16]:
# Close the Splinter-driven browser session
browser.quit()