In [1]:
# Import Dependencies 
from bs4 import BeautifulSoup as bs
from splinter import Browser
import pandas as pd
import requests

In [2]:
# Choose the executable path to driver 
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

## NASA Mars News

In [3]:
# Visit NASA news url through splinter
url = 'https://mars.nasa.gov/news/'
browser.visit(url)

In [4]:
# Store HTML
html = browser.html

# Use Beautiful Soup to parse HTML
soup = bs(html, 'html.parser')

# Retrieve the latest element containing news title and news paragraph
news_title = soup.find('div', class_='content_title').find('a').text
news_par = soup.find('div', class_='article_teaser_body').text

# Display scrapped data
print(news_title)
print(news_par)

Mars InSight Lander Seen in First Images from Space 
Look closely, and you can make out the lander's solar panels.


## JPL Mars Space Images - Featured Image

In [5]:
# Visit Mars Space Images using splinter
image_url_feat = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(image_url_feat)

In [6]:
# Store HTML
html_img = browser.html

# Use Beautiful Soup to parse HTML
soup = bs(html_img, 'html.parser')

# Retieve background image url
feat_img_url = soup.find('article')['style'].\
                replace('background-image: url(','').replace(');','')[1:-1]

# Website URL
main_url = 'https://www.jpl.nasa.gov'

# Combine website url with scrapped route
feat_img_url = main_url + feat_img_url

#Display full link to featured image
feat_img_url

"https://www.jpl.nasa.gov'/spaceimages/images/wallpaper/PIA16225-1920x1200.jpg'"

## Mars Weather

In [7]:
# Visit Mars Weather Twitter through splinter
weather_url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(weather_url)

In [8]:
# Store HTML
html_weather = browser.html

# Use Beautiful Soup to parse HTML
soup = bs(html_weather, 'html.parser')

# Find all elements containing tweets
latest_tweets = soup.find_all('div', class_='js-tweet-text-container')

# Retrieve all elements containing news title in the specified range
# Look for entries that display weather related words to exclude unrelated tweets
for tweet in latest_tweets:
    weather_tweet = tweet.find('p').text
    if 'Sol' and 'pressure' in weather_tweet:
        print(weather_tweet)
        break
    else:
        pass

Sol 2258 (2018-12-13), high -6C/21F, low -70C/-93F, pressure at 8.41 hPa, daylight 06:37-18:51


## Mars Facts

In [9]:
# Visit Mars facts url
facts_url = 'https://space-facts.com/mars/'

# Usen Pandas to parse the url
mars_facts = pd.read_html(facts_url)

# Find the mars facts DataFrame in the list of DataFrames and assign it to mars_df
mars_df = mars_facts[0]

# Assign the columns: Description, Value
mars_df.columns = ['Description', 'Value']

# Set the index to 'Description' without row indexing
mars_df.set_index('Description', inplace=True)

# Save the HTML code
mars_df.to_html()

data = mars_df.to_dict(orient='records')

# Display mars_df
mars_df

Unnamed: 0_level_0,Value
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


## Mars Hemispheres

In [13]:
# Visit hemispheres website using splinter
hemis_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(hemis_url)

In [14]:
# Store HTML
html_hemis = browser.html

# Use Beautiful Soup to parse HTML
soup = bs(html_hemis, 'html.parser')

# Retreie all items containing mars hemisphere info
items = soup.find_all('div', class_='item')

# Create empty list for hemisphere urls
hemis_img_url = []

# Store the main url
hemis_main_url = 'https://astrogeology.usgs.gov'

# Loop through the items prevously stored
for i in items:
    # Store the title
    title = i.find('h3').text
    
    # Store link leading to full image website
    partial_img_url = i.find('a', class_='itemLink product-item')['href']
    
    # Visit the link for the full image site
    browser.visit(hemis_main_url + partial_img_url)
    
    # Store HTML for individual hemisphere info website
    partial_img_html = browser.html
    
    # Use Beautiful Soup to parse HTML for each individual hemisphere info site
    soup = bs(partial_img_html, 'html.parser')
    
    # Retrieve the full image source
    img_url = hemis_main_url + soup.find('img', class_='wide-image')['src']
    
    # Append the retrieved info to a list of dictionaries
    hemis_img_url.append({"title": title, "img_url": img_url})
       
# Display urls for hemisphere images
hemis_img_url

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]