# Setup and Dependencies

In [1]:
# Import dependencies
import random
import re
from io import StringIO

import pandas as pd
from bs4 import BeautifulSoup as bs
from splinter import Browser

In [2]:
# Start a visible (non-headless) Chrome session through splinter using the
# chromedriver.exe binary; the cells below read rendered HTML from `browser`
executable_path = dict(executable_path='chromedriver.exe', headless=False)
browser = Browser('chrome', **executable_path)

# NASA Mars News

In [3]:
# Open the NASA Mars news listing in the browser and parse the rendered
# HTML so the latest title and teaser can be extracted from it
url = 'https://mars.nasa.gov/news/'
browser.visit(url)
soup = bs(markup=browser.html, features='lxml')

In [4]:
# Read the headline and teaser from the first news slide on the page.
# Either value stays None when its element cannot be found.
news_title = news_text = None

article = soup.find('li', class_='slide')
if article is not None:
    header = article.find('div', class_='content_title')
    body = article.find('div', class_='article_teaser_body')
    news_title = header.text.strip() if header is not None else None
    news_text = body.text.strip() if body is not None else None

print(f'The latest news title: {news_title}')
print(f'The latest news text: {news_text}')

The latest news title: Third ASPIRE Test Confirms Mars 2020 Parachute a Go
The latest news text: The supersonic parachute that will handle the heaviest payload yet to the Red Planet – Mars 2020 rover – passes its final sounding rocket test with flying colors.


# JPL Mars Space Images - Featured Image

In [5]:
# Scraping Jet Propulsion Laboratory website for one high-res image of Mars
url_base = 'https://www.jpl.nasa.gov'
url = f'{url_base}/spaceimages/?search=&category=Mars'
browser.visit(url)
soup = bs(browser.html, 'lxml')

In [6]:
# Extract the featured image URL and title from the main carousel item.
# Both values stay None when the expected markup is not present.
featured_img_url = None
featured_img_title = None

section = soup.find('section', class_='main_feature')
if section:
    article = section.find('article', class_='carousel_item')
    if article:
        # The image path sits inside the inline style attribute as
        # url('<path>'). A raw-string pattern avoids invalid escape
        # sequences, and the non-greedy capture group returns just the
        # path without manual slicing.
        match = re.search(r"url\('(.+?)'\)", article.get('style', ''))
        if match:
            # The captured path is site-relative, so prefix the site root
            featured_img_url = f'{url_base}{match.group(1)}'
        title = article.h1
        if title:
            featured_img_title = title.text.strip()
print(f'High-res featured image "{featured_img_title}" is found at {featured_img_url}')

High-res featured image "'John Klein' Site Selected for Curiosity's Drill Debut" is found at https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA16567-1920x1200.jpg


# Mars Weather

In [7]:
# Scraping Twitter webpage for the latest tweet on Mars weather
url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(url)
soup = bs(browser.html, 'lxml')

In [8]:
# Take the text of the first tweet on the page as the weather report;
# mars_weather stays None when no tweet (or tweet text) is found
mars_weather = None

tweet = soup.find('div', class_='tweet')
if tweet is not None:
    tweet_text = tweet.find('p', class_='tweet-text')
    if tweet_text is not None:
        mars_weather = tweet_text.get_text().strip()

print(f'Latest Mars weather report: {mars_weather}')

Latest Mars weather report: Sol 2209 (2018-10-23), high -18C/0F, low -73C/-99F, pressure at 8.79 hPa, daylight 06:09-18:26


# Mars Facts

In [9]:
# Scraping Space Facts webpage for the interesting info on Mars
url = 'http://space-facts.com/mars/'
browser.visit(url)
soup = bs(browser.html, 'lxml')

In [10]:
# Parse the first HTML table on the page into a DataFrame and give columns
# 0 and 1 readable names. The HTML is wrapped in StringIO because passing a
# literal HTML string to read_html is deprecated in recent pandas releases.
# rename() is chained rather than applied in place so the cell builds
# facts_df in a single expression.
facts_df = (
    pd.read_html(StringIO(browser.html))[0]
    .rename(columns={0: 'Fact', 1: 'Details'})
)
# Column-oriented dict: {'Fact': [...], 'Details': [...]}
facts = facts_df.to_dict(orient='list')
facts

{'Fact': ['Equatorial Diameter:',
  'Polar Diameter:',
  'Mass:',
  'Moons:',
  'Orbit Distance:',
  'Orbit Period:',
  'Surface Temperature:',
  'First Record:',
  'Recorded By:'],
 'Details': ['6,792 km',
  '6,752 km',
  '6.42 x 10^23 kg (10.7% Earth)',
  '2 (Phobos & Deimos)',
  '227,943,824 km (1.52 AU)',
  '687 days (1.9 years)',
  '-153 to 20 °C',
  '2nd millennium BC',
  'Egyptian astronomers']}

# Mars Hemispheres

In [7]:
# Scraping high-res images of Mars hemispheres
url_base = 'https://astrogeology.usgs.gov'
url = f'{url_base}/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url)
soup = bs(browser.html, 'lxml')

In [9]:
# Follow every search result to its detail page and collect the full-size
# image URL together with a cleaned-up title.
# Detail pages are parsed into `detail_soup` so that `soup` keeps holding the
# search-results page; overwriting it would make this cell scrape the last
# detail page instead of the result list when it is re-run.
thumbs = soup.find_all('div', class_='item')
hemisphere_images = []
word_to_trim = ' Enhanced'
for thumb in thumbs:
    img_url = None
    img_title = None

    thumb_link = thumb.find('a', class_='itemLink')
    href = thumb_link.get('href') if thumb_link is not None else None
    if not href:
        # No usable link in this result item, so there is nothing to visit
        continue

    browser.visit(f'{url_base}{href}')
    detail_soup = bs(browser.html, 'lxml')

    img = detail_soup.find('img', class_='wide-image')
    if img is not None and img.get('src'):
        # The src value is site-relative, so prefix the site root
        img_url = f'{url_base}{img["src"]}'

    title = detail_soup.find('h2', class_='title')
    if title is not None:
        img_title = title.text.strip()
        if img_title.endswith(word_to_trim):
            # Drop the trailing " Enhanced" suffix from the page title
            img_title = img_title[:-len(word_to_trim)]

    # Only keep entries for which an image was actually found
    if img_url:
        hemisphere_images.append({
            'title': img_title,
            'url': img_url
        })
print('Discovered the following images of the Mars hemispheres:')
print(hemisphere_images)

Discovered the following images of the Mars hemispheres:
[{'title': 'Cerberus Hemisphere', 'url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'}, {'title': 'Schiaparelli Hemisphere', 'url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'}, {'title': 'Syrtis Major Hemisphere', 'url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'}, {'title': 'Valles Marineris Hemisphere', 'url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]


In [16]:
# Export this notebook (scraper.ipynb) as a plain Python script via nbconvert
!jupyter nbconvert --to script scraper.ipynb

[NbConvertApp] Converting notebook scraper.ipynb to script
[NbConvertApp] Writing 3698 bytes to scraper.py
