In [1]:
# Dependencies
from bs4 import BeautifulSoup
import requests
from splinter import Browser
import pandas as pd

In [2]:
# Part I: Web Scraping

In [3]:
# Crawl news website: address of the NASA Mars news listing page.
news_web_url = "https://mars.nasa.gov/news"

In [4]:
# Start a visible (non-headless) Chrome session through splinter.
# NOTE(review): the chromedriver location is a hard-coded absolute path and
# will need adjusting on machines where the driver lives elsewhere.
executable_path = dict(executable_path='/usr/local/bin/chromedriver')
browser = Browser('chrome', headless=False, **executable_path)

In [5]:
# Load the news page in the browser session.
browser.visit(news_web_url)

In [6]:
# Headline text: first link that is a direct child of a .content_title element.
title_links = browser.find_by_css('.content_title > a')
news_title = title_links.first.text
news_title

"Curiosity Tastes First Sample in 'Clay-Bearing Unit'"

In [7]:
# Teaser paragraph: text of the first .article_teaser_body element on the page.
teaser_bodies = browser.find_by_css('.article_teaser_body')
news_p = teaser_bodies.first.text
news_p

'This new region on Mars might reveal more about the role of water on Mount Sharp.'

In [8]:
# Crawl image website: JPL space-images search page filtered to the Mars category.
images_web_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"

In [9]:
# Load the image search page in the same browser session.
browser.visit(images_web_url)

In [10]:
# Click on the first image tile in the listing.
image_tiles = browser.find_by_css(".image_and_description_container")
image_tiles.first.click()

In [11]:
# Read the `src` attribute of the first .fancybox-image element.
# Presumably this is the enlarged image shown after the click above; verify on the live page.
fancybox_images = browser.find_by_css(".fancybox-image")
featured_image_url = fancybox_images.first["src"]
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA23112_hires.jpg'

In [12]:
# Crawl Mars weather: Twitter timeline of the @marswxreport account (English locale).
weather_web_url = "https://twitter.com/marswxreport?lang=en"

In [13]:
# Load the weather timeline in the browser session.
browser.visit(weather_web_url)

In [14]:
# Select every <p> element that carries all of the tweet-text CSS classes below.
sections = browser.find_by_css("p.TweetTextSize.TweetTextSize--normal.js-tweet-text.tweet-text")

In [15]:
# Pick the first tweet whose text starts with "InSight" (the weather report format).
# The `else` branch runs only when the loop finishes without `break`, i.e. when no
# tweet matched. Failing loudly here avoids a confusing NameError further down, or
# worse, silently reusing a stale `text_to_use` left over from an earlier run.
for section in sections:
    tweet_text = section.text  # read once; each access queries the browser
    if tweet_text.startswith("InSight"):
        text_to_use = tweet_text
        break
else:
    raise ValueError("No tweet starting with 'InSight' was found on the page")

In [16]:
# Display the raw tweet text selected above.
text_to_use

'InSight sol 137 (2019-04-16) low -97.0ºC (-142.7ºF) high -15.9ºC (3.4ºF)\nwinds from the SW at 4.3 m/s (9.7 mph) gusting to 12.4 m/s (27.7 mph)\npressure at 7.30 hPa'

In [17]:
# Flatten the multi-line tweet onto one line: every "\n" becomes a single space.
text_to_use = " ".join(text_to_use.split("\n"))
text_to_use

'InSight sol 137 (2019-04-16) low -97.0ºC (-142.7ºF) high -15.9ºC (3.4ºF) winds from the SW at 4.3 m/s (9.7 mph) gusting to 12.4 m/s (27.7 mph) pressure at 7.30 hPa'

In [18]:
# Tidy the report into one comma-separated sentence:
#   * remove the "InSight " label,
#   * replace "sol" with "Sol",
#   * put a comma after every closing parenthesis.
# Any existing ")," is first collapsed back to ")" so that re-running this cell on
# already-formatted text does not keep stacking extra commas (")," -> "),,").
text_to_use = (
    text_to_use
    .replace("InSight ", "")
    .replace("sol", "Sol")
    .replace("),", ")")
    .replace(")", "),")
)
text_to_use

'Sol 137 (2019-04-16), low -97.0ºC (-142.7ºF), high -15.9ºC (3.4ºF), winds from the SW at 4.3 m/s (9.7 mph), gusting to 12.4 m/s (27.7 mph), pressure at 7.30 hPa'

In [19]:
# Keep the formatted weather report under its final name.
mars_weather = text_to_use

In [20]:
# Crawl Mars facts: page whose HTML table(s) are parsed with pandas below.
facts_web_url = "https://space-facts.com/mars/"

In [21]:
# Fetch the facts page and parse the HTML tables it contains into a list of DataFrames.
tables = pd.read_html(facts_web_url)
tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [22]:
# Take the first parsed table. Note this binds `df` to the same object as
# tables[0]; it is not a copy.
df = tables[0]
df

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [23]:
# Label the two columns and index the table by the fact description.
# The frame is rebuilt from a copy of the scraped table so this cell can be re-run
# safely: assigning the two labels directly onto `df` fails on a second run, because
# by then `df` has already been reduced to the single "value" column. Working on a
# copy also leaves tables[0] untouched.
df = tables[0].copy()
df.columns = ["description", "value"]
df = df.set_index("description")
df

Unnamed: 0_level_0,value
description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [24]:
# Save the facts table as an HTML file named table.html (relative path).
df.to_html('table.html')

In [25]:
# Crawl Mars hemispheres: USGS Astrogeology search results for "hemisphere enhanced" with target Mars.
hemispheres_web_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

In [26]:
# Load the hemisphere search results in the browser session.
browser.visit(hemispheres_web_url)

In [27]:
# Collect the thumbnail images (img.thumb) on the results page and display the
# list to check how many were found.
elements = browser.find_by_css("img.thumb")
elements

[<splinter.driver.webdriver.WebDriverElement at 0x11437ef28>,
 <splinter.driver.webdriver.WebDriverElement at 0x11437eef0>,
 <splinter.driver.webdriver.WebDriverElement at 0x114475208>,
 <splinter.driver.webdriver.WebDriverElement at 0x114475550>]

In [28]:
# For each thumbnail found earlier: open its detail page, record the title and the
# first download link, then go back to the results page.
hemisphere_image_urls = []
thumb_count = len(elements)
for i in range(thumb_count):
    # The thumbnails are looked up again on every pass rather than reusing
    # `elements` -- presumably because handles obtained before navigating away
    # are no longer usable after browser.back(); confirm if refactoring.
    thumbnails = browser.find_by_css("img.thumb")
    thumbnails[i].click()

    # Page heading minus the " Enhanced" suffix.
    title = browser.find_by_css("div.content h2.title").text.replace(" Enhanced", "")

    # href of the first link listed under div.downloads.
    download_links = browser.find_by_css("div.downloads li a")
    img_url = download_links[0]["href"]

    store_dict = {"title": title, "img_url": img_url}
    hemisphere_image_urls.append(store_dict)

    browser.back()

In [29]:
# Display the collected list of {"title", "img_url"} dictionaries.
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]

In [30]:
# Close the browser session now that scraping is finished.
browser.quit()