In [1]:
import time
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
from splinter import Browser

In [2]:
# Keyword arguments that tell Splinter where the ChromeDriver binary lives.
# This has to be a dict keyed by 'executable_path' (a bare {'chromedriver.exe'}
# is a set literal) and it has to be unpacked into Browser(); otherwise the
# setting is silently unused.
executable_path = {'executable_path': 'chromedriver.exe'}
# headless=False keeps a visible Chrome window open so the scrape can be watched.
browser = Browser('chrome', **executable_path, headless=False)

### NASA Mars News

In [3]:
# Open the NASA Mars News listing and pause briefly so the page can render.
url = 'https://mars.nasa.gov/news/'
browser.visit(url)
time.sleep(1)

# Parse the rendered page source.
html = browser.html
soup = bs(html, "html.parser")

# Gather the candidate elements first, then pick the entries of interest.
# Note the differing positions: the headline is the SECOND 'content_title'
# match, while the teaser is the FIRST 'article_teaser_body' match.
title_elements = soup.find_all(class_='content_title')
teaser_elements = soup.find_all(class_='article_teaser_body')
news_title = title_elements[1].text
news_p = teaser_elements[0].text

In [4]:
# Sanity check: show the scraped headline followed by its teaser paragraph.
print(news_title, news_p, sep='\n')

NASA, ULA Launch Mars 2020 Perseverance Rover Mission to Red Planet
The agency's Mars 2020 mission is on its way. It will land at Jezero Crater in about seven months, on Feb. 18, 2021. 


### JPL Mars Space Images - Featured Image

In [6]:
# JPL featured image: open the Mars image gallery, then click through to the
# detail page of the featured image.
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)
time.sleep(1)

# Follow the two links in order, pausing after each click so the next view can load.
for link_text in ('FULL IMAGE', 'more info'):
    browser.links.find_by_partial_text(link_text).first.click()
    time.sleep(1)

# Parse the detail page.
html = browser.html
soup = bs(html, "html.parser")

# The first element with class 'main_image' carries the image path in its
# 'src' attribute; prefix it with the site root to get an absolute URL.
main_image = soup.find_all(class_='main_image')[0]
relative_image_url = main_image.get('src')
featured_image_url = 'https://www.jpl.nasa.gov' + relative_image_url

In [7]:
# Test the results: print the absolute URL assembled for the featured image.
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA22892_hires.jpg


### Mars Weather

In [8]:
# Mars weather: load the Mars weather report Twitter feed.
url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(url)
time.sleep(1)

# Parse the rendered page source.
html = browser.html
soup = bs(html, "html.parser")

# Select every element whose class attribute matches this exact string.
tweet_text_class = 'css-901oao css-16my406 r-1qd0xha r-ad9z0x r-bcqeeo r-qvutc0'
matching_elements = soup.find_all(class_=tweet_text_class)

# NOTE(review): 27 is a positional magic number; presumably it is where the
# newest weather tweet sat when this was written. It will shift whenever the
# page layout changes -- confirm before relying on it.
mars_weather = matching_elements[27].text

In [9]:
# Test the results: print the text extracted from the weather report page.
print(mars_weather)

InSight sol 595 (2020-07-29) low -91.9ºC (-133.5ºF) high -12.9ºC (8.7ºF)
winds from the WNW at 7.0 m/s (15.6 mph) gusting to 18.2 m/s (40.7 mph)
pressure at 7.90 hPa


### Mars Facts

In [3]:
# Mars facts: read every HTML table on the page and keep the first one.
url = 'https://space-facts.com/mars/'
facts_tables = pd.read_html(url)
mars_table = facts_tables[0]

# Label the two columns, then make the description column the index
# (rebinding the name rather than mutating in place).
mars_table.columns = ['description', 'value']
mars_table = mars_table.set_index('description')

# Render the indexed table as an HTML string for later use.
mars_table_html = mars_table.to_html()

In [4]:
# Display the facts table, now indexed by the 'description' column.
mars_table

Unnamed: 0_level_0,value
description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [5]:
# Display the HTML string that to_html() generated from the facts table.
mars_table_html

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>value</th>\n    </tr>\n    <tr>\n      <th>description</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr>\

### Mars Hemispheres

In [5]:
# Mars hemispheres: load the USGS Astrogeology search results for the
# enhanced hemisphere images.
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url)
time.sleep(1)

# Parse the results page once. The loop below navigates the browser away,
# but this parsed snapshot is independent of the live page.
html = browser.html
soup = bs(html, "html.parser")

# One {'title': ..., 'url': ...} dictionary per result item.
hemisphere_image_urls = []
for item in soup.find_all(class_='item'):
    # The hemisphere name is the text of the item's <h3>.
    title = item.find('h3').string

    # Resolve the detail-page link against the site root. urljoin copes with
    # hrefs that start with '/' (plain concatenation would yield a double
    # slash) as well as relative or already-absolute hrefs.
    target_url = urljoin('https://astrogeology.usgs.gov/', item.find('a')['href'])

    # Visit the detail page and parse it.
    browser.visit(target_url)
    time.sleep(1)
    html = browser.html
    new_soup = bs(html, "html.parser")

    # The fifth <a> on the detail page (index 4) is the JPG download link;
    # index 5 would be the TIF instead.
    image_url = new_soup.find_all('a')[4].get('href')

    hemisphere_image_urls.append({'title': title, 'url': image_url})

In [6]:
# Test the results: print the list of {'title', 'url'} dictionaries, one per hemisphere item found.
print(hemisphere_image_urls)

[{'title': 'Cerberus Hemisphere Enhanced', 'url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]


In [7]:
# Close the browser session that was opened at the top of the notebook.
browser.quit()