In [1]:
#Dependencies
from bs4 import BeautifulSoup as bs
from splinter import Browser
import time
import pandas as pd

In [2]:
# Function to instantiate a browser object using chromdriver
def init_browser():
    executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
    return Browser('chrome', **executable_path, headless=False)

In [3]:
# URL of NASA page
url = 'https://mars.nasa.gov/news/'

# Instantiate the browser, store in browser variable
browser = init_browser()
# Go to url using .visit
browser.visit(url)
# Force a pause to give the articles a chance to load
time.sleep(2)
# Scrape the html text
html = browser.html
# The browser is still open so quit browser
browser.quit()

In [4]:
# Beautifulsoup object
soup = bs(html, 'html.parser')

# Retrieve all the parent divs of news titles and paragraphs
result = soup.find('ul', class_='item_list')
# print(result)

In [5]:
# Dig through the results to get the first slide, title, and paragraph
slide = result.find('div', class_='list_text')
title = slide.find('div', class_='content_title').text
paragraph = slide.find('div', class_='article_teaser_body').text

print(title)
print(paragraph)

Beyond Mars, the Mini MarCO Spacecraft Fall Silent
The first CubeSat mission to deep space may have reached their limit, but they could inspire future NASA missions.


In [6]:
# Url for JPL Featured Space Image
mars_images_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
# Instantiate the browser, store in browser variable
browser = init_browser()
# Go to browser with .visit
browser.visit(mars_images_url)
# Force a pause to give page a chance to completely load
time.sleep(2)
# Scrape the images
images_html = browser.html
# The browser is still open so quit browser
browser.quit()

In [7]:
# Beautifulsoup object, 
image_soup = bs(images_html, 'html.parser')
# print(image_soup)

# Narrow down the image_soup and dig through to the feature image 
image_result = image_soup.find('div', class_='carousel_items').find('article')['style']
# print(image_result)
# split the returned string to get only the relevant part for the url
image_url = image_result.split("'")[1]

# Combine "https://www.jpl.nasa.gov" and the image_url to create the featured image url
featured_image_url = f"'https://www.jpl.nasa.gov{image_url}'"
print(featured_image_url)


'https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA16884-1920x1200.jpg'


In [8]:
# Url for Mars weather tweet
mars_tweet_url = 'https://twitter.com/marswxreport?lang=en'
# Instantiate the browser, store in browser variable
browser = init_browser()
# Go to browser with .visit
browser.visit(mars_tweet_url)
# Force a pause to give page a chance to completely load
time.sleep(2)
# Scrape the images
tweets_html = browser.html
# The browser is still open so quit browser
browser.quit()

In [9]:
tweet_soup = bs(tweets_html, 'html.parser')
# print(tweet_soup)

# Get the latest Mars weather tweet and get the weather text
weather_result = tweet_soup.find('div', class_='tweet')
# print(weather_result)
weather = weather_result.find('p', class_='tweet-text').text
print(weather)

Sol 2310 (2019-02-04), high -6C/21F, low -75C/-102F, pressure at 8.15 hPa, daylight 06:47-18:53pic.twitter.com/oRYhGR1P9C


In [10]:
# Url for Mars facts
mars_facts_url = 'https://space-facts.com/mars/'

# Use pandas to get the tables from the url
mars_facts = pd.read_html(mars_facts_url)
facts_df = mars_facts[0]

# Add column names
facts_df.columns = ['Fact', 'Value']
facts_df = facts_df.set_index("Fact")

# Use Pandas to convert the data to a HTML table string
facts_html_table = facts_df.to_html()
facts_html_table = facts_html_table.replace('\n', '')
facts_html_table

print(facts_html_table)

<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Value</th>    </tr>    <tr>      <th>Fact</th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter:</th>      <td>6,792 km</td>    </tr>    <tr>      <th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>Moons:</th>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>Orbit Distance:</th>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <th>Orbit Period:</th>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>Surface Temperature:</th>      <td>-153 to 20 °C</td>    </tr>    <tr>      <th>First Record:</th>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>Recorded By:</th>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>


In [11]:
# Url for hemispheres
hemispheres_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
# Instantiate the browser, store in browser variable
browser = init_browser()
# Go to browser with .visit
browser.visit(hemispheres_url)
# Scrape the page
hemispheres_html = browser.html

# Get the list of partial hrefs for each link
hemisphere_soup = bs(hemispheres_html, 'html.parser')

In [12]:
# Create empty dictionary to hold titles and urls
hemisphere_dicts = []

# Get the links to click
buttons = hemisphere_soup.find_all('h3')
# print(buttons)

# Loop through the buttons to get the titles and url links
for button in buttons:
    hemisphere_dict = {}
    
    t = button.get_text()
    title = t.strip(' Enhanced')
    hemisphere_dict['title'] = title
    browser.click_link_by_partial_text(t)
    time.sleep(2)
    url = browser.find_link_by_partial_href('download')['href']
    time.sleep(2)
    hemisphere_dict['img_url'] = url
    hemisphere_dicts.append(hemisphere_dict)
    browser.visit(hemispheres_url)
    
# The browser is still open so quit browser
browser.quit()
    
    
for dct in hemisphere_dicts:
    print(dct)

{'title': 'Cerberus Hemispher', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}
{'title': 'Schiaparelli Hemispher', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}
{'title': 'Syrtis Major Hemispher', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}
{'title': 'Valles Marineris Hemispher', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}
