## Mission to Mars

In [1]:
# Dependencies
from splinter import Browser
from bs4 import BeautifulSoup
import requests
import pandas as pd
import time

In [2]:
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

## NASA Mars News

In [3]:
# URL for most recent news article and paragraph text to scrape
url = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'
browser.visit(url)

In [4]:
time.sleep(3)

html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [5]:
# Collecting the latest article and paragraph text
results = soup.find("div", class_="list_text")
news_title = results.find("div", class_="content_title").text
news_p = results.find("div", class_="article_teaser_body").text

print(news_title)
print(news_p)

NASA's MRO Completes 60,000 Trips Around Mars
The orbiting spacecraft is also about to set a record for data relayed from the Martian surface.


## JPL Mars Space Images - Featured Image

In [6]:
# URL for featured image to scrape
image_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(image_url)

In [7]:
time.sleep(3)

html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [8]:
# Scraping for the image and spliting the string to get desired URL portion
image_results = soup.find("article", class_="carousel_item")['style']
s = image_results.split("'")[1]
print(s)

/spaceimages/images/wallpaper/PIA19673-1920x1200.jpg


In [9]:
# Adding previous desired URL portion to create a full URL
featured_image_url = f'https://www.jpl.nasa.gov{s}'
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA19673-1920x1200.jpg


## Mars Weather

In [10]:
# URL for Mars Weather Tweet
weather_url = "https://twitter.com/marswxreport?lang=en"
browser.visit(weather_url)

In [11]:
time.sleep(3)

html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [12]:
# Scraping for latest weather tweet and selecting desired text only
mars_weather = soup.find("p", class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text").text[8:159]
print(mars_weather)

what a sunset on Earth looks like from above. / So sieht ein irdischer Sonnenuntergang aus, von oben gesehen. #Horizons #Archivepic.twitter.com/sgijQq8


## Mars Facts

In [13]:
# URL for Mars Facts
facts_url = "https://space-facts.com/mars/"

In [14]:
# Using read_html to scrape any tabular data from the page
tables = pd.read_html(facts_url)
tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [15]:
# Transforming scraped data into desired form
df = tables[0]
df.columns = ['Description','Value']
df = df.set_index('Description')
df

Unnamed: 0_level_0,Value
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [16]:
# Generating table into a html table
marsfacts_html_table = df.to_html()
marsfacts_html_table = marsfacts_html_table.replace('\n', '')
marsfacts_html_table

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Value</th>    </tr>    <tr>      <th>Description</th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter:</th>      <td>6,792 km</td>    </tr>    <tr>      <th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>Moons:</th>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>Orbit Distance:</th>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <th>Orbit Period:</th>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>Surface Temperature:</th>      <td>-153 to 20 °C</td>    </tr>    <tr>      <th>First Record:</th>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>Recorded By:</th>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>'

## Mars Hemispheres

In [17]:
# URL for images of Mars Hemispheres
mh_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(mh_url)

In [18]:
time.sleep(3)

html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [19]:
# Getting the div element that holds the images and titles 
images = soup.find('div', class_='collapsible results')

# Creating a list to hold dictionary of the images and titles to be scraped
hemisphere_image_urls = []

# Looping through tags and load the data to the dictionary
for x in range(len(images.find_all("div", class_="item"))):
        # Looking for h3 tag within the div class item
        img = browser.find_by_tag('h3')
        # Clicking for each 'x'/image of h3 tag to find larger image
        img[x].click()
        # HTML object
        html = browser.html
        # Parse HTML with Beautiful Soup
        soup = BeautifulSoup(html, 'html.parser')
        # Retrieve all elements that contain h2 fot title of the image
        title = soup.find("h2", class_="title").text
        # Retrieve all elements that contain an image on the web page 
        div = soup.find("div", class_="downloads")
        # Loop through the list tag to find URL link to full size version
        for li in div:
            link = div.find('a')
            url = link.attrs['href']
            hemisphere_dict = {
                'title' : title,
                'img_url' : url
                }
        # Append dictionary to the hemisphere list
        hemisphere_image_urls.append(hemisphere_dict)
        # Go back to the previous page to loop through next image
        browser.back()

print(hemisphere_image_urls)

[{'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]
