# Mission to mars

* This web application scrapes data from various websites to get information about the planet Mars and complies it into a single HTML page.


In [9]:
# Dependencies
import pandas as pd
import requests as req
from bs4 import BeautifulSoup as bs
from splinter import Browser
from selenium import webdriver
import time

In [10]:
# https://splinter.readthedocs.io/en/latest/drivers/chrome.html
!which chromedriver

/usr/local/bin/chromedriver


In [11]:
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

## Step 1 Web Scraping


In [12]:
# Latest News URL for NASA Mars News
news_url = "https://mars.nasa.gov/news/"
browser.visit(news_url)
html = browser.html

# Parse HTML with Beautiful Soup
soup = bs(html, "html.parser")

# Pull title and paragraph text
article = soup.find("div", class_='list_text')
news_title = article.find("div", class_="content_title").text
news_p = article.find("div", class_ ="article_teaser_body").text
print(news_title)
print(news_p)

AttributeError: 'NoneType' object has no attribute 'find'

In [None]:
# URL for JPL Featured Space Image
image_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(image_url)

# Get the 'FULL IMAGE'
browser.click_link_by_partial_text('FULL IMAGE')
time.sleep(5)

# Go to 'more info'
browser.click_link_by_partial_text('more info')

# Parse HTML with Beautiful Soup
html = browser.html
image_soup = bs(html, 'html.parser')

# Scrape the URL
feat_img_url = image_soup.find('figure', class_='lede').a['href']
featured_image_url = f'https://www.jpl.nasa.gov{feat_img_url}'
print(featured_image_url)


# Mars Weather
* Visit the Mars Weather twitter account 
* Scrape the latest Mars weather tweet
* Save the tweet text for the weather report as a variable

In [13]:
# Twitter url for latest Mars Weather
tweet_url = "https://twitter.com/marswxreport?lang=en"
browser.visit(tweet_url)
html = browser.html

# Use Beautiful Soup to parse HTML
soup = bs(html, 'html.parser')

# Extract latest tweet
tweet_container = soup.find_all('div', class_="js-tweet-text-container")

# Loop through latest tweets and find the tweet that has weather information
for tweet in tweet_container: 
    mars_weather = tweet.find('p').text
    if 'sol' and 'pressure' in mars_weather:
        print(mars_weather)
        break
    else: 
        pass

InSight sol 348 (2019-11-19) low -102.5ºC (-152.5ºF) high -23.2ºC (-9.8ºF)
winds from the SSE at 5.1 m/s (11.5 mph) gusting to 18.9 m/s (42.3 mph)
pressure at 6.80 hPapic.twitter.com/iZwojPj9au


# Facts about Mars
* Use pandas to scrape the table that contains data about Mars
* Convert to HTML table string

In [14]:
# Webpage for facts about Mars
facts_url = "https://space-facts.com/mars/"
browser.visit(facts_url)
html = browser.html

In [15]:
# Use Pandas to scrape the table containing facts about Mars
table = pd.read_html(facts_url)
mars_facts = table[1]

# Rename columns
mars_facts.columns = ['Description','Value']

# Reset Index to be description
mars_facts.set_index('Description', inplace=True)
mars_facts

ValueError: Length mismatch: Expected axis has 3 elements, new values have 2 elements

# Mars Hemispheres
* Pull high resolution photos from the USGS Astrogeology for each of Mars' hemispheres 
* Save image url string for the full resolution hemisphere image and the Hemisphere title containing the hemisphere name.
* Use the keys img_url and title to store the data in a Python dictionary
* Append the dictionary with the image url string and the hemisphere title to a list

In [16]:
# Visit USGS webpage for Mars hemispehere images
hemispheres_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(hemispheres_url)
html = browser.html

# Parse HTML with Beautiful Soup
soup = bs(html, "html.parser")

# Create dictionary to store titles & links to images
hemisphere_image_urls = []

# Retrieve all elements that contain image information
results = soup.find("div", class_ = "result-list" )
hemispheres = results.find_all("div", class_="item")

# Iterate through each image
for hemisphere in hemispheres:
    title = hemisphere.find("h3").text
    title = title.replace("Enhanced", "")
    end_link = hemisphere.find("a")["href"]
    image_link = "https://astrogeology.usgs.gov/" + end_link    
    browser.visit(image_link)
    html = browser.html
    soup = bs(html, "html.parser")
    downloads = soup.find("div", class_="downloads")
    image_url = downloads.find("a")["href"]
    hemisphere_image_urls.append({"title": title, "img_url": image_url})

# Print image title and url
print(hemisphere_image_urls)

[{'title': 'Cerberus Hemisphere ', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}, {'title': 'Schiaparelli Hemisphere ', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}, {'title': 'Syrtis Major Hemisphere ', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}, {'title': 'Valles Marineris Hemisphere ', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]
