# Mission to Mars

## <font color="blue">1. Initial scraping using BeautifulSoup, Pandas, and Requests/Splinter</font>

In [35]:
#import libraries
import pprint
import random

import pandas as pd
import requests
from bs4 import BeautifulSoup
from splinter import Browser

In [36]:
# Show where the chromedriver binary lives; the scraping cells pass this path to Splinter as executable_path.
!which chromedriver

/usr/local/bin/chromedriver


### 1.1 Scrape the NASA Mars News Site and collect the latest News Title and Paragraph Text

In [58]:
# Establish connection with the browser
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

news_url = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'

# Both stay None if every attempt fails, so later cells can still reference them.
news_title = None
news_p = None

# The NASA news page sometimes renders only partially, in which case the expected
# <div> elements are missing. browser.status_code did not work with the Splinter
# version this notebook was written against, so instead of checking the HTTP status
# we check that the elements are present and reload the page up to `retry` times.
retry = 3
try:
    browser.visit(news_url)
    while retry != 0:
        retry -= 1
        html = browser.html
        soup0 = BeautifulSoup(html, 'html.parser')

        titles = soup0.find_all('div', class_="content_title")
        teasers = soup0.find_all('div', class_="article_teaser_body")

        # Only accept the page when BOTH pieces are present, so we never end up
        # with a title from one attempt and no teaser.
        if titles and teasers:
            # Choose the latest news (first item on the page)
            news_title = titles[0].text
            news_p = teasers[0].text
            break

        print("request failed")
        if retry:
            # No point reloading after the final attempt.
            browser.reload()
finally:
    # Always close the browser window, even if the visit or parsing raised.
    browser.quit()

print(news_title)
print(news_p)

MarCO Makes Space for Small Explorers
A pair of NASA CubeSats flying to Mars are opening a new frontier for small spacecraft.


### 1.2 Scrape images

In [4]:
img_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
# A timeout keeps the cell from hanging indefinitely; raise_for_status surfaces an
# HTTP error instead of silently parsing an error page.
response1 = requests.get(img_url, timeout=30)
response1.raise_for_status()
soup1 = BeautifulSoup(response1.text, 'html.parser')

# Using bs4 we extract relative links to the compressed (thumbnail) images
images = soup1.find_all('img', class_="thumb")

# Rebuild the URL of the full-sized image from the picture id found in each thumbnail path.
# NOTE(review): this assumes the id is the part before the first '-' in the fifth
# '/'-separated segment of `src` -- confirm against the current page markup.
img_list = []
for image in images[:30]:
    img_name = str(image['src']).split('/')[4].split('-')[0]
    # Separate name so the page URL in `img_url` is not overwritten by the loop.
    full_size_url = 'https://www.jpl.nasa.gov/spaceimages/images/largesize/' + img_name + '_hires.jpg'
    img_list.append(full_size_url)

if not img_list:
    # random.choice would fail with an opaque IndexError; say what actually went wrong.
    raise ValueError('no thumbnail images found at ' + img_url)

# Let random choose the picture (intentionally different on every run)
featured_image_url = random.choice(img_list)
print(featured_image_url)


https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA22669_hires.jpg


### 1.3 Scrape the latest Mars weather from the Mars Weather twitter account

In [5]:
twitter_url = 'https://twitter.com/marswxreport?lang=en'
# Fail fast on a hung connection or an HTTP error page rather than parsing garbage.
response2 = requests.get(twitter_url, timeout=30)
response2.raise_for_status()
soup2 = BeautifulSoup(response2.text, 'html.parser')

tweets = soup2.find_all('p', class_="js-tweet-text")

# Reset on every run: without this, a run that finds no weather tweet would either
# leave the name undefined (NameError in the summary cell) or keep a stale value.
mars_weather = None

# Choose the first tweet that is a weather report (they start with "Sol <number>")
for tweet in tweets:
    if tweet.text.startswith('Sol '):
        mars_weather = tweet.text
        break

if mars_weather is None:
    print("no weather tweet found")
else:
    print(mars_weather)

Sol 2169 (2018-09-12), high -10C/14F, low -70C/-93F, pressure at 8.82 hPa, daylight 05:41-17:58


### 1.4 Mars facts

In [6]:
# Use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.
facts_url = 'https://space-facts.com/mars/'

# read_html returns every <table> on the page; the facts table is the first one.
facts_raw = pd.read_html(facts_url)[0]
facts_raw.columns = ['Description', 'Value']

# Build `df` as a new frame instead of mutating in place, so the raw table stays
# available and re-running this cell always starts from the same input.
df = facts_raw.set_index("Description")
df

Unnamed: 0_level_0,Value
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [7]:
# Use Pandas to convert the data to a HTML table string, and save a copy to table.html.
html_table = df.to_html()
df.to_html('table.html')

# A bare expression is only displayed when it is the last statement of the cell.
html_table

### 1.5 Mars Hemispheres

#### Note: I used an approach where the browser opens the initial URL and then, with a nested for-loop, navigates to the page of each hemisphere to extract the image URL, and after that goes back to switch to the next hemisphere.

In [8]:
# Visit the USGS Astrogeology site to obtain high resolution images for each of Mars's hemispheres.
# Click each of the links to the hemispheres in order to find the image url to the full resolution image.
# Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing
# the hemisphere name. Use a Python dictionary to store the data using the keys img_url and title.
# Append the dictionary with the image url string and the hemisphere title to a list.
# This list will contain one dictionary for each hemisphere.

# Establish connection with the browser
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

url_hemisph = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

hemisphere_image_urls = []
try:
    browser.visit(url_hemisph)

    # Create bs4 object of the parsed search-results page
    soup = BeautifulSoup(browser.html, 'html.parser')

    # Scrape names of hemispheres (the <h3> headings on the results page)
    h_names = soup.find_all('h3')
    for heading in h_names:
        link_text = heading.text

        # Follow link to the page of this particular hemisphere
        browser.click_link_by_partial_text(link_text)

        # Create a new bs4 object of the current (detail) page
        page_soup = BeautifulSoup(browser.html, 'html.parser')

        # The <a> tag whose text is exactly "Sample" links to the image.
        # If several match, the last one wins.
        sample_href = None
        for link in page_soup.find_all('a'):
            if link.text == "Sample":
                sample_href = link['href']

        # Return to the results page before moving on to the next hemisphere
        browser.back()

        if sample_href is None:
            # Skip rather than appending a stale dict left over from the previous hemisphere.
            print("no Sample link found for " + link_text)
            continue

        hemisphere_image_urls.append(
            {"title": link_text.replace(' Enhanced', ''), "img_url": sample_href}
        )
finally:
    # Always close the browser window, even if navigation or parsing raised.
    browser.quit()
print(hemisphere_image_urls)

[{'title': 'Cerberus Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}, {'title': 'Schiaparelli Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}, {'title': 'Syrtis Major Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}, {'title': 'Valles Marineris Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]


In [26]:
# Create one Python dictionary containing all scraped data
facts = df.to_dict()

# 'facts' holds the plain {description: value} mapping taken from the 'Value' column.
# The dict literal previously listed the 'facts' key twice (first with the DataFrame
# itself); only the last value ever took effect, so the dead first entry is removed.
dictionary = {
    'news_title': news_title,
    'news_paragraph': news_p,
    'featured_image_url': featured_image_url,
    'mars_weather': mars_weather,
    'facts': facts['Value'],
    'hemispheres': hemisphere_image_urls,
}

pprint.pprint(dictionary)

{'facts': {'Equatorial Diameter:': '6,792 km',
           'First Record:': '2nd millennium BC',
           'Mass:': '6.42 x 10^23 kg (10.7% Earth)',
           'Moons:': '2 (Phobos & Deimos)',
           'Orbit Distance:': '227,943,824 km (1.52 AU)',
           'Orbit Period:': '687 days (1.9 years)',
           'Polar Diameter:': '6,752 km',
           'Recorded By:': 'Egyptian astronomers',
           'Surface Temperature:': '-153 to 20 °C'},
 'featured_image_url': 'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA22669_hires.jpg',
 'hemispheres': [{'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
                  'title': 'Cerberus Hemisphere'},
                 {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
                  'title': 'Schiaparelli Hemisphere'},
                 {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syr