# Scraping and analysis notebook


## NASA Mars News

In [1]:
#  Import dependencies
from bs4 import BeautifulSoup
import requests
from splinter import Browser
import pandas as pd
import time

In [2]:
# Tell splinter where the chromedriver binary lives and start a visible
# (non-headless) Chrome session. `executable_path` is kept as a dict so that
# later cells can unpack it again.
executable_path = dict(executable_path='chromedriver.exe')
browser = Browser('chrome', headless=False, **executable_path)

In [3]:
# NASA Mars News site: load the listing page in the open browser session
url = "https://mars.nasa.gov/news"
browser.visit(url)

In [4]:
# Latest news title: text of the first link that matches the headline
# selector inside the list-view gallery (no further clean-up is applied)
news_title = (
    browser
    .find_by_css(".grid_gallery.list_view .content_title a")
    .first
    .text
)
news_title

"NASA's New Mars Rover Is Ready for Space Lasers"

In [5]:
# Latest news teaser: text of the first <div> whose class attribute is
# exactly "article_teaser_body" (no further clean-up is applied)
news_p = (
    browser
    .find_by_css('div[class="article_teaser_body"]')
    .first
    .text
)
news_p

'Perseverance is one of a few Mars spacecraft carrying laser retroreflectors. The devices could provide new science and safer Mars landings in the future.'

In [6]:
# OPTION 2: fetch the same teaser paragraph with XPath instead of CSS.
# A relative, class-based expression replaces the absolute
# /html/body/div[3]/... path, which stops matching as soon as any wrapper
# element on the page is added, removed or reordered.
news_p2 = browser.find_by_xpath('//div[@class="article_teaser_body"]').text
news_p2

'Perseverance is one of a few Mars spacecraft carrying laser retroreflectors. The devices could provide new science and safer Mars landings in the future.'

## JPL Mars Space Images - Featured Image

In [5]:
# Open the JPL featured-image page in the Chrome session that is already
# running. Creating another Browser here would rebind `browser` and leave the
# previous window open with nothing left to quit it.
url2 = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url2)

In [6]:
# First navigation step towards the current Featured Mars Image URL:
# follow the link whose text contains "FULL IMAGE"
browser.click_link_by_partial_text("FULL IMAGE")

In [7]:
# Second navigation step: follow the "more info" link.
# When the notebook is run top to bottom this cell executes immediately after
# the previous click, so give the page up to 5 s to show the link first.
# The presence check only returns True/False; it never raises on timeout.
# NOTE(review): presumably the link is rendered only after the previous click — confirm.
browser.is_element_present_by_text("more info", wait_time=5)
browser.click_link_by_partial_text("more info")

In [8]:
# Featured image URL: the `src` attribute of the first <img> found inside
# <figure class="lede">
featured_image_url = (
    browser
    .find_by_css('figure[class="lede"] img')
    .first["src"]
)
featured_image_url


'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA16842_hires.jpg'

## Mars Facts

In [9]:
# Mars facts page whose tables are scraped in the next cell
# (this rebinds `url`, replacing the NASA news address set earlier)
url = 'https://space-facts.com/mars/'

In [10]:
# Let pandas fetch the page and parse every <table> it finds;
# the result is a list with one DataFrame per table
tables = pd.read_html(io=url)
tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers,
   Mars - Earth Comparison             Mars            Earth
 0               Diameter:         6,779 km        12,742 km
 1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 2                  Moons:                2                1
 3      Distance from Sun:   227,943,824 km   149,598,262 km
 4         Length of Year:   687 Earth days      365.24 days
 5            Temperature:     -87 to -5 °C      -88 to 58°C,
           

In [11]:
# Select the first DataFrame from the list of DataFrames and give its two
# columns readable names. rename() returns a new frame, so tables[0] keeps its
# original 0/1 column labels and this cell can be re-run from the same input.
df = tables[0].rename(columns={0: "Description", 1: "Value"})
df

Unnamed: 0,Description,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [12]:
# Render the facts DataFrame as an HTML table twice: once written directly
# to the file table.html, once kept in memory as a string for display
df.to_html('table.html')
html_table = df.to_html()
html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Description</th>\n      <th>Value</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>Surface Temperature:</td>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>First Record:</td>\n      <td>2nd millennium 

## Mars Hemispheres

In [15]:
# Open the USGS hemisphere search results in the Chrome session that is
# already running. Creating yet another Browser here would rebind `browser`
# and leave the previous window open with nothing left to quit it.
url3 = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url3)

In [16]:
# Delay to give the results page time to open
time.sleep(3)

try:
    # Read every <h3> heading and keep its text as a plain string. Empty
    # texts are dropped: an empty partial-text search would match an
    # arbitrary link instead of a hemisphere page.
    heading_texts = (heading.text for heading in browser.find_by_tag('h3'))
    titles = [text for text in heading_texts if text]

    # One {"title": ..., "img_url": ...} dict per hemisphere
    hemisphere_image_urls = []

    # For each title: open the corresponding page through its link, read the
    # href of the element whose text is "Original", then return to the results
    for title in titles:
        browser.click_link_by_partial_text(title)
        hemisphere_image_urls.append({
            "title": title,
            "img_url": browser.find_by_text("Original")['href'],
        })
        browser.back()

    print(hemisphere_image_urls)
finally:
    # Always close Chrome, even when a click or lookup above raises
    browser.quit()
        

[{'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif'}]
