### ---------------------------------------------
## Web Scraping - Mission to Mars
### ---------------------------------------------

In [166]:
from splinter import Browser
from bs4 import BeautifulSoup
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd

In [167]:
# URL of pages to be scraped
mars_news_url = 'https://redplanetscience.com'
space_img_url = "https://spaceimages-mars.com"
mars_facts_url = "https://galaxyfacts-mars.com"
hemisph_url = "https://marshemispheres.com"

In [None]:
# Setup Splinter Browser
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

### NASA Mars News

In [169]:
# Go to the news url
browser.visit(mars_news_url)

# Create BeautifulSoup object; parse with 'html.parser'
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [170]:
# Find the news section
latest_news = soup.find('div', id='news')

# Get the latest news title
news_title = latest_news.find('div', class_='content_title').text
news_title

"NASA's Mars Reconnaissance Orbiter Undergoes Memory Update"

In [171]:
# Get the latest news paragraph
news_p = latest_news.find('div', class_='article_teaser_body').text
news_p

'Other orbiters will continue relaying data from Mars surface missions for a two-week period.'

### JPL Mars Space Images - Featured Image

In [172]:
# Visit the url for the Featured Space Image site 
browser.visit(space_img_url)
html = browser.html
soup = BeautifulSoup(html_img, 'html.parser')

In [173]:
# Find the image complete url for the current Featured Mars Image full size
local_image_url = soup.find('img', class_="headerimage fade-in")['src']
featured_image_url = space_image+'/'+local_image_url
featured_image_url

'https://spaceimages-mars.com/image/featured/mars1.jpg'

### Mars Facts

In [174]:
# Via Pandas scrape the table containing facts about the planet
tables = pd.read_html(mars_facts_url)
tables

[                         0                1                2
 0  Mars - Earth Comparison             Mars            Earth
 1                Diameter:         6,779 km        12,742 km
 2                    Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 3                   Moons:                2                1
 4       Distance from Sun:   227,943,824 km   149,598,262 km
 5          Length of Year:   687 Earth days      365.24 days
 6             Temperature:     -87 to -5 °C      -88 to 58°C,
                       0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:          2 ( Phobos & Deimos )
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC

In [175]:
# Save data frame for needed table
df = tables[0]
df

Unnamed: 0,0,1,2
0,Mars - Earth Comparison,Mars,Earth
1,Diameter:,"6,779 km","12,742 km"
2,Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
3,Moons:,2,1
4,Distance from Sun:,"227,943,824 km","149,598,262 km"
5,Length of Year:,687 Earth days,365.24 days
6,Temperature:,-87 to -5 °C,-88 to 58°C


In [176]:
# Rename columns, drop first row and set index
df.rename(columns = {0:'Facts', 1:'Mars', 2:'Earth'}, inplace=True)
df.drop(0, inplace=True)
df.set_index('Facts', inplace=True)
df

Unnamed: 0_level_0,Mars,Earth
Facts,Unnamed: 1_level_1,Unnamed: 2_level_1
Diameter:,"6,779 km","12,742 km"
Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
Moons:,2,1
Distance from Sun:,"227,943,824 km","149,598,262 km"
Length of Year:,687 Earth days,365.24 days
Temperature:,-87 to -5 °C,-88 to 58°C


In [177]:
# Convert the data to a HTML table string
html_table = df.to_html()
html_table.replace('\n', '')
html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Mars</th>\n      <th>Earth</th>\n    </tr>\n    <tr>\n      <th>Facts</th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Diameter:</th>\n      <td>6,779 km</td>\n      <td>12,742 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg</td>\n      <td>5.97 × 10^24 kg</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>Distance from Sun:</th>\n      <td>227,943,824 km</td>\n      <td>149,598,262 km</td>\n    </tr>\n    <tr>\n      <th>Length of Year:</th>\n      <td>687 Earth days</td>\n      <td>365.24 days</td>\n    </tr>\n    <tr>\n      <th>Temperature:</th>\n      <td>-87 to -5 °C</td>\n      <td>-88 to 58°C</td>\n    </tr>\n  </tbody>\n</table>'

### Mars Hemispheres

In [178]:
browser.visit(hemisph_url)

In [179]:
# Find link texts to go to each hemisphere url  
html = browser.html
soup = BeautifulSoup(html, 'html.parser')
 
links_text = []
items = soup.find_all('div', class_='item')
for it in items:
    descr = it.find('div', class_='description')
    links_text.append(descr.find('h3').text)
links_text

['Cerberus Hemisphere Enhanced',
 'Schiaparelli Hemisphere Enhanced',
 'Syrtis Major Hemisphere Enhanced',
 'Valles Marineris Hemisphere Enhanced']

In [180]:
# Find links to full size images of mars hemispheres

# Define list to add title and img url for each hemisphere
hem_img_ttl_urls = []

# Loop through the hemisphere links to click on them and get title and url values
for txt in links_text:
    
    # Save title of the hemisphere  
    title = txt.replace(" Enhanced","")
    
    # Click hemisphere to access link to the original image
    browser.links.find_by_partial_text(txt).click()
    
    # Use Beautiful Soup to find the link
    html = browser.html
    soup = BeautifulSoup(html, 'html.parser')
    img_url = hemisph+'/'+soup.find('a', text = 'Original')['href']
    
    # Add to the list of dictionaries
    hem_img_ttl_urls.append({"title": title, "img_url": img_url})
    
    # Return to the main page
    browser.links.find_by_partial_text("Back").click()

In [181]:
hem_img_ttl_urls

[{'title': 'Cerberus Hemisphere',
  'img_url': 'https://marshemispheres.com/images/cerberus_enhanced.tif'},
 {'title': 'Schiaparelli Hemisphere',
  'img_url': 'https://marshemispheres.com/images/schiaparelli_enhanced.tif'},
 {'title': 'Syrtis Major Hemisphere',
  'img_url': 'https://marshemispheres.com/images/syrtis_major_enhanced.tif'},
 {'title': 'Valles Marineris Hemisphere',
  'img_url': 'https://marshemispheres.com/images/valles_marineris_enhanced.tif'}]

In [182]:
browser.quit()