## Mission to Mars: Web Scraping Challenge

In [17]:
#Import Dependencies
from splinter import Browser
from bs4 import BeautifulSoup as bs
import pymongo
import pandas as pd
import time
import os

In [18]:
#Set Executable Path & Initialize Chrome Browser
# The chromedriver location is machine-specific, so it can be overridden with
# the CHROMEDRIVER_PATH environment variable; the previously hard-coded
# location is kept as the fallback so existing setups keep working.
chromedriver_path = os.environ.get(
    "CHROMEDRIVER_PATH",
    r"C:\Users\Mickey\anaconda3\Scripts\chromedriver.exe",
)
executable_path = {"executable_path": chromedriver_path}
browser = Browser("chrome", **executable_path)

In [19]:
#Browse URL
url = 'https://mars.nasa.gov/news/'
browser.visit(url)

# Give the article teaser (scraped further down) up to one second to show up
# before the page source is captured, using the same wait pattern as the
# "more info" lookup later in this notebook. The call returns a boolean
# instead of raising on timeout; the trailing ";" keeps it out of the cell output.
browser.is_element_present_by_css("div.article_teaser_body", wait_time=1);

In [20]:
#Capture the Page Source Held by the Browser and Parse It with BeautifulSoup
html = browser.html
soup = bs(html, features="html.parser")

#### Mars News

In [22]:
#Scrape Title
# Collect every <div class="content_title"> and keep the text of the one at index 1
title = soup.find_all("div", attrs={"class": "content_title"})

news_title = title[1].get_text().strip()
news_title

'Mars Is Getting a New Robotic Meteorologist'

In [23]:
#Scrape Paragraph
# The first <div class="article_teaser_body"> in the parsed page supplies the teaser text
teaser_div = soup.find("div", attrs={"class": "article_teaser_body"})
news_paragraph = teaser_div.get_text()
news_paragraph

"Sensors on NASA's Perseverance will help prepare for future human exploration by taking weather measurements and studying dust particles."

#### Mars Images - Featured Image

In [24]:
#Open the JPL Space Images Search Page (category=Mars)
url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(url)

In [25]:
#Ask Splinter to Find the Element with ID full_image and Click It
# The lookup is by element id, not by class name.
full_image_button = browser.find_by_id("full_image")
full_image_button.first.click()

In [26]:
#Find "More Info" Link and Click It
# Wait up to one second for the "more info" text to be present before the lookup.
browser.is_element_present_by_text("more info", wait_time=1)
# Use the browser.links finder API (the hemisphere cell already does) rather
# than the deprecated browser.find_link_by_partial_text shortcut.
more_info_element = browser.links.find_by_partial_text("more info")
more_info_element.click()



In [27]:
#Parse the Detail Page with BeautifulSoup

html = browser.html
image_soup = bs(html, features="html.parser")

# Featured image: the first <img> found under a link inside <figure class="lede">
img = image_soup.select_one("figure.lede a img")
img_url = img.get("src")

# Prefix the src value with the JPL host to build the final URL
featured_img_url = "https://www.jpl.nasa.gov{}".format(img_url)

In [28]:
#Show the Assembled Featured-Image URL as the Cell Output
featured_img_url

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA14925_hires.jpg'

#### Mars Facts

In [29]:
#Open the Space-Facts Mars Page
url = "https://space-facts.com/mars/"
browser.visit(url)

In [30]:
#Read Mars Facts using Pandas
# read_html returns a list of DataFrames; keep the first one and label its columns
tables = pd.read_html(url)
facts_df = tables[0]
facts_df.columns = ["Description", "Value"]
facts_df

Unnamed: 0,Description,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [31]:
#Write the Facts Table to mars_facts.html without the Index Column
facts_df.to_html(buf="mars_facts.html", index=False)

#### Mars Hemispheres

In [32]:
#Open the USGS Astrogeology Search Results for Enhanced Mars Hemispheres
url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(url)

In [33]:
#Accumulator: One Dict with img_url and title per Hemisphere
hemi_img_urls = []

#Hemisphere Headings on the Results Page (used below only for their count)
products = browser.find_by_css("a.product-item h3")
product_count = len(products)

#Visit Each Hemisphere Page in Turn
for idx in range(product_count):
    #Look the headings up again on every pass and click the idx-th one
    browser.find_by_css("a.product-item h3")[idx].click()

    time.sleep(1)

    #The first link whose text is "Sample" supplies the image URL
    sample_link = browser.links.find_by_text("Sample").first

    #Build the Record: Image URL First, Then the Text of h2.title
    hemisphere = {
        "img_url": sample_link["href"],
        "title": browser.find_by_css("h2.title").text,
    }

    #Add to List
    hemi_img_urls.append(hemisphere)

    #Navigate to Previous Page
    browser.back()

#Show the Collected Records as the Cell Output
hemi_img_urls

[{'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
  'title': 'Cerberus Hemisphere Enhanced'},
 {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
  'title': 'Schiaparelli Hemisphere Enhanced'},
 {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg',
  'title': 'Syrtis Major Hemisphere Enhanced'},
 {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg',
  'title': 'Valles Marineris Hemisphere Enhanced'}]

In [34]:
#Close Browser
# Called once all scraping cells have run; `browser` is not used after this point.
browser.quit()