# Web Scraping - Mission to Mars  👩🏻‍🚀

In [36]:
# Dependencies
import shutil
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
from IPython.display import Image
from splinter import Browser

In [37]:
# Start a visible (non-headless) Chrome session driven by Splinter.
# ChromeDriverManager().install() supplies the chromedriver path that Splinter
# is told to use through the `executable_path` keyword.
from webdriver_manager.chrome import ChromeDriverManager
driver_path = ChromeDriverManager().install()
executable_path = {'executable_path': driver_path}
browser = Browser('chrome', headless=False, **executable_path)



Current google-chrome version is 93.0.4577
Get LATEST driver version for 93.0.4577
Driver [C:\Users\Sharon\.wdm\drivers\chromedriver\win32\93.0.4577.15\chromedriver.exe] found in cache


## 🚀 NASA Mars News 🚀

Scrape the Mars News Site and collect the latest News Title and Paragraph Text. 

In [38]:
# Save NASA MARS URL and visit the page
url = 'https://redplanetscience.com/'
browser.visit(url)

# Give the page a bounded amount of time to show its first article before the
# HTML snapshot is taken; without this the snapshot can be taken too early and
# the lookups below come back empty.
browser.is_element_present_by_css('div.content_title', wait_time=5)

# Create BeautifulSoup object; parse with 'html.parser'
html = browser.html
soup = bs(html, "html.parser")

# Locate the first (latest) news title and its teaser paragraph
title_tag = soup.find('div', class_='content_title')
teaser_tag = soup.find('div', class_='article_teaser_body')
if title_tag is None or teaser_tag is None:
    # Fail with a readable message instead of an AttributeError on None.text
    raise RuntimeError(f"No news article found on {url}; the page may not have finished loading.")

news_title = title_tag.text
news_p = teaser_tag.text

# Print title and paragraph
print(news_title)
print('------------------------------------------------------')
print(news_p)

NASA's Perseverance Mars Rover Gets Its Wheels and Air Brakes
------------------------------------------------------
After the rover was shipped from JPL to Kennedy Space Center, the team is getting closer to finalizing the spacecraft for launch later this summer.


## JPL Mars Space Images - Featured Image 🌑

Visit the URL for the Featured Space Image site and retrieve the full-size image URL.

In [39]:
# Save FEATURED SPACE IMAGE URL and visit the page
url = 'https://spaceimages-mars.com/'
browser.visit(url)

# XPath selector for the featured image
# NOTE(review): this is an absolute path and will break if the page layout changes.
xpath = '/html/body/div[1]/img'

# Use splinter to click the featured image
# to bring up the full resolution image
results = browser.find_by_xpath(xpath)
img = results[0]
img.click()

# Create BeautifulSoup object; parse with 'html.parser'
html = browser.html
soup = bs(html, "html.parser")

# `src` of the header image, as written in the page markup
image_url = soup.find("img", class_="headerimage fade-in")["src"]

# Resolve the image path against the site URL. urljoin copes with relative
# paths, root-relative paths and already-absolute URLs, where plain string
# concatenation only works for the first case.
featured_image_url = urljoin(url, image_url)
featured_image_url

'https://spaceimages-mars.com/image/featured/mars1.jpg'

### Bonus: Because I like pictures.  💥

In [40]:
# Use the requests library to download and save the image from the `featured_image_url` above.
# stream=True exposes the body as a file-like object (response.raw) that is
# copied to disk below; the `with` block closes the connection afterwards.
with requests.get(featured_image_url, stream=True) as response:
    # Stop here on 4xx/5xx instead of saving an error page as the image
    response.raise_for_status()
    # response.raw skips content decoding by default; turn it on so a
    # gzip/deflate-encoded reply is not written to disk still compressed.
    response.raw.decode_content = True
    # NOTE(review): the featured image URL ends in .jpg but the file is saved
    # as img.png; the name is kept unchanged here.
    with open('img.png', 'wb') as out_file:
        shutil.copyfileobj(response.raw, out_file)

# Display the image with IPython.display
Image(url='img.png')

## Mars Facts 🛰

Visit the Mars Facts webpage, use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc, and convert it back to an HTML format.

In [46]:
# Save MARS FACTS URL (pandas fetches the page itself; the browser is not used here)
url = 'https://galaxyfacts-mars.com/'

# Extract every HTML table on the page into a list of DataFrames
tables = pd.read_html(url)

# The second table is the one used as the Mars facts table.
# Work on a copy so the parsed table in `tables` is left untouched and
# re-running later cells cannot observe a half-modified frame.
df = tables[1].copy()
df.columns = ['Description', 'Mars']

# set_index returns a new frame; no in-place mutation needed
df = df.set_index('Description')
df

Unnamed: 0_level_0,Mars
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 ( Phobos & Deimos )
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [47]:
# Render the facts DataFrame as an HTML table string,
# tagging the <table> element with the CSS classes below.
table_classes = 'table table-striped th-align-left table-bordered'
html_table = df.to_html(classes=table_classes)

## Mars Hemispheres 👽

* Visit the Mars Hemispheres URL to obtain high-resolution images of each of Mars's hemispheres, along with their names.

In [None]:
# Visit the Mars hemispheres site (index page listing every hemisphere)
url = "https://marshemispheres.com/"
browser.visit(url)
html = browser.html

# Parse HTML with Beautiful Soup
soup = bs(html, "html.parser")

# List of {"title": ..., "image_url": ...} dictionaries, one per hemisphere
image_urls = []

# Retrieve all elements that contain image information
results = soup.find("div", class_="collapsible results")
pics = results.find_all("div", class_="item")

# Iterate through each hemisphere entry on the index page
for pic in pics:

    # Scrape the title and keep only the hemisphere name: drop the word
    # "Enhanced" and the whitespace it leaves behind.
    title = pic.find("h3").text.replace("Enhanced", "").strip()

    # Follow the entry's link to its detail page
    pic_link = urljoin(url, pic.find("a")["href"])
    browser.visit(pic_link)

    # Parse the detail page into its own soup so the index-page `soup`
    # above is not overwritten while the loop is running.
    detail_soup = bs(browser.html, "html.parser")

    # The first link inside the "downloads" block is taken as the full-size image
    downloads = detail_soup.find("div", class_="downloads")
    pic_url = downloads.find("a")["href"]

    # Store the title together with the absolute image URL
    image_urls.append({"title": title, "image_url": urljoin(url, pic_url)})

# Print image title and url
print(image_urls)

[{'title': 'Cerberus Hemisphere ', 'image_url': 'https://marshemispheres.com/images/full.jpg'}, {'title': 'Schiaparelli Hemisphere ', 'image_url': 'https://marshemispheres.com/images/schiaparelli_enhanced-full.jpg'}, {'title': 'Syrtis Major Hemisphere ', 'image_url': 'https://marshemispheres.com/images/syrtis_major_enhanced-full.jpg'}, {'title': 'Valles Marineris Hemisphere ', 'image_url': 'https://marshemispheres.com/images/valles_marineris_enhanced-full.jpg'}]
