# Scraping Steps for Mars App

In [1]:
# Dependencies

from bs4 import BeautifulSoup as bs
import pandas as pd

from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
# Browser setup
# ChromeDriverManager().install() hands back the path of a chromedriver binary;
# it is wrapped in a dict so it can be splatted into Browser() as a keyword argument.
executable_path = dict(executable_path=ChromeDriverManager().install())

# headless=False is passed through unchanged from the original setup
browser = Browser("chrome", headless=False, **executable_path)


[WDM] - Current google-chrome version is 89.0.4389
[WDM] - Get LATEST driver version for 89.0.4389






[WDM] - Driver [/Users/seltsam/.wdm/drivers/chromedriver/mac64/89.0.4389.23/chromedriver] found in cache


## NASA website

In [3]:
# Scrape the newest headline and teaser from the NASA Mars News site.
# Produces: `title` (str) and `paragraph` (str) for the top-most article.

# Url to be scraped (NASA Mars News Site)
url = "https://mars.nasa.gov/news/"

# CSS selector for a single article entry in the news list
article_selector = "ul.item_list li.slide"

# Direct browser to nasa page
browser.visit(url)

# Give the article list a few seconds to show up before grabbing the HTML.
# NOTE(review): the list appears to be filled in after the initial page load;
# without this wait the snapshot below can be taken too early and contain no articles.
browser.is_element_present_by_css(article_selector, wait_time=5)

# Save html from browser to variable
html = browser.html

# Create Beautiful soup object and parse
soup = bs(html, "html.parser")

# Retrieve results for most recent title (top most article)
results = soup.select_one(article_selector)

# Fail with a clear message instead of an AttributeError on None further down
if results is None:
    raise RuntimeError(
        f"No article matching '{article_selector}' was found at {url}; "
        "the page may not have finished loading or its layout changed."
    )

# Save first title as title variable
title = results.find("div", class_="content_title").text

# Save first paragraph of article as paragraph variable
paragraph = results.find("div", class_="article_teaser_body").text

# Print results to console for testing
print(title)
print()
print(paragraph)


NASA's Perseverance Mars Rover Mission Honors Navajo Language

Working with the Navajo Nation, the rover team has named features on Mars with words from the Navajo language.


## JPL website

In [4]:
# Scrape the featured image URL from the JPL Space page.
# Produces: `featured` (str), the absolute URL of the full-size featured image.

# Url to be scraped for image (Jet Propulsion Laboratory, California Institute of Technology)
url = "https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/index.html"

# Directory part of the page url (everything up to and including the last "/");
# derived from `url` so the two cannot drift apart
base_url = url.rsplit("/", 1)[0] + "/"

# Direct browser to JPL page
browser.visit(url)

# Save html from browser to variable
html = browser.html

# Create Beautiful soup object and parse
soup_image = bs(html, "html.parser")

# Locate the "full image" anchor in the parsed HTML (nothing is clicked);
# its href holds the path of the larger picture relative to the page directory
full_image = soup_image.find("a", class_="showimg fancybox-thumbs")

# Fail with a clear message instead of a TypeError when subscripting None
if full_image is None:
    raise RuntimeError(
        f"No 'full image' link (a.showimg.fancybox-thumbs) was found at {url}."
    )

# Save relative url of image with base url into variable
featured = base_url + full_image["href"]

# Print to console for testing
print(featured)


https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/image/featured/mars3.jpg


## Space Facts website

In [5]:
# Build the Mars facts table from the Space Facts page.
# Produces: `mars_df`, a DataFrame indexed by "Description" with a single "Mars" column.

# pd.read_html returns a list of DataFrames, one per <table> found on the page
mars_tables = pd.read_html("https://space-facts.com/mars/")

# Work on a copy of the first table so the parsed original stays untouched
mars_df = mars_tables[0].copy()

# Cleaning dataframe

# Name columns (raises ValueError if the table does not have exactly two columns)
mars_df.columns = ["Description", "Mars"]

# Set first column as index; reassign instead of mutating in place
mars_df = mars_df.set_index("Description")

# Display
mars_df


Unnamed: 0_level_0,Mars
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [6]:
# Render the facts DataFrame as an HTML <table> string
mars_table = mars_df.to_html()

# Bare expression: the notebook echoes the generated markup as a Python string
mars_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Mars</th>\n    </tr>\n    <tr>\n      <th>Description</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr>\n

## USGS Astrogeology website

In [7]:
# Collect the title and full-size image link for every Mars hemisphere.
# Produces: `hemisphere_images`, a list of {"title": ..., "link": ...} dicts.

# Url to be scraped for images (USGS Astrogeology)
url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

# Direct browser to USGS page
browser.visit(url)

# Headings of the search results; only their count is used below, because the
# elements are looked up again on every pass after navigating back
hemisphere_urls = browser.find_by_css("a.product-item h3")
hemisphere_count = len(hemisphere_urls)

# Accumulates one dictionary per hemisphere
hemisphere_images = []

for position in range(hemisphere_count):

    # Re-query the result headings and open the detail page of the current one
    browser.find_by_css("a.product-item h3")[position].click()

    # On the detail page: href of the "Sample" link, then the page heading text
    sample_href = browser.links.find_by_text("Sample").first["href"]
    heading_text = browser.find_by_css("h2.title").text

    # Store title and link together for this hemisphere
    hemisphere_images.append({"title": heading_text, "link": sample_href})

    # Return to the search results before the next pass
    browser.back()

# Print results to console
print(hemisphere_images)
    


[{'title': 'Cerberus Hemisphere Enhanced', 'link': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'link': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'link': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'link': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]


## Close browser

In [8]:
# End the Splinter browser session created earlier in the notebook
browser.quit()