In [1]:
# Import Dependencies
from bs4 import BeautifulSoup as bs
from splinter import Browser
import pandas as pd

In [2]:
# Location of the ChromeDriver binary, kept in a dict so it can be unpacked
# straight into Browser() as the "executable_path" keyword argument
executable_path = {"executable_path": "/usr/local/bin/chromedriver"}

# Start a Chrome session through splinter with headless mode switched off
browser = Browser("chrome", headless=False, **executable_path)

In [3]:
# URL for Scraping: the NASA Mars news listing page
news_url = "https://mars.nasa.gov/news/"

# Visit URL: point the splinter-controlled browser at the news page
browser.visit(news_url)

In [4]:
# Ensure Page Loads: ask splinter whether a news item ("li.slide" inside
# "ul.item_list") is present, with wait_time=1 (presumably a wait of up to
# about one second -- confirm against the splinter docs). The boolean result
# is only echoed as the cell output; it is not stored or acted on.
browser.is_element_present_by_css("ul.item_list li.slide", wait_time=1)

True

In [5]:
# Parse the browser's current HTML for the news page with "html.parser"
soup = bs(browser.html, features="html.parser")

In [6]:
# Take the first list item from the news feed
story = soup.select_one("ul.item_list li.slide")

# Headline text of that item, shown straight away
news_title = story.select_one("div.content_title").text
print(news_title)

# Teaser paragraph of the same item, shown after the headline
news_p = story.select_one("div.article_teaser_body").text
print(news_p)

The Detective Aboard NASA's Perseverance Rover
An instrument called SHERLOC will, with the help of its partner WATSON, hunt for signs of ancient life by detecting organic molecules and minerals.


In [7]:
# URL for Scraping: JPL space images search page, filtered to category=Mars
featured_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"

# Visit URL: navigate the same browser session to the image search page
browser.visit(featured_url)

In [8]:
# Re-parse the browser's HTML now that it shows the image search page;
# this replaces the soup built earlier from the news page
soup = bs(browser.html, features="html.parser")

In [9]:
# Site root that the partial image path is appended to
nasa_url = "https://www.jpl.nasa.gov"

# First entry in the list of image articles
picture = soup.select_one("ul.articles li.slide")

# Partial image path, read from the fancybox anchor's data attribute
mars_pic = picture.select_one("a.fancybox").get("data-fancybox-href")

# Combine root and partial path into the full image URL, then show it
featured_image_url = f"{nasa_url}{mars_pic}"
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA23932_hires.jpg


In [10]:
# Page that hosts the Mars facts table
fact_url = "https://space-facts.com/mars/"

# Let pandas parse the page's tables and keep the first one
mars_table = pd.read_html(fact_url)[0]

# Name the two columns, then use the descriptions as the row index
mars_table.columns = ["Description", "Value"]
mars_table = mars_table.set_index("Description")

# Render the finished table as the cell output
mars_table

Unnamed: 0_level_0,Value
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [12]:
# URL for Scraping: USGS Astrogeology search results for "hemisphere enhanced"
# with the target set to Mars
hemisphere_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

# Visit URL: load the search results page in the browser
browser.visit(hemisphere_url)

In [13]:
# Collects one {"img_url": ..., "title": ...} dictionary per hemisphere
hemisphere_image_urls = []

# Result headings on the search page; used here only to count the hemispheres
links = browser.find_by_css("a.product-item h3")

# Visit each hemisphere's detail page in turn and record its image link
for index in range(len(links)):

    # Re-query the headings on every pass: elements found before navigating
    # away would raise a stale element exception when clicked
    browser.find_by_css("a.product-item h3")[index].click()

    # Locate the "Sample" anchor. browser.find_link_by_text is deprecated in
    # newer splinter releases in favour of browser.links.find_by_text, so
    # prefer the new API and fall back only on installs that predate it.
    if hasattr(browser, "links"):
        sample_link = browser.links.find_by_text("Sample").first
    else:
        sample_link = browser.find_link_by_text("Sample").first

    # Record the image href and the page heading for this hemisphere
    hemisphere_image_urls.append(
        {
            "img_url": sample_link["href"],
            "title": browser.find_by_css("h2.title").text,
        }
    )

    # Return to the search results before handling the next hemisphere
    browser.back()

# Display the collected list of dictionaries
hemisphere_image_urls

[{'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
  'title': 'Cerberus Hemisphere Enhanced'},
 {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
  'title': 'Schiaparelli Hemisphere Enhanced'},
 {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg',
  'title': 'Syrtis Major Hemisphere Enhanced'},
 {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg',
  'title': 'Valles Marineris Hemisphere Enhanced'}]

In [15]:
# Close Browser: end the splinter browser session now that scraping is done
browser.quit()