In [1]:
# Dependencies
from bs4 import BeautifulSoup as bs
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist
import os
import requests
import pandas as pd
import time

In [2]:
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

# Read HTML from website
url = "https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest"

browser.visit(url)

In [3]:
# Create a Beautiful Soup object
html = browser.html
soup = bs(html, 'html.parser')

In [4]:
# Examine the results, then determine element that contains title and paragraph
# print(soup.prettify())

## NASA Mars News

In [5]:
news_title = soup.find("li", class_="slide").find("div", class_="content_title").text
print(news_title)
news_p = soup.find("li", class_="slide").find("div", class_="article_teaser_body").text
print(news_p)

NASA's New Mars Rover Is Ready for Space Lasers
Perseverance is one of a few Mars spacecraft carrying laser retroreflectors. The devices could provide new science and safer Mars landings in the future.


## JPL Mars Space Images - Featured Image

In [6]:
# Visit/Set the url for JPL Featured Space Image
url="https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(url)

In [7]:
featured_img_button = browser.find_by_id('full_image')[0]
featured_img_button.click()
browser.is_element_present_by_text('more info',wait_time =2)
more_info_button = browser.links.find_by_partial_text('more info')
more_info_button.click()

In [8]:
# Create a Beautiful Soup object
html = browser.html
soup = bs(html, 'html.parser')

In [9]:
# print(soup.prettify())

In [10]:
featured_img = soup.select_one("figure.lede a img").get("src")
# featured_img
featured_img = (f" https://www.jpl.nasa.gov{featured_img}")

## Mars Facts

In [11]:
# Visit/Set the url to scrape the table 
mars_facts_url = "https://space-facts.com/mars/"
#scrape pandas for table containing facts about the planet including Diameter, Mass, etc.
table = pd.read_html(mars_facts_url)
mars_df= table[0]

mars_df.columns = ["Facts", "Measure"]
mars_df

Unnamed: 0,Facts,Measure
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [25]:
#convert to html
mars_table = mars_df.to_html(classes='data table', index=False, header=False, border=0)

# print(mars_table)

# Mars Hemispheres

In [13]:
# Visit/Set the url
hemispheres_url = ('https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars')
browser.visit(hemispheres_url)
#scrape page into Soup
html = browser.html
soup = bs(html, "html.parser")
# Create a hemispheres list
hemispheres = []
# Search for the names of all four hemispheres
results = soup.find_all('div',class_="collapsible results")
hemi_list = results[0].find_all('h3')

# Get text and store in list
for name in hemi_list:
    hemispheres.append(name.text)

hemispheres

['Cerberus Hemisphere Enhanced',
 'Schiaparelli Hemisphere Enhanced',
 'Syrtis Major Hemisphere Enhanced',
 'Valles Marineris Hemisphere Enhanced']

In [14]:
# Search for thumbnail links
thumbnail_results = results[0].find_all('a')
thumbnails = []

for thumbnail in thumbnail_results:
    
    # If the thumbnail element has an image...
    if (thumbnail.img):
        
        # then grab the attached link
        thumbnail_url = 'https://astrogeology.usgs.gov/' + thumbnail['href']
        
        # Append list with links
        thumbnails.append(thumbnail_url)

thumbnails

['https://astrogeology.usgs.gov//search/map/Mars/Viking/cerberus_enhanced',
 'https://astrogeology.usgs.gov//search/map/Mars/Viking/schiaparelli_enhanced',
 'https://astrogeology.usgs.gov//search/map/Mars/Viking/syrtis_major_enhanced',
 'https://astrogeology.usgs.gov//search/map/Mars/Viking/valles_marineris_enhanced']

In [15]:
full_imgs = []

for url in thumbnails:
    
    # Click through each thumbanil link
    browser.visit(url)
    
    html = browser.html
    soup = bs(html, 'html.parser')
    
    # Scrape each page for the relative image path
    results = soup.find_all('img', class_='wide-image')
    relative_img_path = results[0]['src']
    
    # Combine the reltaive image path to get the full url
    img_link = 'https://astrogeology.usgs.gov/' + relative_img_path
    
    # Add full image links to a list
    full_imgs.append(img_link)

full_imgs

['https://astrogeology.usgs.gov//cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg',
 'https://astrogeology.usgs.gov//cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg',
 'https://astrogeology.usgs.gov//cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg',
 'https://astrogeology.usgs.gov//cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg']

In [16]:
print(f"Hemisphere Image URLs")
print()
print("-------------------------------------------------")
print()
print(f"Cerberus Hemisphere Enhanced:\n{full_imgs[0]}")
print()
print(f"Schiaparelli Hemisphere Enhanced:\n{full_imgs[1]}")
print()
print(f"Syrtis Major Hemisphere Enhanced:\n{full_imgs[2]}")
print()
print(f"Valles Marineris Hemisphere Enhanced:\n{full_imgs[3]}")

Hemisphere Image URLs

-------------------------------------------------

Cerberus Hemisphere Enhanced:
https://astrogeology.usgs.gov//cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg

Schiaparelli Hemisphere Enhanced:
https://astrogeology.usgs.gov//cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg

Syrtis Major Hemisphere Enhanced:
https://astrogeology.usgs.gov//cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg

Valles Marineris Hemisphere Enhanced:
https://astrogeology.usgs.gov//cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg


In [17]:
# Zip together the list of hemisphere names and hemisphere image links
mars_hemi_zip = zip(hemispheres, full_imgs)

hemisphere_image_urls = []

# Iterate through the zipped object
for title, img in mars_hemi_zip:
    
    mars_hemi_dict = {}
    
    # Add hemisphere title to dictionary
    mars_hemi_dict['title'] = title
    
    # Add image url to dictionary
    mars_hemi_dict['img_url'] = img
    
    # Append the list with dictionaries
    hemisphere_image_urls.append(mars_hemi_dict)

hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov//cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov//cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov//cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov//cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}]

In [18]:
browser.quit()