In [1]:
# Import Dependencies
import pandas as pd
import requests as req
import time
from bs4 import BeautifulSoup as bs
from splinter import Browser
from selenium import webdriver

In [2]:
# Start a visible (non-headless) Chrome session through splinter.
# The driver binary is given as the bare relative name 'chromedriver.exe'.
executable_path = dict(executable_path='chromedriver.exe')
browser = Browser('chrome', headless=False, **executable_path)

In [3]:
## Step 1 - Scraping

In [4]:
# NASA Mars News
# Define the news listing url and load it in the splinter-driven browser
url = 'https://mars.nasa.gov/news/'
browser.visit(url)

In [5]:
# Scrape the [NASA Mars News Site](https://mars.nasa.gov/news/)
# Wait (up to 10 s) for the first article card to be present before taking the
# HTML snapshot. When the notebook is run top-to-bottom the page may still be
# loading at this point, in which case the lookups below would find nothing.
browser.is_element_present_by_css('div.image_and_description_container', wait_time=10)
html = browser.html
soup = bs(html, 'html.parser')

# Scrape the section of html with the data needed (first matching container)
image_description = soup.find('div', class_='image_and_description_container')
if image_description is None:
    # Fail with a readable message instead of an AttributeError on None
    raise RuntimeError('No news article container was found in the page HTML')

# Further narrow down the data until there is only the title and paragraph
news_title = image_description.find('div', class_='content_title')
news_p = image_description.find('div', class_='article_teaser_body')
if news_title is None or news_p is None:
    raise RuntimeError('News article container is missing its title or teaser text')

# Print the results
print(news_title.text)
print(news_p.text)


NASA InSight's 'Mole' Is Out of Sight
Now that the heat probe is just below the Martian surface, InSight's arm will scoop some additional soil on top to help it keep digging so it can take Mars' temperature.


In [6]:
## JPL Mars Space Images - Featured Image

In [7]:
# Define the JPL space images url (Mars category) and load it in the browser
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)

In [8]:
# Visit the url for JPL Featured Space Image [here](https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars)
# Use splinter to navigate the site and find the image url for the current Featured Mars Image
# Assign the url string to a variable called `featured_image_url`

# NOTE(review): this address is a fixed literal, not something read from the page
# the browser has open, so it does not track the "current" featured image.
# Host and path are joined with exactly one slash.
featured_image_url = 'https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA00063-1920x1200.jpg'

In [9]:
## Mars Facts

In [10]:
# Visit the Mars Facts webpage [here](https://space-facts.com/mars/) for interesting facts about Mars
facts_url = "https://space-facts.com/mars/"
browser.visit(facts_url)
# Snapshot of the page HTML.
# NOTE(review): `html` is not read again before it is reassigned further down — confirm this snapshot is needed.
html = browser.html

In [11]:
# Use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.
# read_html is given the url directly and returns a list of DataFrames, one per table found
table = pd.read_html(facts_url)
# Keep the second table; in the output below it is the "Mars - Earth Comparison" table
mars_facts = table[1]

# Show the raw frame and its default integer index before any reshaping
print(mars_facts)
mars_facts.index

  Mars - Earth Comparison             Mars            Earth
0               Diameter:         6,779 km        12,742 km
1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
2                  Moons:                2                1
3      Distance from Sun:   227,943,824 km   149,598,262 km
4         Length of Year:   687 Earth days      365.24 days
5            Temperature:     -87 to -5 °C      -88 to 58°C


RangeIndex(start=0, stop=6, step=1)

In [12]:
# Rebuild the tidy table from the untouched scrape result so this cell can be
# re-run safely: once the first column has been moved into the index the frame
# has only two columns, and assigning three names to it would raise an error.
mars_facts = table[1].copy()
# Rename columns
mars_facts.columns = ['Description', 'Mars', 'Earth']
# Reset Index to be description
mars_facts = mars_facts.set_index('Description')
mars_facts

Unnamed: 0_level_0,Mars,Earth
Description,Unnamed: 1_level_1,Unnamed: 2_level_1
Diameter:,"6,779 km","12,742 km"
Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
Moons:,2,1
Distance from Sun:,"227,943,824 km","149,598,262 km"
Length of Year:,687 Earth days,365.24 days
Temperature:,-87 to -5 °C,-88 to 58°C


In [13]:
# Use Pandas to convert the data to a HTML table string
# (to_html returns the markup as a string only when no output target is given)
mars_facts_html = mars_facts.to_html()
# Also write the same table to the file table.html
mars_facts.to_html('table.html')

In [14]:
## Mars Hemispheres

# Example of the target structure:
# hemisphere_image_urls = [
#     {"title": "Valles Marineris Hemisphere", "img_url": "..."},
#     {"title": "Cerberus Hemisphere", "img_url": "..."},
#     {"title": "Schiaparelli Hemisphere", "img_url": "..."},
#     {"title": "Syrtis Major Hemisphere", "img_url": "..."},
# ]

In [15]:
# Visit the USGS Astrogeology site [here](https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars)
# Obtain high resolution images for each of Mars's hemispheres.
# Accumulators that the loop over the search results (further down) appends to
nextpage_urls = []
imgtitles = []
# Site root; prepended to the hrefs taken from the results page to form full addresses
base_url = 'https://astrogeology.usgs.gov'
hemispheres_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(hemispheres_url)

In [16]:
# Click each of the links to the hemispheres in order to find the image url to the full resolution image
# Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing the hemisphere name.

# Scrape page into Soup: parse whatever page the browser currently has open
html = browser.html
soup = bs(html, "html.parser")

In [17]:
# Create the list intended to hold one dictionary (title & image link) per hemisphere.
# NOTE(review): no visible cell in this notebook appends to it — confirm where it gets filled.
hemisphere_image_urls = []

In [18]:
# Retrieve all html elements that contain hemisphere images information
# (every <div class="description"> in the parsed page)
hemispheres_img = soup.find_all('div', class_='description')

In [19]:
# Iterate through each div to pull titles and make list of hrefs to iterate through
# Start from empty lists on every run so that re-executing this cell does not
# append a second copy of each entry.
nextpage_urls = []
imgtitles = []
# Only the first four description blocks are used
for div in hemispheres_img[:4]:
    # Use Beautiful Soup's find() method to navigate and retrieve attributes:
    # the first link in the block carries both the href and the <h3> title
    link = div.find('a')
    img_title = link.find('h3').text
    imgtitles.append(img_title)
    # NOTE: the stored address is base_url + href, i.e. the hemisphere's page
    # on the site (see the output below), not an image file.
    nextpage_urls.append({"title": img_title, "img url": base_url + link['href']})

nextpage_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img url': 'https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img url': 'https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img url': 'https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img url': 'https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced'}]

In [20]:
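# Append the dictionary with the image url string and the hemisphere title to a list.
# This list will contain one dictionary for each hemisphere.
# TODO: this step has no code yet — `hemisphere_image_urls` is created empty and is not appended to in the cells shown.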

In [21]:
# Shut down the automated browser session now that scraping is finished
browser.quit()