## Import Dependencies and Splinter Setup

In [78]:
# 10.3.3  1. Import Splinter and Beautiful Soup
from splinter import Browser
from bs4 import BeautifulSoup as soup

import pandas as pd

In [79]:
# 2. Set up Splinter: launch an automated Chrome browser.
# `executable_path` is unpacked into Browser() as a keyword argument naming the
# chromedriver binary. It is a bare filename, so presumably chromedriver.exe must
# be discoverable from the working directory or PATH -- TODO confirm.
# headless=False asks for a visible browser window rather than a headless one.
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

## Visit the NASA Mars Site

In [80]:
# 3. Visit the Mars NASA news site
url = 'https://mars.nasa.gov/news/'
browser.visit(url)
# a. Optional delay for loading the page.
#    i.  The CSS selector combines tags (ul, li) with classes (item_list, slide);
#        for example, `ul.item_list` matches <ul class="item_list"> in the HTML.
#    ii. wait_time=1 gives the browser up to one second to find the element,
#        which helps with dynamic or image-heavy pages that load slowly.
#    The call returns a Boolean (shown as this cell's output) saying whether
#    the element was found; the value is not stored.
browser.is_element_present_by_css("ul.item_list li.slide", wait_time=1)




True

In [81]:
# 4. Parse the HTML
# i.   Grab the source of the page the browser is currently showing.
html = browser.html
# ii.  Parse that source with Beautiful Soup.
news_soup = soup(html, 'html.parser')
# iii. slide_elem is the parent element: the first <li class="slide"> found
#      under <ul class="item_list">. Later lookups search its descendants.
slide_elem = news_soup.select_one('ul.item_list li.slide')

# Inside the parent element, locate the <div class="content_title"> and keep
# its text as `news_title`.
title_elem = slide_elem.find("div", class_='content_title')
news_title = title_elem.get_text()
news_title

"5 Hidden Gems Are Riding Aboard NASA's Perseverance Rover"

In [82]:
# Still inside the parent element, locate the teaser <div> and keep its text
# as the article summary paragraph.
teaser_elem = slide_elem.find('div', class_="article_teaser_body")
news_p = teaser_elem.get_text()
news_p

"The symbols, mottos, and small objects added to the agency's newest Mars rover serve a variety of purposes, from functional to decorative."

## 10.3.4 Scrape Featured Image

In [83]:
# Splinter reference: methods for finding elements (examples only; every line below is commented out and nothing is executed)

    # browser.find_by_css('h1')
    # browser.find_by_xpath('//h1')
    # browser.find_by_tag('h1')
    # browser.find_by_name('name')
    # browser.find_by_text('Hello World!')
    # browser.find_by_id('firstheader')
    # browser.find_by_value('query')

In [84]:
# Visit the JPL space-images page (the URL's query string sets category=Mars)
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)

In [85]:
# Find the full image button (the element whose id is 'full_image') and click it
full_image_elem = browser.find_by_id('full_image')
full_image_elem.click()

In [86]:
# Find the "more info" button and click it.
# i.   is_element_present_by_text() searches for an element with the given
#      text ("more info"), allowing up to wait_time seconds. It returns a
#      Boolean (True if present, False if not); this cell does not store or
#      check that value.
browser.is_element_present_by_text('more info', wait_time=1)
# ii.  browser.links.find_by_partial_text() takes the string 'more info' and
#      finds the link associated with that text.
more_info_elem = browser.links.find_by_partial_text('more info')
# iii. Tell Splinter to click that link by calling .click() on more_info_elem.
more_info_elem.click()




In [87]:
# Parse the HTML of the page now loaded in the browser with Beautiful Soup.
# Note: `html` is reassigned here; it previously held the news page source.
html = browser.html
img_soup = soup(html, 'html.parser')

In [88]:
# Find the relative image URL.
# Selector walk-through: `figure.lede` is the <figure /> tag with class "lede",
# `a` is a tag nested inside that figure, and `img` is nested inside the `a`.
# In other words: "this is where the image we want lives".
img_elem = img_soup.select_one('figure.lede a img')
# .get("src") pulls the link to the image out of the tag's src attribute.
img_url_rel = img_elem.get("src")
img_url_rel

'/spaceimages/images/largesize/PIA16227_hires.jpg'

In [89]:
# Build an absolute URL by prefixing the JPL base URL to the relative image
# path found above (f-string interpolation of img_url_rel).
img_url = f'https://www.jpl.nasa.gov{img_url_rel}'
img_url

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA16227_hires.jpg'

## Scrape Mars Facts

In [90]:
# 10.3.5 Scrape Mars Data: Mars Facts (within a table)
# Instead of scraping each row, or the data in each <td />, scrape the entire
# table with pandas' read_html(); [0] keeps the first table it returns.
df = pd.read_html('http://space-facts.com/mars/')[0]
df.columns = ['description', 'value']
# Reassign rather than using inplace=True so the step is an explicit,
# chainable transformation; the resulting frame is the same.
df = df.set_index('description')
df

Unnamed: 0_level_0,value
description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [91]:
# The extracted Mars facts are meant to be placed on a web page, so convert
# the DataFrame back to an HTML table string.

df.to_html()


'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>value</th>\n    </tr>\n    <tr>\n      <th>description</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr>\

## Mars Weather



In [92]:
# Visit the Mars InSight weather website
url = 'https://mars.nasa.gov/insight/weather/'
browser.visit(url)


In [93]:
# Parse the weather page's HTML with Beautiful Soup
# (`html` is reassigned again to the source of the page currently loaded).
html = browser.html
weather_soup = soup(html, 'html.parser')

In [94]:
# Scrape the Daily Weather Report table: take the first <table> with class
# "mb_table" and print its markup, indented by prettify(), for inspection.
weather_table = weather_soup.find('table', class_='mb_table')
print(weather_table.prettify())

<table class="mb_table" id="weather_observation" style="width:100%;">
 <thead>
  <tr>
   <th colspan="2" scope="col">
    Time
   </th>
   <th colspan="3" id="temperature_lbl" scope="col">
    Air Temperature (
    <span class="lbl_fahrenheit">
     °F
    </span>
    <span class="slash">
     |
    </span>
    <span class="lbl_celsius fadeBlack">
     °C
    </span>
    )
   </th>
   <th colspan="4" id="windspeed_lbl" scope="col">
    Wind Speed (
    <span class="lbl_mph">
     mph
    </span>
    <span class="slash">
     |
    </span>
    <span class="lbl_mps fadeBlack">
     m/s
    </span>
    )
   </th>
   <th colspan="3" id="pressure_lbl" scope="col">
    Pressure (Pa)
   </th>
  </tr>
 </thead>
 <tbody>
  <tr id="weather_top">
   <th class="sol" scope="row">
    Date
   </th>
   <th class="sol" scope="row">
    Sol
   </th>
   <td class="temperature max">
    Max.
   </td>
   <td class="temperature avg">
    Avg.
   </td>
   <td class="temperature min">
    Min.
   </td>
   <t

## D1: Scrape High-Resolution Mars’ Hemisphere Images and Titles

### Hemispheres

In [95]:
# 1. Use the browser to visit the USGS Astrogeology search results for
#    enhanced Mars hemisphere images.
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url)

In [96]:
# 2. Create a list to hold the images and titles.
hemisphere_image_urls = []

# 3. Retrieve the image URL and title for each hemisphere.

# Wait (up to one second) for the thumbnail headings, then count them so the
# loop knows how many hemisphere pages to visit.
browser.is_element_present_by_css('a.product-item h3', wait_time=1)
image_products = browser.find_by_css('a.product-item h3')
image_number = len(image_products)

for i in range(image_number):
    # Dictionary holding this hemisphere's 'title' and 'img_url'
    hemisphere = {}
    # Re-find the thumbnails on every pass (the browser leaves the results
    # page and comes back each iteration) and click the i-th one to open that
    # hemisphere's page. The return value of click() is not needed.
    browser.find_by_css('a.product-item h3')[i].click()
    # Get title information
    browser.is_element_present_by_css('div.content h2.title', wait_time=1)
    hemisphere['title'] = browser.find_by_css('div.content h2.title').text
    # Get link to the full-resolution image. Indexing the result list with a
    # string reads that attribute from the first matching element.
    browser.is_element_present_by_css('div.downloads li a', wait_time=1)
    sample_find = browser.find_by_css('div.downloads li a')
    hemisphere['img_url'] = sample_find['href']
    hemisphere_image_urls.append(hemisphere)

    # Go back to the search results page before handling the next hemisphere
    browser.back()


In [97]:
# 4. Display the list of dictionaries, one per hemisphere, each holding
#    that hemisphere's title and image URL.
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]

In [98]:
# 5. Quit the browser to end the automated session
browser.quit()