In [1]:
# Import Splinter and BeautifulSoup
from splinter import Browser
from bs4 import BeautifulSoup
import pandas as pd

In [2]:
# Point Splinter at the chromedriver executable and start a Chrome session
executable_path = dict(executable_path='chromedriver')
browser = Browser('chrome', **executable_path)

In [3]:
# Visit the NASA Mars news site
url = 'https://mars.nasa.gov/news/'
browser.visit(url)
# Optional delay for loading the page: check (with wait_time=1) that a news
# slide is present. The boolean result is only displayed as the cell output.
browser.is_element_present_by_css("ul.item_list li.slide", wait_time=1)

True

In [4]:
# Parse the page the browser currently shows and keep the first element matching
# 'ul.item_list li.slide'; the title and teaser lookups below use it as parent.
html = browser.html
news_soup = BeautifulSoup(html, 'html.parser')
slide_elem = news_soup.select_one('ul.item_list li.slide')

In [5]:
# Inspect the `content_title` div inside the selected slide
slide_elem.find("div", class_='content_title')

<div class="content_title"><a href="/news/8631/nasas-curiosity-mars-rover-takes-a-new-selfie-before-record-climb/" target="_self">NASA's Curiosity Mars Rover Takes a New Selfie Before Record Climb</a></div>

In [6]:
# Use the parent element to find the `content_title` div and save its text as `news_title`
title_div = slide_elem.find("div", class_='content_title')
news_title = title_div.get_text()
news_title

"NASA's Curiosity Mars Rover Takes a New Selfie Before Record Climb"

In [7]:
# Use the parent element to find the teaser div and save its text as `news_p`
teaser_div = slide_elem.find('div', class_="article_teaser_body")
news_p = teaser_div.get_text()
news_p

'Along with capturing an image before its steepest ascent ever, the robotic explorer filmed its "selfie stick," or robotic arm, in action.'

### Featured Images

In [8]:
# Visit the JPL space images page (query string selects the Mars category)
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)

In [9]:
# Find the full image button (element with id 'full_image') and click it
full_image_elem = browser.find_by_id('full_image')
full_image_elem.click()

In [10]:
# Wait (wait_time=1) for the 'more info' text to show up, then follow that link
browser.is_element_present_by_text('more info', wait_time=1)
# `browser.find_link_by_partial_text` is deprecated in Splinter and removed in
# newer releases; `browser.links.find_by_partial_text` is the supported finder.
more_info_elem = browser.links.find_by_partial_text('more info')
more_info_elem.click()



In [11]:
# Parse the HTML of the page reached after clicking 'more info'
html = browser.html
img_soup = BeautifulSoup(html, 'html.parser')

In [12]:
# Locate the lede figure's image tag and read its (relative) `src` attribute
lede_img = img_soup.select_one('figure.lede a img')
img_url_rel = lede_img.get("src")
img_url_rel

'/spaceimages/images/largesize/PIA17924_hires.jpg'

In [13]:
# Prefix the relative path with the JPL site root to get an absolute URL
jpl_base_url = 'https://www.jpl.nasa.gov'
img_url = f'{jpl_base_url}{img_url_rel}'
img_url

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA17924_hires.jpg'

In [14]:
# Read the HTML tables on the Mars facts page into a list of DataFrames
# and keep the first one.
df = pd.read_html('http://space-facts.com/mars/')[0]

# Label the two columns and index the facts by their description.
# Reassigning (instead of `inplace=True`) keeps the steps explicit and chainable.
df.columns = ['description', 'value']
df = df.set_index('description')
df

Unnamed: 0_level_0,value
description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [15]:
# Pandas can also convert the DataFrame back into HTML-ready markup;
# the resulting string is shown as the cell output.
df.to_html()

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>value</th>\n    </tr>\n    <tr>\n      <th>description</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr>\

# Challenge Starts Here

In [16]:
# Visit the USGS Astrogeology search results page for enhanced Mars hemispheres.
# DOMAIN_URL is kept separate because the result links are site-relative paths.
DOMAIN_URL = 'https://astrogeology.usgs.gov'
url = f'{DOMAIN_URL}/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url)

In [17]:
# Parse the search results and count the result entries (divs with class 'item')
html = browser.html
soup = BeautifulSoup(html,'html.parser')
items = soup.find_all('div',class_='item')
len(items)

4

In [18]:
# Collect, for every search result, its title and the link to its detail page
dicList = []

for item in items:
    dicList.append({
        # title comes from the H3 inside the item
        "title": item.find('h3').text,
        # link comes from the first anchor (A tag) inside the item
        "href": item.find('a')["href"],
    })

dicList

[{'title': 'Cerberus Hemisphere Enhanced',
  'href': '/search/map/Mars/Viking/cerberus_enhanced'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'href': '/search/map/Mars/Viking/schiaparelli_enhanced'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'href': '/search/map/Mars/Viking/syrtis_major_enhanced'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'href': '/search/map/Mars/Viking/valles_marineris_enhanced'}]

In [19]:
# Follow each result's detail page and record its image download links

for dicItem in dicList:
    # Entries resolved by an earlier run of this cell no longer carry 'href';
    # skip them so re-running the cell does not raise KeyError.
    if 'href' not in dicItem:
        continue
    # visit the detail page behind the site-relative link
    browser.visit(f'{DOMAIN_URL}{dicItem["href"]}')
    html = browser.html
    soup = BeautifulSoup(html,'html.parser')
    # look up the download links once: the first anchor is the sample JPG,
    # the second is the original image
    download_links = soup.find('div',class_='downloads').find_all('a')
    sample = download_links[0]["href"]
    original = download_links[1]["href"]
    # store
    dicItem['img_url_jpg'] = sample
    dicItem['img_url'] = original
    # remove the follow link now that it has been resolved
    dicItem.pop('href')

dicList

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url_jpg': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url_jpg': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url_jpg': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url_jpg': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg',
  'img_url': 'http://astropedia.astrogeo

In [20]:
# Shut down the automated browser session now that scraping is finished
browser.quit()