In [217]:
# Our Notebook.

In [218]:
import os
from bs4 import BeautifulSoup as bs
import pandas as pd
from splinter import Browser

In [219]:
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

In [220]:
nasa_url = 'https://mars.nasa.gov/news/'
browser.visit(nasa_url)

In [221]:
html = browser.html
soupy = bs(html, 'html.parser')
story = soupy.find(class_='list_text')

In [222]:
story

<div class="list_text"><div class="list_date">August 24, 2020</div><div class="content_title"><a href="/news/8744/nasa-engineers-checking-insights-weather-sensors/" target="_self">NASA Engineers Checking InSight's Weather Sensors</a></div><div class="article_teaser_body">An electronics issue is suspected to be preventing the sensors from sharing their data about Mars weather with the spacecraft.</div></div>

In [223]:
content_title = story.find(class_='content_title')
link = content_title.find('a')
href = link['href']
href_text = content_title.find('a', href=href, target="_self").text
print(href)
print(href_text)

/news/8744/nasa-engineers-checking-insights-weather-sensors/
NASA Engineers Checking InSight's Weather Sensors


In [224]:
# To scrape the paragraph from the article itself, we have to visit
# the page. We can construct the appropriate url by adding the
# href link we got to the basic Mars website.
basic_url = 'https://mars.nasa.gov'
article_url = basic_url + href
print(article_url)

https://mars.nasa.gov/news/8744/nasa-engineers-checking-insights-weather-sensors/


In [52]:
# It's now time to visit the article page.
browser.visit(article_url)

In [53]:
html_another = browser.html
soupy_another = bs(html_another, 'html.parser')
story_another = soupy_another.find(class_='wysiwyg_content')

In [55]:
print(story_another)

<div class="wysiwyg_content">
<hr/>
<p><i>A crisply rendered web application can show you where the agency's Mars 2020 mission is right now as it makes its way to the Red Planet for a Feb. 18, 2021, landing.</i></p>
<hr/>
<p>The last time we saw NASA's Mars 2020 Perseverance rover mission was on July 30, 2020, as it disappeared into the black of deep space on a trajectory for Mars. But with NASA's <a href="https://eyes.nasa.gov/apps/orrery/#/sc_perseverance" target="_blank">Eyes on the Solar System</a>, you can follow in real time as humanity's most sophisticated rover – and the Ingenuity Mars Helicopter traveling with it – treks millions of miles over the next six months to <a href="/news/8549/nasas-mars-2020-will-hunt-for-microscopic-fossils/">Jezero Crater</a>.</p>
<p>"Eyes on the Solar System visualizes the same trajectory data that the navigation team uses to plot Perseverance's course to Mars," said Fernando Abilleira, the Mars 2020 mission design and navigation manager at NASA's

In [59]:
# Since the first paragraph of this article is technically just the
# italicized description, we will need to scrape all of the paragraphs
# and only focus on the "second" one.
paragraphs = story_another.find_all('p')
first_paragraph = paragraphs[1].text
print(first_paragraph)

The last time we saw NASA's Mars 2020 Perseverance rover mission was on July 30, 2020, as it disappeared into the black of deep space on a trajectory for Mars. But with NASA's Eyes on the Solar System, you can follow in real time as humanity's most sophisticated rover – and the Ingenuity Mars Helicopter traveling with it – treks millions of miles over the next six months to Jezero Crater.


In [60]:
# We will now set up the url components for hte JPL section.
jpl_base = 'https://www.jpl.nasa.gov'
mars_image_extension = '/spaceimages/?search=&category=Mars#submit'
mars_images = jpl_base + mars_image_extension
print(mars_images)

https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars#submit


In [144]:
browser.visit(mars_images)
html_jpl = browser.html
soupy_jpl = bs(html_jpl, 'html.parser')
bg_image = soupy_jpl.find('article', class_='carousel_item')

In [157]:
# For the first image in the grid list...
page_images = soupy_jpl.find_all('img')
# To actually get to the first image, we have to skip the stylistic images that
# come before the grid list.
first_image = page_images[5]['src']
print(first_image)

/spaceimages/images/wallpaper/PIA24072-640x350.jpg


In [156]:
# For the featured background image...
featured_image_url = bg_image['style'].split("('", 1)[1].split("')")[0]
print(featured_image_url)

/spaceimages/images/wallpaper/PIA17462-1920x1200.jpg


In [158]:
# Time to scrape Mars Facts with Pandas.
mars_facts_url = 'https://space-facts.com/mars/'
table_facts = pd.read_html(mars_facts_url)

In [161]:
table_facts

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers,
   Mars - Earth Comparison             Mars            Earth
 0               Diameter:         6,779 km        12,742 km
 1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 2                  Moons:                2                1
 3      Distance from Sun:   227,943,824 km   149,598,262 km
 4         Length of Year:   687 Earth days      365.24 days
 5            Temperature:     -87 to -5 °C      -88 to 58°C,
           

In [162]:
# We only want that first table, so we'll define our table as such.
facts_table = table_facts[0]
facts_table

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [165]:
# I don't care about the column names, so I'll make them blank.
# Additionally, I'll set the index to be the first column.
facts_table.columns = ['', 'answers']
facts_table.set_index('', inplace=True)
facts_table.columns = ['']
facts_table

In [210]:
# We now want the HTML string.
html_version = facts_table.to_html()
html_version = html_version.replace('\n', '')
html_version

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th></th>    </tr>    <tr>      <th></th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter:</th>      <td>6,792 km</td>    </tr>    <tr>      <th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.39 × 10^23 kg (0.11 Earths)</td>    </tr>    <tr>      <th>Moons:</th>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>Orbit Distance:</th>      <td>227,943,824 km (1.38 AU)</td>    </tr>    <tr>      <th>Orbit Period:</th>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>Surface Temperature:</th>      <td>-87 to -5 °C</td>    </tr>    <tr>      <th>First Record:</th>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>Recorded By:</th>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>'

In [178]:
# Now for the hemispheres site...
hemispheres_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(hemispheres_url)
html_hemispheres = browser.html
soupy_hemispheres = bs(html_hemispheres, 'html.parser')

# The four hemispheres are in the collapsible results class, so we'll go there.
results = soupy_hemispheres.find(class_='collapsible results')
hemispheres_list = results.find_all(class_='item')
# We should get a list of four items.
print(hemispheres_list[1])

<div class="item"><a class="itemLink product-item" href="/search/map/Mars/Viking/schiaparelli_enhanced"><img alt="Schiaparelli Hemisphere Enhanced thumbnail" class="thumb" src="/cache/images/08eac6e22c07fb1fe72223a79252de20_schiaparelli_enhanced.tif_thumb.png"/></a><div class="description"><a class="itemLink product-item" href="/search/map/Mars/Viking/schiaparelli_enhanced"><h3>Schiaparelli Hemisphere Enhanced</h3></a><span class="subtitle" style="float:left">image/tiff 35 MB</span><span class="pubDate" style="float:right"></span><br/><p>Mosaic of the Schiaparelli hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. The images were acquired in 1980 during early northern…</p></div> <!-- end description --></div>


In [189]:
# We'll create an empty list to store the image urls.
image_links = []

# We need to get the image urls from each of these list items, so a for loop is in order.
for hemisphere in hemispheres_list:
    link_location = hemisphere.find('a')
    hemisphere_extension = link_location['href']
    hemisphere_page = 'https://astrogeology.usgs.gov' + hemisphere_extension
    # We will now have to visit the url we just spliced together.
    browser.visit(hemisphere_page)
    html_hemi = browser.html
    soupy_hemi = bs(html_hemi, 'html.parser')
    # On each hemisphere page, we need to first go to li.
    # Since it's the sample we want, we can just use the regular find function.
    list_item = soupy_hemi.find('li')
    destination = list_item.find('a')
    image_links.append(destination['href'])

In [192]:
image_links

['https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg',
 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg']

In [193]:
# Since we're going to ultimately store these links in a dictionary, we'll create
# a list includes the corresponding hemisphere names.
hemisphere_keys = ['Cerebus', 'Sciaparelli', 'Syrtis Major', 'Valles Marineris']

In [215]:
# We will not create the empty dictionary.
hemisphere_image_dictionary = {}
for x in range(0, len(image_links)):   
    hemisphere_image_dictionary.update( {hemisphere_keys[x]: image_links[x]})
hemisphere_image_dictionary

{'Cerebus': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
 'Sciaparelli': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
 'Syrtis Major': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg',
 'Valles Marineris': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}

In [None]:
# This quits the browser.
browser.quit()