In [1]:
# Import standard-library helpers, then Splinter and BeautifulSoup
import shutil
from urllib.parse import urljoin

from bs4 import BeautifulSoup
from splinter import Browser

In [2]:
# Path to chromedriver: shell out to `which` to show where the binary is found.
# The printed path is the one used when the browser is initialized below.
!which chromedriver

/usr/local/bin/chromedriver


In [3]:
# Set the executable path and initialize the chrome browser in splinter.
# chromedriver is resolved from PATH so the notebook is not tied to a single
# machine layout; the original install location is kept as the fallback.
chromedriver_path = shutil.which('chromedriver') or '/usr/local/bin/chromedriver'
executable_path = {'executable_path': chromedriver_path}
browser = Browser('chrome', **executable_path, headless=False)

## Visit the NASA Mars news site


In [7]:
# Point the automated browser at the NASA Mars news listing
url = "https://mars.nasa.gov/news/"
browser.visit(url)

# Check for the first news slide (wait_time=1); the boolean result is this
# cell's displayed value
browser.is_element_present_by_css('ul.item_list li.slide', wait_time=1)

True

In [8]:
# Snapshot the page the browser currently shows and parse it with BeautifulSoup
# (the browser itself is left open for the later cells)
html = browser.html
news_soup = BeautifulSoup(html, features="html.parser")

# First element matching the news-slide selector
slide_elem = news_soup.select_one("ul.item_list li.slide")

In [9]:
# Inspect the title <div> inside the first slide
slide_elem.find('div', attrs={'class': 'content_title'})

<div class="content_title"><a href="/news/8361/nasas-insight-passes-halfway-to-mars-instruments-check-in/" target="_self">NASA's InSight Passes Halfway to Mars, Instruments Check In</a></div>

In [10]:
# Use the parent element to find the title div and save its text as `news_title`
# (the string itself is stored, not the BeautifulSoup Tag)
news_title = slide_elem.find("div", class_='content_title').get_text()
news_title

"NASA's InSight Passes Halfway to Mars, Instruments Check In"

In [12]:
# Use the parent element to find the teaser paragraph text.
# The title lookup that used to sit here was removed: its result was discarded.
list_text = slide_elem.find("div", class_='list_text')
teaser = list_text.find("div", class_='article_teaser_body')
news_data = teaser.get_text()

news_data

"NASA's InSight spacecraft, en route to a Nov. 26 landing on Mars, passed the halfway mark on Aug. 6. All of its instruments have been tested and are working well."

## JPL Space Images Featured Image

In [9]:
# Open the JPL space-images page (query string selects category=Mars)
url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(url)

In [10]:
# Load the page again, click the element with id `full_image`, then read the
# `src` of the first `.fancybox-image` element
browser.visit(url)
full_image_button = browser.find_by_id("full_image")
full_image_button.click()
featured_image_url = browser.find_by_css(".fancybox-image").first["src"]
print(featured_image_url)


https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA12831_ip.jpg


In [11]:
# TODO: find the "more info" button and click it (not implemented; this cell is empty)


In [12]:
# Parse the resulting html with soup
soup = BeautifulSoup(browser.html,'html.parser')
end = soup.find('img',class_='fancybox-image')['src']
# urljoin copes with both a site-relative `src` and an already-absolute one,
# where plain string concatenation would produce a doubled host
JPL_image = urljoin("https://www.jpl.nasa.gov", end)


## Mars Weather


In [13]:
# Visit the Mars weather Twitter feed and keep the first tweet whose first
# space-separated word is 'Sol'
url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(url)

# Start from None so a page without a matching tweet prints None instead of
# raising NameError (or silently reusing a value left over from an earlier run)
mars_weather = None
for tweet in browser.find_by_css('.tweet-text'):
    tweet_text = tweet.text  # read the element text once per tweet
    if tweet_text.partition(' ')[0] == 'Sol':
        mars_weather = tweet_text
        break
print(mars_weather)


Sol 2145 (2018-08-19), high -10C/14F, low -66C/-86F, pressure at 8.68 hPa, daylight 05:29-17:43


## Mars Facts

In [14]:
import pandas as pd

In [15]:
# Page that hosts the Mars facts table
url_mars_facts = 'https://space-facts.com/mars/'

In [16]:
# Read the facts table (selected by its html id) into pandas and label the
# columns; the labels carry no stray whitespace so `tables['Dimension']` works
tables = pd.read_html(url_mars_facts, attrs={'id': 'tablepress-mars'})[0]
tables.columns = ['Dimension', 'Value']
tables

Unnamed: 0,Dimension,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [17]:
# Or render the Mars facts table as an html string

df = pd.read_html(url_mars_facts, attrs = {'id': 'tablepress-mars'})[0]
df = df.set_index(0).rename(columns={1:"value"})
# Clear the index label by assignment; `del df.index.name` raises
# AttributeError on pandas 1.0 and later
df.index.name = None
mars_facts = df.to_html()
print(mars_facts)

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>value</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>Equatorial Diameter:</th>
      <td>6,792 km</td>
    </tr>
    <tr>
      <th>Polar Diameter:</th>
      <td>6,752 km</td>
    </tr>
    <tr>
      <th>Mass:</th>
      <td>6.42 x 10^23 kg (10.7% Earth)</td>
    </tr>
    <tr>
      <th>Moons:</th>
      <td>2 (Phobos &amp; Deimos)</td>
    </tr>
    <tr>
      <th>Orbit Distance:</th>
      <td>227,943,824 km (1.52 AU)</td>
    </tr>
    <tr>
      <th>Orbit Period:</th>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <th>Surface Temperature:</th>
      <td>-153 to 20 °C</td>
    </tr>
    <tr>
      <th>First Record:</th>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <th>Recorded By:</th>
      <td>Egyptian astronomers</td>
    </tr>
  </tbody>
</table>


## Mars Hemispheres

In [18]:
# USGS Astrogeology search results for the enhanced Mars hemisphere images
hemis_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

In [19]:
# Load the hemisphere search results and parse the page the browser now shows
browser.visit(hemis_url)
soup = BeautifulSoup(browser.html, features="html.parser")

In [20]:
# Empty list to receive the title strings, plus every <h3> tag on the page
headers = list()
titles = soup.find_all(name="h3")

In [21]:
# Fill `headers` with the text of each <h3>. Slice assignment replaces the
# list's contents in place, so re-running this cell does not append duplicates.
headers[:] = [title.text for title in titles]

In [22]:
# For each collected title, open the thumbnail at the same position, record
# the href of the 'Sample' link, then navigate back to the results page
images = []
for index in range(len(headers)):
    # The thumbnails are queried again on every pass, after browser.back()
    # has returned to the listing
    browser.find_by_css('img.thumb')[index].click()
    images.append(browser.find_by_text('Sample')['href'])
    browser.back()

In [23]:
# Pair each title with the image link collected at the same position
hemisphere_image_urls = [
    {"title": title_text, "img_url": image_url}
    for title_text, image_url in zip(headers, images)
]

In [151]:
# Show the collected list of {"title", "img_url"} dicts
print(hemisphere_image_urls)

[{'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]
