In [1]:
import shutil
import time

import pandas as pd
from bs4 import BeautifulSoup as bs
from splinter import Browser

In [2]:
# Ask the shell where the chromedriver binary lives (IPython `!` escape).
# The printed path is informational only: it is not captured into a variable.
!which chromedriver

/c/Users/jmleg/Anaconda3/envs/PythonData/chromedriver


In [3]:
# Establish the path to chromedriver and launch a visible Chrome window.
# Prefer whichever chromedriver is on PATH so the notebook is not tied to one
# machine; fall back to the original hard-coded location when none is found.
chromedriver_path = shutil.which('chromedriver') or '/Users/jmleg/Anaconda3/envs/PythonData/chromedriver'
executable_path = {'executable_path': chromedriver_path}
browser = Browser('chrome', **executable_path, headless=False)

In [4]:
# Open the NASA Mars news page and parse it with BeautifulSoup.
url = "https://mars.nasa.gov/news/"
browser.visit(url)

# Wait (up to 10 s) for a headline element before snapshotting the page, in
# case the article list is filled in after the initial load. Without this the
# lookups on `soup` can return None when the notebook is run top to bottom.
browser.is_element_present_by_css("div.content_title", wait_time=10)

html = browser.html
soup = bs(html, "html.parser")

In [5]:
# Pull the headline and teaser text of the first article found on the parsed news page
title_tag = soup.find("div", class_="content_title")
teaser_tag = soup.find("div", class_="article_teaser_body")

mars_title = title_tag.text
mars_paragraph = teaser_tag.text

# One call, one line per value -- same output as printing each separately
print(mars_title, mars_paragraph, sep="\n")

NASA to Host Media Call on Next Mars Landing Site
NASA will host a media teleconference at 9 a.m. PST (noon EST) Monday, Nov. 19, to provide details about the Mars 2020 rover’s landing site on the Red Planet.


In [6]:
# JPL space-images listing, filtered to the Mars category
featured_mars_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"

In [7]:
# Navigate from the JPL listing to the featured image's detail page:
# open the listing, click the "FULL IMAGE" link, then the "more info" link.
browser.visit(featured_mars_url)
browser.click_link_by_partial_text('FULL IMAGE')
# Fixed 2-second pause between the two clicks -- presumably to let the
# full-image view finish opening before "more info" is clicked (TODO confirm).
time.sleep(2)
browser.click_link_by_partial_text("more info")

In [8]:
# Build the absolute URL of the featured image from the "more info" page.

# The previous cell ends with a click that navigates to this page. Wait (up to
# 10 s) for the lead figure to exist before snapshotting the HTML; otherwise a
# Run All can parse the page before it has loaded and the lookup returns None.
browser.is_element_present_by_css('figure.lede', wait_time=10)

# Snapshot and parse the detail page
more_info = browser.html

lg_img_Soup = bs(more_info, 'html.parser')

# The large image is the <img> inside <figure class="lede">; its src is site-relative
lg_img_src = lg_img_Soup.find('figure', class_='lede').find('img')['src']

# Prefix the site root to get a full URL
featured_image_url = 'https://www.jpl.nasa.gov' + lg_img_src

featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA16884_hires.jpg'

In [9]:
# Load the Mars weather Twitter feed and keep the parsed <body> for scraping
mars_twitter_url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(mars_twitter_url)
mars_twitter_html = browser.html
mars_twitter_soup = bs(mars_twitter_html, features='lxml').body

In [10]:
# The first <p class="tweet-text"> on the page is taken as the weather report
first_tweet = mars_twitter_soup.find('p', class_="tweet-text")
mars_weather = first_tweet.text.strip()
mars_weather

'Sol 2230 (2018-11-14), high -5C/23F, low -72C/-97F, pressure at 8.59 hPa, daylight 06:22-18:39'

In [11]:
# Source page for the Mars facts table
mars_facts_url = "https://space-facts.com/mars/"

In [12]:
# Let pandas parse the tables on the Mars facts page into a list of DataFrames
tables = pd.read_html(io=mars_facts_url)
tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [13]:
# Turn the first scraped table into a DataFrame indexed by the fact name.
# Work on a copy: `tables[0]` itself would otherwise be relabelled and
# re-indexed in place, which makes the `tables` output above stale and makes
# this cell fail on a second run (two labels assigned to a one-column frame).
df = tables[0].copy()
df.columns = ['', 'Values']
# Non-inplace set_index returns a new frame, so re-running stays idempotent.
df = df.set_index('')
df

Unnamed: 0,Values
,
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [14]:
# Render the facts DataFrame as an HTML <table> string.
# pandas' defaults already give bold row labels, no table id and a returned string.
html_table = df.to_html()
html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Values</th>\n    </tr>\n    <tr>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr>\n  </tbod

In [15]:
# Open the USGS Astrogeology search results for the Mars hemispheres and parse the page
hem_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(hem_url)
hem_html = browser.html
hem_soup = bs(markup=hem_html, features='html.parser')


In [16]:
# Accumulator for the per-hemisphere {"title", "img_url"} records
hemisphere_data = list()

In [17]:
# Visit each hemisphere's detail page, collect its title and full-size image
# URL as a dict, and store the dicts in `hemisphere_data`.

# Start from an empty list so re-running this cell rebuilds the results
# instead of appending a second copy of every hemisphere.
hemisphere_data = []

for x in range(4):  # the loop visits the first four <h3> links on the results page
    # Headings are looked up again on every pass because browser.back()
    # returns to the results page between iterations.
    title = browser.find_by_tag('h3')
    title[x].click()

    # Give the detail page up to 10 s to show the image before parsing it.
    browser.is_element_present_by_css('img.wide-image', wait_time=10)

    # Page-specific names, so the news-page `html` / `soup` variables from the
    # earlier cells are not overwritten by this loop.
    hemisphere_html = browser.html
    hemisphere_soup = bs(hemisphere_html, 'html.parser')

    # The image src is site-relative; prefix the site root to get a full URL.
    wide_image = hemisphere_soup.find("img", class_="wide-image")["src"]
    image_title = hemisphere_soup.find("h2", class_="title").text
    image_url = 'https://astrogeology.usgs.gov' + wide_image

    dictionary = {"title": image_title, "img_url": image_url}
    hemisphere_data.append(dictionary)

    # Return to the results list and pause before the next click.
    browser.back()
    time.sleep(5)

In [18]:
# Show the collected hemisphere records (a list of dicts with 'title' and 'img_url' keys)
hemisphere_data

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]