In [1]:
#import all important libraries
from bs4 import BeautifulSoup
from splinter import Browser
import pandas as pd
from selenium import webdriver

In [2]:
# Start a splinter-driven Chrome session with a visible window (headless=False).
# The driver binary is referenced by bare file name; an alternative location
# is kept commented out for reference.
#executable_path = {"executable_path": "resources/chromedriver.exe"}
executable_path = {"executable_path": "chromedriver.exe"}
browser = Browser("chrome", headless=False, **executable_path)

# NASA Mars News

In [3]:
# URL of the NASA Mars news page; browser.visit() navigates the
# splinter-controlled browser to it.
url = "https://mars.nasa.gov/news/"
browser.visit(url)

In [4]:
# Snapshot the page's HTML as the browser currently holds it and parse it
# with Python's built-in HTML parser.
html = browser.html
soup = BeautifulSoup(markup=html, features="html.parser")
# Uncomment to inspect the parsed document:
#print(soup.prettify())

In [5]:
# Headline: text of the first <div class="content_title"> in the document.
title_div = soup.find("div", class_="content_title")
news_title = title_div.get_text()
news_title

'MarCO Makes Space for Small Explorers'

In [6]:
# Teaser: text of the first <div class="article_teaser_body"> in the document.
teaser_div = soup.find("div", class_="article_teaser_body")
news_p = teaser_div.get_text()
news_p

'A pair of NASA CubeSats flying to Mars are opening a new frontier for small spacecraft.'

# JPL Mars Space Images - Featured Image

In [7]:
# Open the JPL space-images page; the URL's query string sets category=Mars.
url_picture = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(url_picture)

In [8]:
# Parse the JPL page's current HTML with BeautifulSoup.
html_image = browser.html
soup2 = BeautifulSoup(markup=html_image, features="html.parser")
# Uncomment to inspect the parsed document:
#print(soup2.prettify())

In [9]:
# Inside the page's first <article>, find the first <a class="button"> and read
# its data-fancybox-href attribute, which holds a site-relative image path.
featured_button = soup2.article.find("a", class_="button")
img_url_end = featured_button["data-fancybox-href"]
# Prefix the JPL host to turn the relative path into a full URL.
featured_image_url = "https://jpl.nasa.gov" + img_url_end
featured_image_url

'https://jpl.nasa.gov/spaceimages/images/mediumsize/PIA17838_ip.jpg'

# Mars Weather

In [10]:
# Open the marswxreport Twitter profile (the URL requests lang=en).
url_weather = "https://twitter.com/marswxreport?lang=en"
browser.visit(url_weather)

In [11]:
# Parse the Twitter page's current HTML with BeautifulSoup.
html_weather = browser.html
soup_weather = BeautifulSoup(markup=html_weather, features="html.parser")
# Uncomment to inspect the parsed document:
#print(soup_weather.prettify())

In [12]:
# Take the text of the first <p class="TweetTextSize"> on the page.
# NOTE(review): this assumes the first such paragraph is the latest weather
# report -- confirm it still holds if the profile has a pinned or unrelated tweet.
first_tweet = soup_weather.find("p", class_="TweetTextSize")
mars_weather = first_tweet.get_text()
mars_weather

'Sol 2169 (2018-09-12), high -10C/14F, low -70C/-93F, pressure at 8.82 hPa, daylight 05:41-17:58'

# Mars Facts

In [13]:
# Open the Mars page on space-facts.com in the browser.
# NOTE(review): the facts table is read with pd.read_html(url_facts), which
# takes the URL rather than browser.html, so only url_facts appears to be
# needed from this cell -- confirm before removing the visit.
url_facts = "http://space-facts.com/mars/"
browser.visit(url_facts)

In [14]:
# (Disabled) Create BeautifulSoup object, parse with 'html.parser'
#html_facts = browser.html
#soup_facts = BeautifulSoup(html_facts, "html.parser")
#print(soup_facts.prettify())

In [15]:
# Let pandas fetch the URL and parse the HTML tables it finds there;
# read_html returns them as a list of DataFrames.
tables = pd.read_html(url_facts)
tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [16]:
# Use the first table pandas returned, render it as an HTML <table>, and strip
# the newline characters so the markup is a single line.
df = tables[0]
df_html = df.to_html().replace("\n", "")
df_html

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>0</th>      <th>1</th>    </tr>  </thead>  <tbody>    <tr>      <th>0</th>      <td>Equatorial Diameter:</td>      <td>6,792 km</td>    </tr>    <tr>      <th>1</th>      <td>Polar Diameter:</td>      <td>6,752 km</td>    </tr>    <tr>      <th>2</th>      <td>Mass:</td>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>3</th>      <td>Moons:</td>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>4</th>      <td>Orbit Distance:</td>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <th>5</th>      <td>Orbit Period:</td>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>6</th>      <td>Surface Temperature:</td>      <td>-153 to 20 °C</td>    </tr>    <tr>      <th>7</th>      <td>First Record:</td>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>8</th>      <td>Recorded By:</td>      <td>Egyptian astronomers</td>    <

# Mars Hemispheres

In [17]:
# Visit the USGS Astrogeology search results for enhanced Mars hemisphere images.
# The site root is kept separately because links and image paths taken from
# the pages are prefixed with it to form full URLs.
url_hemisphere_base = 'https://astrogeology.usgs.gov'
url_hemisphere = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(url_hemisphere)

In [18]:
# Parse the search-results page's current HTML with BeautifulSoup.
html_hemisphere = browser.html
soup_hemisphere = BeautifulSoup(markup=html_hemisphere, features="html.parser")
# Uncomment to inspect the parsed document:
#print(soup_hemisphere.prettify())

In [19]:
# Collect the detail-page URL of every search result (<div class="item">).
hemisphere_results = soup_hemisphere.find_all("div", class_="item")
hemisphere_links = []

for item in hemisphere_results:
    # find() returns None when the result has no matching anchor, and
    # subscripting None raises TypeError (a missing href raises KeyError),
    # so neither case would reach an `except AttributeError` handler.
    # Check explicitly and skip the malformed result instead of crashing.
    anchor = item.find('a', class_="itemLink")
    link = anchor.get('href') if anchor is not None else None
    if not link:
        print("Skipping search result without an itemLink href")
        continue
    # hrefs on this page are site-relative, so prefix the site root.
    hemisphere_links.append(url_hemisphere_base + link)

In [20]:
# Display the collected detail-page URLs.
hemisphere_links

['https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced',
 'https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced',
 'https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced',
 'https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced']

In [21]:
# For each hemisphere detail page, record its title and the full URL of the
# image held by the page's <img class="wide-image"> element.
hemisphere_img_links = []
for link in hemisphere_links:
    browser.visit(link)

    # Parse the detail page as currently loaded in the browser.
    soup_link = BeautifulSoup(browser.html, "html.parser")

    # Image path first, then the title -- same lookup order as before, so a
    # missing element fails at the same step.
    wide_image = soup_link.find("img", class_="wide-image")
    img_link = wide_image["src"]
    title_heading = soup_link.find("h2", class_="title")
    img_title = title_heading.get_text()

    # The src is site-relative; prefix the site root to make it a full URL.
    record = {"title": img_title, "img_url": url_hemisphere_base + img_link}
    hemisphere_img_links.append(record)

In [22]:
# Display the collected title / image-URL records.
hemisphere_img_links

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]