In [8]:
# Import dependencies
from urllib.parse import urljoin

import numpy as np
import pandas as pd
import requests as req
from bs4 import BeautifulSoup as bs
from selenium import webdriver
from splinter import Browser

************************************************************************************
Scrape the NASA Mars News Site and collect the latest News Title and Paragraph Text. Assign the text to variables that you can reference later.
************************************************************************************

In [9]:
# Start with an empty dict for the scraped news fields and an empty list
# for the cleaned paragraph strings
news_data = dict()
paragraph_text = list()

In [10]:
# Site root (used to resolve the article link found on the listing page) and
# the news listing page that is scraped first.
base_url = "https://mars.nasa.gov/"
nasa_url = "https://mars.nasa.gov/news/"

# Fetch the listing page. requests applies no timeout by default, so set one to
# keep the notebook from hanging on a stalled connection, and raise on an HTTP
# error status instead of parsing an error page.
first_response = req.get(nasa_url, timeout=30)
first_response.raise_for_status()

# Parse the returned HTML with Beautiful Soup
nasa_soup = bs(first_response.text, 'html.parser')

In [11]:
# Take the first element carrying the "slide" class and every anchor inside it;
# the stripped text of the second anchor is kept as the headline.
soup_div = nasa_soup.find(class_="slide")
soup_news = soup_div.find_all('a')
news_title = soup_news[1].text.strip()

In [12]:
# Take the href of the first anchor in the slide that has one and resolve it
# against the site root. urljoin avoids the doubled slash that plain string
# concatenation produces when the href starts with "/" (base_url already ends
# with one) and leaves an already-absolute href untouched.
soup_p = soup_div.find_all('a', href=True)
soup_p_url = soup_p[0]['href']
paragraph_url = urljoin(base_url, soup_p_url)

# Fetch the article page with a timeout (requests has none by default) and
# raise on an HTTP error status before handing the HTML to Beautiful Soup.
second_response = req.get(paragraph_url, timeout=30)
second_response.raise_for_status()
para_soup = bs(second_response.text, "html.parser")

In [13]:
# Find the first element with class 'wysiwyg_content' in the article page and
# collect every <p> tag inside it.
# NOTE(review): find() returns None when nothing matches, in which case the
# next line raises AttributeError - presumably the article body always uses
# this class; confirm.
ww_paragraphs = para_soup.find(class_='wysiwyg_content')
paragraphs = ww_paragraphs.find_all('p')

In [14]:
# Build the list of cleaned paragraph strings in one step. Rebinding the name
# (instead of appending to the list created in an earlier cell) keeps this cell
# idempotent: re-running it no longer piles duplicate paragraphs onto the list.
paragraph_text = [paragraph.get_text().strip() for paragraph in paragraphs]

In [15]:
# Add the title and the first two cleaned paragraphs to the dictionary
news_data.update(
    news_title=news_title,
    paragraph_text_1=paragraph_text[0],
    paragraph_text_2=paragraph_text[1],
)

In [16]:
# Display the dictionary; as the last expression of the cell it is rendered
# as the cell output
news_data

{'news_title': 'Opportunity Hunkers Down During Dust Storm',
 'paragraph_text_1': 'NASA Mars Exploration Rover Status Report',
 'paragraph_text_2': 'Updated at 1:20 p.m. PDT on June 20, 2018'}

************************************************************************************
Visit the url for JPL's Featured Space Image. Use splinter to navigate the site and find the image url for the current Featured Mars Image and assign the url string to a variable called featured_image_url.
************************************************************************************

In [20]:
# Base URL used to build full-size JPEG links, and the Mars-filtered image
# search page
jpl_fullsize_url = 'https://photojournal.jpl.nasa.gov/jpeg/'
jpl_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"

# Launch a splinter-driven Chrome window (headless mode off)
browser = Browser('chrome', headless=False)

In [21]:
# Open the search page in the automated browser, read the page HTML from the
# browser and parse it with Beautiful Soup
browser.visit(jpl_url)
jpl_html = browser.html
jpl_soup = bs(jpl_html, 'html.parser')

In [22]:
# Collect the src of every <img> nested in a <div class="img">. Divs without an
# <img> child, or images without a src attribute, are skipped so that one odd
# tile can neither raise AttributeError here nor put None into the list.
featured_image_list = []
for image in jpl_soup.find_all('div', class_="img"):
    img_tag = image.find('img')
    if img_tag is not None and img_tag.get('src'):
        featured_image_list.append(img_tag.get('src'))

In [23]:
# Use the first image that was found
feature_image = featured_image_list[0]

# Keep the text before the first '-' (drops the size suffix), then keep what
# follows the last '/' of that piece (the bare file name)
image_stem = feature_image.partition('-')[0].rpartition('/')[2]

# Build the full-size image URL
featured_image_url = jpl_fullsize_url + image_stem + '.jpg'

In [24]:
# Display the full-size image URL built in the previous cell
featured_image_url

'https://photojournal.jpl.nasa.gov/jpeg/PIA22607.jpg'

In [25]:
# Close the automated browser opened for the JPL image search
browser.quit()

************************************************************************************
Visit the Mars Weather twitter account and scrape the latest Mars weather tweet from the page. Save the tweet text for the weather report as a variable called mars_weather.
************************************************************************************

In [26]:
# Launch a new splinter-driven Chrome window (headless mode off), define the
# Mars Weather Twitter page URL and open it
browser = Browser('chrome', headless=False)
tweet_url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(tweet_url)

In [27]:
# Read the page HTML from the browser and parse it with Beautiful Soup
tweet_html = browser.html
tweet_soup = bs(tweet_html, 'html.parser')

In [28]:
# Collect the stripped text of every <p> tag matched by the tweet-text class
# string, in page order
weather_info_list = [
    tweet.text.strip()
    for tweet in tweet_soup.find_all('p', class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text")
]

In [29]:
# Pick the weather tweet: the first entry, in page order, whose text starts
# with 'Sol'. This is the same value the backwards loop used to end on. Using
# next() with a default also guarantees mars_weather is always (re)bound - to
# None when no tweet matches - instead of staying undefined or keeping a stale
# value from an earlier run.
mars_weather = next(
    (tweet for tweet in weather_info_list if tweet.startswith('Sol')),
    None,
)

In [30]:
# Display the selected weather tweet text
mars_weather

'Sol 2108 (2018-07-12), Sunny, high -24C/-11F, low -65C/-84F, pressure at 8.06 hPa, daylight 05:19-17:27'

In [31]:
# Close the automated browser opened for the Twitter page
browser.quit()

************************************************************************************
Visit the Mars Facts webpage and use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.
************************************************************************************

In [32]:
# Define the facts page URL, let pandas parse the tables found on that page
# into a list of DataFrames, and keep the first one.
# NOTE(review): index 0 assumes the facts table is the first table on the
# page - confirm if the page layout changes.
facts_url = 'https://space-facts.com/mars/'   
fact_list = pd.read_html(facts_url) 
facts_df = fact_list[0] 

# Convert the DataFrame to an HTML table string (no header row, no index
# column) and print the markup
facts_table = facts_df.to_html(header=False, index=False)
print(facts_table)

<table border="1" class="dataframe">
  <tbody>
    <tr>
      <td>Equatorial Diameter:</td>
      <td>6,792 km</td>
    </tr>
    <tr>
      <td>Polar Diameter:</td>
      <td>6,752 km</td>
    </tr>
    <tr>
      <td>Mass:</td>
      <td>6.42 x 10^23 kg (10.7% Earth)</td>
    </tr>
    <tr>
      <td>Moons:</td>
      <td>2 (Phobos &amp; Deimos)</td>
    </tr>
    <tr>
      <td>Orbit Distance:</td>
      <td>227,943,824 km (1.52 AU)</td>
    </tr>
    <tr>
      <td>Orbit Period:</td>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <td>Surface Temperature:</td>
      <td>-153 to 20 °C</td>
    </tr>
    <tr>
      <td>First Record:</td>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <td>Recorded By:</td>
      <td>Egyptian astronomers</td>
    </tr>
  </tbody>
</table>


************************************************************************************
Visit the USGS Astrogeology site to obtain high resolution images for each of Mars' hemispheres.
************************************************************************************

In [33]:
# Launch a new splinter-driven Chrome window (headless mode off), define the
# USGS hemisphere search URL and open it
browser = Browser('chrome', headless=False)
usgs_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(usgs_url)

In [34]:
# Read the search-results page HTML from the browser and parse it with
# Beautiful Soup
usgs_html = browser.html
usgs_soup = bs(usgs_html, 'html.parser')

In [35]:
# Start with an empty result list, then find the <div class="result-list">
# container on the results page and every <div class="item"> inside it.
hemisphere_image_urls = []
products = usgs_soup.find('div', class_='result-list')
hemispheres = products.find_all('div', class_='item')

# Visit each item's detail page and record its title and image link.
for hemisphere in hemispheres:
    # Link text with the ' Enhanced' suffix removed; it is used both as the
    # stored title and as the partial text for locating the link to click.
    title = hemisphere.find('div', class_='description')
    title_text = title.a.text
    title_text = title_text.replace(' Enhanced', '')
    # NOTE(review): newer splinter releases replace click_link_by_partial_text
    # with browser.links.find_by_partial_text(...).click() - confirm against
    # the installed version before upgrading.
    browser.click_link_by_partial_text(title_text)

    # Parse the detail page under its own names. Reusing usgs_html/usgs_soup
    # here would overwrite the results-page soup, so re-running this cell
    # would look for 'result-list' in a detail page and fail.
    detail_html = browser.html
    detail_soup = bs(detail_html, 'html.parser')

    # Take the link in the first <li> of the downloads list as the image URL
    # and add the title/URL pair to the result list
    image = detail_soup.find('div', class_='downloads').find('ul').find('li')
    img_url = image.a['href']

    hemisphere_image_urls.append({'title': title_text, 'img_url': img_url})

    # Follow the 'Back' link before handling the next item
    browser.click_link_by_partial_text('Back')

In [36]:
# Display the list of {'title', 'img_url'} dictionaries collected above
hemisphere_image_urls   

[{'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
  'title': 'Cerberus Hemisphere'},
 {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
  'title': 'Schiaparelli Hemisphere'},
 {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg',
  'title': 'Syrtis Major Hemisphere'},
 {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg',
  'title': 'Valles Marineris Hemisphere'}]

In [37]:
# Close the automated browser opened for the USGS search
browser.quit()