In [1]:
# Dependencies
from bs4 import BeautifulSoup as bs
import pandas as pd
from splinter import Browser

In [2]:
executable_path = {'executable_path': 'resources/chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

### NASA Mars News
Scrape the NASA Mars News Site and collect the latest news title and paragraph text.

In [3]:
url_news = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'
browser.visit(url_news)

In [4]:
soup_news = bs(browser.html, 'html.parser')

In [5]:
# Examine then determine element that contains sought info
#print(soup_news.prettify())

In [6]:
# Find latest news title
news_title = soup_news.find('div', class_='content_title').text
print(news_title)

NASA Social Media and Websites Win Webby Awards 


In [7]:
# Find latest news paragraph
news_paragraph = soup_news.find('div', class_='article_teaser_body').text
print(news_paragraph)

NASA's social media presence, the InSight mission social media accounts, NASA.gov and SolarSystem.NASA.gov will be honored at the 2019 Webby Awards - "the Oscars of the Internet."


### JPL Mars Space Images - Featured Image
Use splinter to navigate the site and find the image url for the current Featured Mars Image

In [8]:
url_image = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url_image)

In [9]:
soup_image = bs(browser.html, 'html.parser')

In [10]:
#print(soup_image.prettify())

In [11]:
image = soup_image.find_all('a', class_ ="fancybox")[1]['data-fancybox-href']
print(image)

/spaceimages/images/largesize/PIA23177_hires.jpg


In [12]:
# Concatenate website url with scrapped route
featured_image_url = 'https://www.jpl.nasa.gov' + image

# Display full link to featured image
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA23177_hires.jpg'

### Mars Weather from Twitter
Scrape the latest Mars weather tweet from Mars Weather twitter account.

In [13]:
url_twitter = 'https://twitter.com/marswxreport?lang=en'
browser.visit(url_twitter)

In [14]:
soup_weather = bs(browser.html, 'html.parser')

In [15]:
#print(soup_weather.prettify())

In [16]:
# Display mars weather details
mars_weather = soup_weather.find_all('p', class_ = 'TweetTextSize TweetTextSize--normal js-tweet-text tweet-text')[0].text

print(mars_weather)


InSight sol 148 (2019-04-27) low -99.1ºC (-146.4ºF) high -18.2ºC (-0.7ºF)
winds from the W at 4.2 m/s (9.3 mph) gusting to 14.3 m/s (31.9 mph)
pressure at 7.40 hPapic.twitter.com/v0syJv5akT


### I need to remove pic.twitter.com/v0syJv5akT to the above result

### Mars Facts from Space Facts
Visit the Space Facts webpage, mars facts page. 
Use Pandas to scrape the table containing facts about Mars including Diameter, Mass, etc.
Use Pandas to convert the data to a HTML table string

In [17]:
url_facts = 'https://space-facts.com/mars/'
browser.visit(url_facts)

In [18]:
# Use Panda's `read_html` to parse the url
facts_df = pd.read_html(url_facts)[0]
facts_df

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [19]:
# Rename columns
facts_df.columns = ['Description', 'Value']

# Set description column as index
facts_df.set_index('Description', inplace=True)

# Display Mars Facts
facts_df

Unnamed: 0_level_0,Value
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


### Mars Hemispheres
Visit the USGS Astrogeology site to obtain high resolution images for each of Mars' hemispheres.

In [20]:
url_hemispheres = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url_hemispheres)

In [21]:
soup_hemisphere = bs(browser.html, 'html.parser')

In [22]:
#print(soup_hemisphere.prettify())

In [23]:
results = soup_hemisphere.find_all('div', class_ = 'description')
hemisphere_image_urls = []

for result in results:
    
    # Get hemisphere name and save in variable called title
    title = result.find('h3').text
    
    # Get links to the hemispheres and save in variable called url
    partial_url = result.find('a', class_="itemLink product-item")['href']
    url = 'https://astrogeology.usgs.gov/' + partial_url
    
    # Click each of the url to find the full resolution hemisphere image. 
    # Save in variable called img_url).
    browser.visit(url)
    soup_imgs = bs(browser.html, 'html.parser')
    img_url = soup_imgs.find('div', class_='downloads').li.a['href']
    
    # Use a Python dictionary to store the data using the keys img_url and title. 
    # Append the dictionary with the hemisphere title and image url string to a list. 
    # This list will contain one dictionary for each hemisphere.
    hemisphere_image_urls.append({'title':title, 'img_url':img_url})

In [24]:
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]