# Import Libraries

In [1]:
# Import Dependencies

from bs4 import BeautifulSoup
import requests
from splinter import Browser
import time
import pandas as pd


executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

# Web Scraping :  NASA Mars News 

In [2]:
# URL of page to be scraped

url_to_scrape = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'
browser.visit(url_to_scrape)

# Retrieve page with the browser module, Create BeautifulSoup object; parse with 'html.parser'

html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# Find the latest News Title and Paragraph Text

news_title = soup.find('li',class_="slide").find('div',class_="content_title").text
news_para = soup.find('li',class_="slide").find('div',class_="article_teaser_body").text

print(f"News Title:\n{news_title}")
print('\n-----------------\n')
print(f"News Para:\n{news_para}")

News Title:
Meet the People Behind NASA's InSight Mars Lander

-----------------

News Para:
A series of NASA videos highlight scientists and engineers leading the next mission to Mars.


# Web Scraping : Mars Space Images - Featured Image

In [3]:
# URL of page to be scraped

image_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(image_url)

# Retrieve page with the browser module, Create BeautifulSoup object; parse with 'html.parser'

html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# Find the featured_image_url

image = soup.find(class_="button fancybox")["data-fancybox-href"]

featured_image_url = "https://www.jpl.nasa.gov/" + image

print('\n-----------------\n')

print(f"Featured Image URL:\n{featured_image_url}")


-----------------

Featured Image URL:
https://www.jpl.nasa.gov//spaceimages/images/mediumsize/PIA09113_ip.jpg


# Web Scraping : Mars Weather from Twitter 

In [4]:
# URL of page to be scraped

mars_twitter_url = 'https://twitter.com/MarsWxReport?lang=en'
browser.visit(mars_twitter_url)

# Retrieve page with the browser module, Create BeautifulSoup object; parse with 'html.parser'

html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# Find the featured_image_url

mars_weather  = soup.find('p',class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text").text

print('\n-----------------\n')

print(f"Latest Tweet:\n{mars_weather}")


-----------------

Latest Tweet:
A view of this morning’s launch of the Parker #SolarProbe launch from about 3 miles awayhttps://youtu.be/hDhIane1Rso 


# Web Scraping : Mars Facts 

In [5]:
# read_html function in Pandas to automatically scrape any tabular data from a page.

mars_facts_url = 'https://space-facts.com/mars/'
mars_facts_tables = pd.read_html(mars_facts_url)[0]
mars_facts_tables.columns = ['Attribute','Value']
mars_facts_tables

Unnamed: 0,Attribute,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [6]:
#DataFrames as HTML
#Save the table directly to a file

mars_facts_tables.to_html('mars_facts_table.html',index=False)

# Web Scraping : Mars Hemispheres

In [7]:
# URL of page to be scraped

mars_hem_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(mars_hem_url)

hemisphere_image_urls = []

# Retrieve page with the browser module, Create BeautifulSoup object; parse with 'html.parser'

html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# Examine the results, then determine element that contains sought info
# results are returned as an iterable list

results = soup.find('div', class_='result-list').find_all('div',class_='item')

In [8]:
# Image URL to be appended for each iteration

root_image_url = 'https://astrogeology.usgs.gov/'

for result in results:
    title = result.find('div',class_='description').h3.text
    
    # split the h3 text to remove "enhanced" from each hemisphere name e.g. Cerberus Hemisphere Enhanced
    title_split_join = ' '.join(title.split()[0:2]) 
    
    # create url for each hemisphere image
    image_url = root_image_url + result.find('a',class_='itemLink product-item')['href']
    browser.visit(image_url)
    html = browser.html
    soup = BeautifulSoup(html, 'html.parser')
    final_image_url = soup.find('div',class_='downloads').a['href']
    
    hemisphere_image = {
        "title" : title_split_join,
        "image_url" : final_image_url
    }
    hemisphere_image_urls.append(hemisphere_image)
    
    print('\n-----------------\n')
    print(hemisphere_image)



-----------------

{'title': 'Cerberus Hemisphere', 'image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}

-----------------

{'title': 'Schiaparelli Hemisphere', 'image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}

-----------------

{'title': 'Syrtis Major', 'image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}

-----------------

{'title': 'Valles Marineris', 'image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}
