### Dependencies

In [19]:
# Standard library
from datetime import datetime
from io import StringIO

# Beautiful Soup
from bs4 import BeautifulSoup

# Pandas
import pandas as pd

# Splinter - web scraping
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist

### To use splinter

In [2]:
# Point Splinter at the ChromeDriver binary via a relative path.
executable_path = dict(executable_path='chromedriver.exe')
# Launch Chrome through Splinter with headless mode switched off.
browser = Browser('chrome', headless=False, **executable_path)

To use ChromeDriver I had to take two steps: 

1. Extract chromedriver.exe and place it in the same folder as my project
2. Update my installed version of Chrome to match the version of the chromedriver.exe I downloaded 

Notes about Selenium and Splinter: 

Selenium lets you step through a website element by element. 
It automates manual input as well as navigation between pages.

### Start to Webscrape with Splinter

In [3]:
# Open the NASA Mars news listing in the Splinter-controlled browser.
url = 'https://mars.nasa.gov/news/'
browser.visit(url)

In [4]:
# Take the HTML currently loaded in the browser and parse it with Beautiful Soup.
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

### NASA Mars News - Iterate Through Elements

In [5]:
# Start from the outermost container of each article so its fields can be read item by item.

# Retrieve the parent <li class="slide"> element of every article on the page.
article_results = soup.find_all('li', class_="slide")


In [9]:
# Collect the title and teaser text of every article published in TARGET_YEAR.
TARGET_YEAR = 2019
article_title = []
article_headline = []


def _field_text(parent, css_class):
    """Return the text of the first descendant carrying ``css_class``, or None if there is none."""
    node = parent.find(attrs={"class": css_class})
    return node.text if node is not None else None


# Articles are posted newest first, but every result is scanned and filtered by
# year rather than breaking early, so the outcome does not depend on ordering.
for results in article_results:
    title = _field_text(results, "content_title")
    headline = _field_text(results, "article_teaser_body")
    date_of_article = _field_text(results, "list_date")

    # Skip slides that lack one of the fields instead of raising AttributeError.
    if title is None or headline is None or date_of_article is None:
        continue

    # Convert the article date to a datetime object; strip surrounding
    # whitespace first because strptime rejects unconverted trailing data.
    date_object = datetime.strptime(date_of_article.strip(), '%B %d, %Y')

    if date_object.year == TARGET_YEAR:
        article_title.append(title)
        article_headline.append(headline)

print('--------Titles-------')
print(article_title)
print('--Article Teaser Body--')
print(article_headline)
    

--------Titles-------
['Small Satellite Mission of the Year', "NASA 'Optometrists' Verify Mars 2020 Rover's 20/20 Vision", 'New Finds for Mars Rover, Seven Years After Landing', 'MEDLI2 Installation on Mars 2020 Aeroshell Begins', "NASA's Mars 2020 Rover Does Biceps Curls ", "Fueling of NASA's Mars 2020 Rover Power System Begins", 'What Does a Marsquake Look Like?', 'Mars 2020 Rover: T-Minus One Year and Counting ', 'NASA Racks Up Two Emmy Nominations for Mission Coverage', 'Want to Colonize Mars? Aerogel Could Help', 'A Rover Pit Stop at JPL', 'Mars 2020 Rover Gets a Super Instrument', 'A Neil Armstrong for Mars: Landing the Mars 2020 Rover', "NASA's InSight Uncovers the 'Mole' ", "Mars 2020 Rover's 7-Foot-Long Robotic Arm Installed", "NASA Selects Partners for Mars 2020 'Name the Rover' Contest, Seeks Judges", "Curiosity's Mars Methane Mystery Continues", 'Mars 2020 Rover Gets Its Wheels', "The Mast Is Raised for NASA's Mars 2020 Rover", "NASA's Mars 2020 Will Blaze a Trail — for Hum

### JPL Mars Space Images - Parse CSS Style String

### Start to Webscrape with Splinter

In [62]:
## parse dependencies
import re

In [12]:
# Navigate the browser to the JPL space-images page filtered to the Mars category.
jpl_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(jpl_url)

In [13]:
# Re-parse the page now loaded in the browser; this rebinds `html` and `soup`.
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [63]:
# Read the inline CSS `style` attribute of the first <article> element;
# it carries the featured image as a background-image url(...).
container_results = soup.find('article')['style']
container_results

"background-image: url('/spaceimages/images/wallpaper/PIA19101-1920x1200.jpg');"

In [79]:
# Pull the path out of the CSS url('...') wrapper. The pattern is a raw string
# so that \( and \) reach the regex engine as literal-parenthesis escapes
# instead of being parsed as (invalid) Python string escape sequences.
image_url = re.findall(r"url\('(.*?)'\)", container_results)
image_url[0]

'/spaceimages/images/wallpaper/PIA19101-1920x1200.jpg'

In [70]:
#source_Parse CSS Style: https://stackoverflow.com/questions/9271365/how-to-pull-out-css-attributes-from-inline-styles-with-beautifulsoup

In [81]:
# The scraped image path is site-relative (it starts with '/'), so prefix the JPL host
# to build an absolute URL.
base_url = 'https://www.jpl.nasa.gov'
featured_image_url = base_url + image_url[0]
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA19101-1920x1200.jpg'

### Mars Weather

In [82]:
# Navigate the browser to the Mars weather report Twitter account.
mars_twitter_url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(mars_twitter_url)

In [83]:
# Re-parse the page now loaded in the browser; this rebinds `html` and `soup`.
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [86]:
# Take the text of the first <p class="tweet-text"> on the page.
# NOTE(review): .text also includes the text of nested elements, so the value ends
# with the embedded "pic.twitter.com/..." link text fused onto the weather report.
mars_weather = soup.find('p', {"class": "tweet-text"}).text
mars_weather

'InSight sol 250 (2019-08-10) low -100.0ºC (-148.1ºF) high -26.2ºC (-15.1ºF)\nwinds from the SSE at 4.4 m/s (9.8 mph) gusting to 16.2 m/s (36.2 mph)\npressure at 7.60 hPapic.twitter.com/9sZRRUi3dm'

### Mars Facts - Convert HTML Table To Pandas

In [113]:
# Navigate the browser to the Mars facts page.
mars_facts_url = 'https://space-facts.com/mars/'
browser.visit(mars_facts_url)

In [114]:
# Re-parse the page now loaded in the browser; this rebinds `html` and `soup`.
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [115]:
# display html table as a pandas html string table
from IPython.display import display_html
import json

In [118]:
# Locate the Mars/Earth comparison table by its id and keep its markup as a plain string.
mars_facts = soup.find('table', {"id": "tablepress-comp-mars"})
mars_facts_info = str(mars_facts)
mars_facts_info

'<table class="tablepress tablepress-id-comp-mars blue-table" id="tablepress-comp-mars"><thead><tr class="row-1"><th class="column-1"><strong>Mars - Earth Comparison</strong></th><th class="column-2"><span class="mars">Mars</span></th><th class="column-3"><span class="earth">Earth</span></th></tr></thead><tbody><tr class="row-2"><td class="column-1">Diameter:</td><td class="column-2"><span class="mars-s">6,779 km</span></td><td class="column-3"><span class="earth-s">12,742 km</span></td></tr><tr class="row-3"><td class="column-1">Mass:</td><td class="column-2"><span class="mars-s">6.39 × 10^23 kg</span></td><td class="column-3"><span class="earth-s">5.97 × 10^24 kg</span></td></tr><tr class="row-4"><td class="column-1">Moons:</td><td class="column-2"><span class="mars-s">2</span></td><td class="column-3"><span class="earth-s">1</span></td></tr><tr class="row-5"><td class="column-1">Distance from Sun:</td><td class="column-2"><span class="mars-s">227,943,824 km</span></td><td class="col

In [120]:
# Parse the scraped <table> markup into a DataFrame, then serialise it back to
# an HTML table string. display_html() only renders its argument and returns
# None, so assigning its result would leave mars_facts_table set to None.
mars_facts_df = pd.read_html(StringIO(mars_facts_info))[0]
mars_facts_table = mars_facts_df.to_html(index=False)
mars_facts_df

Mars - Earth Comparison,Mars,Earth
Diameter:,"6,779 km","12,742 km"
Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
Moons:,2,1
Distance from Sun:,"227,943,824 km","149,598,262 km"
Length of Year:,687 Earth days,365.24 days
Temperature:,-153 to 20 °C,-88 to 58°C


In [None]:
#source_HTML Table to Pandas: https://beenje.github.io/blog/posts/parsing-html-tables-in-python-with-pandas/

### Mars Hemispheres - Extract Children of an HTML Parent Element

In [122]:
# Navigate the browser to the USGS Valles Marineris image page.
mars_hempispheres_url = 'https://www.usgs.gov/media/images/valles-marineris-mars'
browser.visit(mars_hempispheres_url)

In [123]:
# Re-parse the page now loaded in the browser; this rebinds `html` and `soup`.
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [129]:
# Grab the thumbnail wrapper <div>; the <img> holding the URL and title is nested inside it.
mars_hempisphere = soup.find('div', {"class":'field-name-scald-thumbnail'})
mars_hempisphere

<div class="field field-name-scald-thumbnail field-type-image field-label-hidden"><div class="field-items"><div class="field-item even"><img alt="Valles Marineris on Mars" class="img-responsive" height="361" src="https://prd-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/s3fs-public/styles/full_width/public/thumbnails/image/valles_marineris_enhanced.jpg" width="1140"/></div></div></div>

In [132]:
# Both the image URL and its title are attributes of the <img> nested inside
# the thumbnail div, so look that element up once. Looping over the div's
# children is unnecessary: the loop variable was never used, and a div with no
# children would have left img_url and title undefined.
hemisphere_img = mars_hempisphere.find('img')
img_url = hemisphere_img['src']
title = hemisphere_img['alt']

print('---results----')
print(img_url)
print(title)

---results----
https://prd-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/s3fs-public/styles/full_width/public/thumbnails/image/valles_marineris_enhanced.jpg
Valles Marineris on Mars


In [133]:
# Wrap the scraped title and image URL in a single-item list of dicts.
hemisphere_image_urls = [ {"Title": title, "img_url": img_url} ] 
hemisphere_image_urls

[{'Title': 'Valles Marineris on Mars',
  'img_url': 'https://prd-wret.s3-us-west-2.amazonaws.com/assets/palladium/production/s3fs-public/styles/full_width/public/thumbnails/image/valles_marineris_enhanced.jpg'}]