In [159]:
# Import Dependencies
from bs4 import BeautifulSoup
import requests
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist
import pandas as pd


### NASA Mars News

- Scrape the __[NASA Mars News Site](https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest)__ 
and collect the latest News Title and Paragraph Text. Assign the text to variables that you can reference later.

In [161]:
# pass the chrome executable path  and create an instance of the browser
executable_path = {'executable_path': 'chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

In [162]:
# url for the NASA Mars News Site
url = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'
browser.visit(url)

In [163]:
#dictionary  to load the news content 
news = {}

# parse html and navigate 
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

results = soup.find_all('li', class_="slide")
nl = '\n'

# Loop through returned results
for result in results:
# Error handling
    try:
    # Identify and return news title
        news_title = result.find("div", class_='content_title').text
        
        # Identify and return paragraph text
        news_p = result.div.find("div", class_="article_teaser_body").text
        
        # Print results only if title and paragraph text are available
        if (news_title and news_p):
            
            news = {'News Title' :news_title, 'Paragraph Text' :news_p }
            

    except ElementDoesNotExist:
        print("Error!")

In [165]:
# check if that worked

print(f"News Title : {news['News Title']}.{nl}{nl}Paragraph Text : {news['Paragraph Text']} {nl}")

News Title : Global Storms on Mars Launch Dust Towers Into the Sky.

Paragraph Text : A Mars Dust Tower Stands Out Dust storms are common on Mars. But every decade or so, something unpredictable happens: a series of runaway storms break out, covering the entire planet in a dusty haze. 



In [166]:
browser.quit()

### JPL Mars Space Images - Featured Image


- Visit the url for __[JPL Featured Space Image.](https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars)__ </a> 

- Use splinter to navigate the site and find the image url for the current Featured Mars Image and assign the url string to a variable called featured_image_url.
- Make sure to find the image url to the full size .jpg image.
- Make sure to save a complete url string for this image









In [98]:
# pass the chrome executable path  and create an instance of the browser
executable_path = {'executable_path': 'chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

In [99]:
# url for the JPL Mars Space Images
url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(url)

In [100]:
# Since, we are using only Splinter for this task 
# we are going to interact with few buttons and href clicks accordingly to finally fetch the URL for the full-size image url
""" You click in buttons. Splinter follows any redirects, and submits forms associated with buttons."""

browser.find_link_by_partial_text('FULL IMAGE').first.click()
browser.find_link_by_partial_text('more info').first.click()

In [157]:
# parse html and navigate 
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# image title if needed for later use
featured_image_title = soup.h1.text.strip()


In [None]:
# navigate to the side column to fetch the largest sized image 
browser.click_link_by_partial_text('1920 x 1200')

# store it in the variable
featured_image_url = browser.windows[1].url 
 
# designate current Image window as a window & close
window = browser.windows[1] 
window.close()  

# switch to the main window 
browser.windows[0]

browser.quit()

In [102]:
# check if that worked
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA16815-1920x1200.jpg'

### Mars Weather

#### Visit the Mars Weather twitter account     __[ here](https://twitter.com/marswxreport?lang=en)__     and scrape the latest Mars weather tweet from the page. Save the tweet text for the weather report as a variable called mars_weather.

Note: Be sure you are not signed in to twitter, or scraping may become more difficult.<br>
Note: Twitter frequently changes how information is presented on their website. If you are having difficulty getting the correct html tag data, consider researching Regular Expression Patterns and how they can be used in combination with the .find() method.



In [145]:
# pass the chrome executable path  and create an instance of the browser
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

In [146]:
# Visit the webpage for Mars Weather Twitter account
url = "https://twitter.com/marswxreport?lang=en"
browser.visit(url)

In [147]:
# parse the html and navigate through an instance of Beautiful Soup
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [154]:
# Inspect the webpage to find element "article" encloses the tweets, fetch the first tweet out of it 
first_tweet = soup.body.find("div",class_="css-901oao r-jwli3a r-1qd0xha r-a023e6 r-16dba41 r-ad9z0x r-bcqeeo r-bnwqim r-qvutc0")

In [155]:
mars_weather = first_tweet.text

In [156]:
# check if that worked 
mars_weather

'InSight sol 520 (2020-05-13) low -92.9ºC (-135.1ºF) high -1.4ºC (29.4ºF)\nwinds from the SW at 5.4 m/s (12.0 mph) gusting to 19.5 m/s (43.7 mph)\npressure at 7.00 hPa'

In [158]:
browser.quit()

### Mars Facts

Visit the Mars Facts webpage __[here](https://space-facts.com/mars/)__  and use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.


- Use Pandas to convert the data to a HTML table string.


In [170]:
# pass the chrome executable path  and create an instance of the browser
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

In [171]:
# store url as string 
url = "https://space-facts.com/mars/"

In [172]:
browser.visit(url)

In [173]:
# Navigate through the parse tree to find table elements
html = browser.html
soup = BeautifulSoup(html,'html.parser')

results = soup.find_all("div", class_="widget widget_text profiles")

In [174]:
# declare lists to hold table columns
col_1 = []
col_2 = []

# loop through the results 
for result in results:
    # Error handling
    try:
        td_item = result.find_all('td', class_='column-1')
        tr_item = result.find_all('td', class_='column-2')
        
        if(td_item and tr_item):
            for el in td_item:
                col_1.append(el.get_text())
            for el in tr_item:
                col_2.append(el.get_text())
        
    except AttributeError as e:
        print(e)

In [175]:
browser.quit()

In [181]:
#load lists to a Dataframe and set column names
mars_facts_df= pd.DataFrame({'Description':col_1,'Value':col_2})

# check 
mars_facts_df


Unnamed: 0,Description,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [182]:
# convert to HTML table string using pandas to_html method
mars_facts_table = mars_facts_df.to_html()

In [183]:
# check if that worked
mars_facts_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Description</th>\n      <th>Value</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>Surface Temperature:</td>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>First Record:</td>\n      <td>2nd millennium 

### Mars Hemispheres


- Visit the USGS Astrogeology site __[here](https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars)__ to obtain high resolution images for each of Mar's hemispheres.


- You will need to click each of the links to the hemispheres in order to find the image url to the full resolution image.


- Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing the hemisphere name. Use a Python dictionary to store the data using the keys img_url and title.


- Append the dictionary with the image url string and the hemisphere title to a list. This list will contain one dictionary for each hemisphere.

In [74]:
# pass the chrome executable path  and create an instance of the browser
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

In [75]:
# create the url string
url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

In [76]:
# visit the webpage through the instance

browser.visit(url)
html = browser.html

soup = BeautifulSoup(html, 'html.parser')

In [53]:
# navigate through the parse tree to find the hemisphere header texts
click_text= []
for x in range(4):
    text = soup.find_all('h3')[x].text
    click_text.append(text)
    x += 1

In [54]:
# check 
click_text

['Cerberus Hemisphere Enhanced',
 'Schiaparelli Hemisphere Enhanced',
 'Syrtis Major Hemisphere Enhanced',
 'Valles Marineris Hemisphere Enhanced']

In [55]:
# click on the links with the header texts and store img url and title as key-value pairs and append it to a list 

hemisphere_image_urls = []

for x in range(4):
    browser.find_link_by_partial_text(click_text[x]).first.click()
    
    title = browser.find_by_tag('h2.title').text
    browser.click_link_by_text('Sample') #Sample is a clickable link to the full-size image
    
    image_url = browser.windows[1].url   # copy image url 
    hemisphere_image_urls.append({"Title": title, "Img_URL": image_url})
    
    # designate current Image window as a window & close
    window = browser.windows[1] 
    window.close()  
    
    # switch to the main window 
    browser.windows[0]
    
    # go back to the previous page 
    browser.back()
    
    #increment for next clickable header text
    x += 1



In [168]:
# check if that worked
hemisphere_image_urls

[{'Title': 'Cerberus Hemisphere Enhanced',
  'Img_URL': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'Title': 'Schiaparelli Hemisphere Enhanced',
  'Img_URL': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'Title': 'Syrtis Major Hemisphere Enhanced',
  'Img_URL': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'Title': 'Valles Marineris Hemisphere Enhanced',
  'Img_URL': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]

In [169]:
#exit browser instance
browser.quit()