#### comments and documentation will be listed here, but NOT added in the scrape_mars.py  
#### Section header titles here based on homework readme.md section headers

In [1]:
# import dependencies

from bs4 import BeautifulSoup
import pandas as pd
from splinter import Browser

In [2]:
# Windows
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

## NASA Mars News

In [3]:
url = 'https://mars.nasa.gov/news/'
browser.visit(url)

html = browser.html
mars_news_soup = BeautifulSoup(html, 'html.parser')

In [4]:
#latest article title. 
#<div class="content_title"><a href="/news/8512/nasa-mars-mission-connects-with-bosnian-and-herzegovinian-town/" target="_self">NASA Mars Mission Connects With Bosnian and Herzegovinian Town</a></div>
news_title = mars_news_soup.find('div', class_='content_title').text
news_title

'NASA Mars Mission Connects With Bosnian and Herzegovinian Town'

In [5]:
# latest article paragraph
# <div class="article_teaser_body">
news_p = mars_news_soup.find('div', class_='article_teaser_body').text
news_p

'A letter from NASA was presented to the mayor of Jezero, Bosnia-Herzegovina, honoring the connection between the town and Jezero Crater, the Mars 2020 rover landing site.'

## JPL Mars Featured Image

In [6]:
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)

In [7]:
# click "Full Image" button
# ways to find and click: https://splinter.readthedocs.io/en/latest/elements-in-the-page.html

browser.click_link_by_partial_text('FULL IMAGE')

In [8]:
# click "more info" button
# a sleep is added in the scrape_mars. without it, splinter attempts to click 'more info' before this desired page is loaded
browser.click_link_by_partial_text('more info')

In [9]:
# <figure class="lede">
#   <a href="/spaceimages/images/largesize/PIA18886_hires.jpg">
# </figure>

html = browser.html
image_soup = BeautifulSoup(html, 'html.parser')

featured_image_route = image_soup.find('figure', class_='lede').a['href']
featured_image_route

'/spaceimages/images/largesize/PIA19643_hires.jpg'

In [10]:
featured_image_url = f'https://www.jpl.nasa.gov{featured_image_route}'
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA19643_hires.jpg'

## Mars Weather - Twitter

In [11]:
url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(url)

html = browser.html
tweet_soup = BeautifulSoup(html, 'html.parser')

In [12]:
#<p class="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text" lang="en" data-aria-label-part="0">

mars_weather = tweet_soup.find('p', class_='TweetTextSize').text
mars_weather

'InSight sol 292 (2019-09-22) low -101.2ºC (-150.1ºF) high -26.4ºC (-15.6ºF)\nwinds from the SE at 4.6 m/s (10.2 mph) gusting to 16.6 m/s (37.1 mph)\npressure at 7.40 hPapic.twitter.com/CNzfmrrN6P'

## Mars Facts - Pandas HTML Table String

In [13]:
url = 'https://space-facts.com/mars/'

temp = pd.read_html(url)
temp

# two tables come back from read_html. The first being a Mars & Earth compare. Neither tables have column names. 

[  Mars - Earth Comparison             Mars            Earth
 0               Diameter:         6,779 km        12,742 km
 1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 2                  Moons:                2                1
 3      Distance from Sun:   227,943,824 km   149,598,262 km
 4         Length of Year:   687 Earth days      365.24 days
 5            Temperature:    -153 to 20 °C      -88 to 58°C,
                       0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [14]:
# this is the correct table

mars_df = pd.read_html(url)[1]
mars_df.columns = ['Property', 'Value']
mars_df.set_index('Property', inplace = True)
mars_df

Unnamed: 0_level_0,Value
Property,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [15]:
mars_df.to_html()

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Value</th>\n    </tr>\n    <tr>\n      <th>Property</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr>\n  

## Mars Hemispheres

In [16]:
# The website was originally down at the time of the original assignment, but is now up.

# url = 'https://web.archive.org/web/20181114171728/https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'


url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

browser.visit(url)

html = browser.html
hemi_soup = BeautifulSoup(html, 'html.parser')

### Method 1 of getting image titles & URLs

In [17]:
# <h3>Cerberus Hemisphere Enhanced</h3>
# <h3>Schiaparelli Hemisphere Enhanced</h3>
# <h3>Syrtis Major Hemisphere Enhanced</h3>
# <h3>Valles Marineris Hemisphere Enhanced</h3>

# Method 1 is using a loop and empty list to get the titles
# Then search for the image URLs based on the h3 titles found

links = hemi_soup.find_all('h3')

hemi_titles = [hemi.text for hemi in links]
hemi_titles

# hemi_titles = []
# for hemi in links:
#     hemi_titles.append(hemi.text)

['Cerberus Hemisphere Enhanced',
 'Schiaparelli Hemisphere Enhanced',
 'Syrtis Major Hemisphere Enhanced',
 'Valles Marineris Hemisphere Enhanced']

In [18]:
hemisphere_image_urls = []
counter = 0

print(f"Starting: \n{'-'*20}")

for hemi in hemi_titles:
    hemi_dict = {}
    
    browser.click_link_by_partial_text(hemi)
    hemi_dict['title'] = hemi
    hemi_dict['img_url'] = browser.find_by_text('Sample').first['href']
    hemisphere_image_urls.append(hemi_dict)
#     print(hemi_dict) --- this line returns +1 line extra of the dictionary, rather than the newest addition. Prints 1, then 2, then 3, then all 4. 
    print(f"{hemisphere_image_urls[counter]} \n{'-'*20}")
    
    counter += 1
    browser.back()

print(f"Done. {counter} hemispheres found.")

Starting: 
--------------------
{'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'} 
--------------------
{'title': 'Schiaparelli Hemisphere Enhanced', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'} 
--------------------
{'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'} 
--------------------
{'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'} 
--------------------
Done. 4 hemispheres found.


### Method 2 of getting image titles & URLs

In [19]:
# Using Splinter's 'find_by_css'. Link: https://splinter.readthedocs.io/en/latest/finding.html
# This is used in scrape_mars.py

## <a---------------class = "itemLink product-item"> ... <h3></h3>
# <a href="/search/map/Mars/Viking/cerberus_enhanced" class="itemLink product-item">
#   <h3>Cerberus Hemisphere Enhanced</h3>
# </a>

hemisphere_image_urls = []
links = browser.find_by_css("a.product-item h3")

for item in range(len(links)):
    hemi_dict = {}
    
    browser.find_by_css("a.product-item h3")[item].click()
    hemi_dict["title"] = browser.find_by_css("h2.title").text
    hemi_dict["img_url"] = browser.find_link_by_text("Sample").first["href"]
    hemisphere_image_urls.append(hemi_dict)
    browser.back()

hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]