# Module 12 Challenge
## Deliverable 1: Scrape Titles and Preview Text from Mars News

In [1]:
# Import Splinter and BeautifulSoup
from splinter import Browser
from bs4 import BeautifulSoup as soup
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
browser = Browser('chrome')

### Step 1: Visit the Website

1. Use automated browsing to visit the [Mars news site](https://static.bc-edx.com/data/web/mars_news/index.html). Inspect the page to identify which elements to scrape.

      > **Hint** To identify which elements to scrape, you might want to inspect the page by using Chrome DevTools.

In [10]:
# Setup splinter
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

# Visit the Mars news site: https://static.bc-edx.com/data/web/mars_news/index.html
url = 'https://redplanetscience.com/'
browser.visit(url)

### Step 2: Scrape the Website

Create a Beautiful Soup object and use it to extract text elements from the website.

In [11]:
# Create a Beautiful Soup object
html = browser.html
soup = soup(html, 'html.parser')

In [13]:
# Extract all the text elements
# Retrieve the parent divs for all articles
results = soup.find_all('div', class_='list_text')

# create alist to store the results 
article_list = []

for result in results:
    
    # Scrape the article title
    header = result.find('div', class_='content_title').text.strip()
    
    # Scrape the article paragraph
    text = result.find('div', class_='article_teaser_body').text
    
    # print article data
    print('------------------------')
    print(header)
    print(text)
    
    # create a dict 
    article_dict = {}
    article_dict['title'] = header
    article_dict['preview'] = text
    # append a dict to a list
    article_list.append(article_dict)    

------------------------
Newfound Martian Aurora Actually the Most Common; Sheds Light on Mars' Changing Climate
A type of Martian aurora first identified by NASA’s MAVEN spacecraft in 2016 is actually the most common form of aurora occurring on the Red Planet, according to new results from the mission.
------------------------
NASA's Mars 2020 Comes Full Circle
Aiming to pinpoint the Martian vehicle's center of gravity, engineers took NASA's 2,300-pound Mars 2020 rover for a spin in the clean room at JPL. 
------------------------
Follow NASA's Perseverance Rover in Real Time on Its Way to Mars
A crisply rendered web application can show you where the agency's Mars 2020 mission is right now as it makes its way to the Red Planet for a Feb. 18, 2021, landing.
------------------------
Q&A with the Student Who Named Ingenuity, NASA's Mars Helicopter
As a longtime fan of space exploration, Vaneeza Rupani appreciates the creativity and collaboration involved with trying to fly on another pl

In [14]:
print(article_list)

[{'title': "Newfound Martian Aurora Actually the Most Common; Sheds Light on Mars' Changing Climate", 'preview': 'A type of Martian aurora first identified by NASA’s MAVEN spacecraft in 2016 is actually the most common form of aurora occurring on the Red Planet, according to new results from the mission.'}, {'title': "NASA's Mars 2020 Comes Full Circle", 'preview': "Aiming to pinpoint the Martian vehicle's center of gravity, engineers took NASA's 2,300-pound Mars 2020 rover for a spin in the clean room at JPL. "}, {'title': "Follow NASA's Perseverance Rover in Real Time on Its Way to Mars", 'preview': "A crisply rendered web application can show you where the agency's Mars 2020 mission is right now as it makes its way to the Red Planet for a Feb. 18, 2021, landing."}, {'title': "Q&A with the Student Who Named Ingenuity, NASA's Mars Helicopter", 'preview': 'As a longtime fan of space exploration, Vaneeza Rupani appreciates the creativity and collaboration involved with trying to fly on 

In [15]:
browser.quit()

### Step 3: Scrape the image website for a featured image 

In [2]:
# Setup splinter
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

# Visit the Mars news site
url = 'https://spaceimages-mars.com/'
browser.visit(url)

In [3]:
# Create a Beautiful Soup object
html = browser.html
soup = soup(html, 'html.parser')

In [5]:
relative_image_path = soup.find('img', class_='headerimage fade-in')["src"]
featured_img_url = url + relative_image_path
print(featured_img_url)

https://spaceimages-mars.com/image/featured/mars3.jpg


### Step 4: Mars Facts
Visit the Mars Facts webpage [here](https://galaxyfacts-mars.com) and use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.

* Use Pandas to convert the data to a HTML table string.


In [17]:
import pandas as pd

In [18]:
url = 'https://galaxyfacts-mars.com/'

In [20]:
tables = pd.read_html(url)
tables

[                         0                1                2
 0  Mars - Earth Comparison             Mars            Earth
 1                Diameter:         6,779 km        12,742 km
 2                    Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 3                   Moons:                2                1
 4       Distance from Sun:   227,943,824 km   149,598,262 km
 5          Length of Year:   687 Earth days      365.24 days
 6             Temperature:     -87 to -5 °C      -88 to 58°C,
                       0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:          2 ( Phobos & Deimos )
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC

In [22]:
df = tables[1]
df.head()

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 ( Phobos & Deimos )
4,Orbit Distance:,"227,943,824 km (1.38 AU)"


In [28]:
df.columns = ['Parameter','Value']
df['Parameter'] = df['Parameter'].str.replace(':','')
df.head(10)

Unnamed: 0,Parameter,Value
0,Equatorial Diameter,"6,792 km"
1,Polar Diameter,"6,752 km"
2,Mass,6.39 × 10^23 kg (0.11 Earths)
3,Moons,2 ( Phobos & Deimos )
4,Orbit Distance,"227,943,824 km (1.38 AU)"
5,Orbit Period,687 days (1.9 years)
6,Surface Temperature,-87 to -5 °C
7,First Record,2nd millennium BC
8,Recorded By,Egyptian astronomers


In [42]:
df_html = df.to_html()
print(df_html)

NameError: name 'df' is not defined

### Mars Hemispheres

* Visit the astrogeology site [here](https://marshemispheres.com/) to obtain high resolution images for each of Mar's hemispheres.

* You will need to click each of the links to the hemispheres in order to find the image url to the full resolution image.

* Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing the hemisphere name. Use a Python dictionary to store the data using the keys `img_url` and `title`.

* Append the dictionary with the image url string and the hemisphere title to a list. This list will contain one dictionary for each hemisphere.

In [2]:
# Setup splinter
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

# Visit the https://marshemispheres.com/
url = 'https://marshemispheres.com/'
browser.visit(url)

In [3]:
# Create a Beautiful Soup object
hemis_html = browser.html
soup = soup(hemis_html, 'html.parser')

# Store the main_ul 
hemispheres_main_url = 'https://marshemispheres.com/'
    
# Retrieve all elements that contain book information
items = soup.find_all('div', class_='item')
# print(items)

hemisphere_image_url = []

for item in items:
    header = item.find('h3')
#     print(header)
        
    # Store link that leads to full image website
    partial_url = item.find('a', class_='itemLink product-item')['href']
#     print(partial_url)
    
    # Visit the link that contains the full image website 
    browser.visit(hemispheres_main_url + partial_url)
    
    # HTML Object of individual hemisphere information website 
    partial_html = browser.html
#     print(partial_html)
    
    # Parse HTML with Beautiful Soup for every individual hemisphere information website 
    soup = soup(partial_html, 'html.parser')
    
    end_url = soup.find('img', class_='wide-image')['src']
    print(end_url)
    
    # Retrieve full image source 
#     img_url = hemispheres_main_url + end_url
#     print(img_url)

    
    # Append the retreived information into a list of dictionaries 
#     hemisphere_image_url.append({"title" : header, "img_url" : img_url})

AttributeError: ResultSet object has no attribute 'find'. You're probably treating a list of elements like a single element. Did you call find_all() when you meant to call find()?

[<div class="item">
<a class="itemLink product-item" href="cerberus.html"><img alt="Cerberus Hemisphere Enhanced thumbnail" class="thumb" src="images/39d3266553462198bd2fbc4d18fbed17_cerberus_enhanced.tif_thumb.png"/></a>
<div class="description">
<a class="itemLink product-item" href="cerberus.html">
<h3>Cerberus Hemisphere Enhanced</h3>
</a>
<span class="subtitle" style="float:left">image/tiff 21 MB</span><span class="pubDate" style="float:right"></span><br/>
<p>Mosaic of the Cerberus hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. This mosaic is composed of 104 Viking Orbiter images acquired…</p>
</div>
<!-- end description -->
</div>, <div class="item">
<a class="itemLink product-item" href="schiaparelli.html"><img alt="Schiaparelli Hemisphere Enhanced thumbnail" class="thumb" src="images/08eac6e22c07fb1fe72223a79252de20_schiaparelli_enhanced.tif_thumb.png"/></a>
<div class="description">
<a class="itemLink product-