# Module 12 Challenge
## Deliverable 1: Scrape Titles and Preview Text from Mars News

In [1]:
# Import Splinter and BeautifulSoup
from splinter import Browser
from bs4 import BeautifulSoup as soup
from webdriver_manager.chrome import ChromeDriverManager
import time
import requests

### Step 1: Visit the Website

1. Use automated browsing to visit the [Mars news site](https://static.bc-edx.com/data/web/mars_news/index.html). Inspect the page to identify which elements to scrape.

      > **Hint** To identify which elements to scrape, you might want to inspect the page by using Chrome DevTools.

In [2]:
# Start a visible (non-headless) Chrome session through Splinter, pointing it
# at the driver path returned by webdriver_manager.
driver_binary = ChromeDriverManager().install()
executable_path = {'executable_path': driver_binary}
browser = Browser('chrome', headless=False, **executable_path)

# Open the Mars news page.
# NOTE(review): the instructions above link to
# https://static.bc-edx.com/data/web/mars_news/index.html, but this cell visits
# redplanetscience.com instead -- confirm which site is intended.
url = 'https://redplanetscience.com/'
browser.visit(url)

### Step 2: Scrape the Website

Create a Beautiful Soup object and use it to extract text elements from the website.

In [3]:
# Create a Beautiful Soup object
# Take the HTML of the page currently loaded in the Splinter browser and parse
# it with the 'html.parser' backend. `soup` is the BeautifulSoup class alias
# from the import cell.
html = browser.html
new_soup = soup(html, 'html.parser')

In [4]:
# Grab the container of the first article on the page.
# `select_one` takes a CSS selector, so the class has to be part of the
# selector string; a `class_=` keyword is not applied as a filter there, which
# made the previous call return the first <div> in the document instead.
results = new_soup.select_one('div.list_text')

# Fail here with a clear message rather than with an AttributeError on
# `None` in the cells that read from `results`.
if results is None:
    raise ValueError("No <div class='list_text'> element found in the page HTML")

In [5]:
# Article title: text of the first 'content_title' div found inside `results`,
# with leading/trailing whitespace stripped.
header = results.find('div', class_='content_title').text.strip()
print(header)

NASA's Briefcase-Size MarCO Satellite Picks Up Honors


In [6]:
# Article preview: text of the first 'article_teaser_body' div found inside
# `results` (unlike the title, this is not stripped of whitespace).
text = results.find('div', class_='article_teaser_body').text
print(text)

The twin spacecraft, the first of their kind to fly into deep space, earn a Laureate from Aviation Week & Space Technology.


In [7]:
# # Extract all the text elements
# # Retrieve the parent divs for all articles
# # results = new_soup.find_all('div', class_='list_text')

# # create a list to store the results
# article_list = []

# for result in results:
    
#     # Scrape the article title
#     header = result.find('div', class_='content_title').text.strip()
    
#     # Scrape the article paragraph
#     text = result.find('div', class_='article_teaser_body').text
    
#     # print article data
#     print('------------------------')
#     print(header)
#     print(text)
    
#     # create a dict 
#     article_dict = {}
#     article_dict['title'] = header
#     article_dict['preview'] = text
#     # append a dict to a list
#     article_list.append(article_dict)    

In [8]:
#  print(article_list)

In [9]:
# Close the browser session used for the Mars news scrape; the next step
# opens a fresh one.
browser.quit()

### Step 3: Scrape the image website for a featured image 

In [10]:
# Start a new visible (non-headless) Chrome session through Splinter, pointing
# it at the driver path returned by webdriver_manager.
driver_binary = ChromeDriverManager().install()
executable_path = {'executable_path': driver_binary}
browser = Browser('chrome', headless=False, **executable_path)

# Open the Mars space-images site. `url` is kept because the featured-image
# cell prepends it to the relative image path.
url = 'https://spaceimages-mars.com/'
browser.visit(url)

In [11]:
# Create a Beautiful Soup object
# Parse the HTML of the page currently loaded in the browser with the
# 'html.parser' backend. Note that `html` is rebound here, replacing the
# value assigned for the news page.
html = browser.html
image_soup = soup(html, 'html.parser')

In [12]:
# Find the featured <img> tag by its class string and read its `src` attribute.
featured_img_tag = image_soup.find('img', class_='headerimage fade-in')
relative_image_path = featured_img_tag['src']

# Build the absolute image URL by prefixing the site URL (which ends in '/').
featured_img_url = f"{url}{relative_image_path}"
print(featured_img_url)

https://spaceimages-mars.com/image/featured/mars3.jpg


### Step 4: Mars Facts
Visit the Mars Facts webpage [here](https://galaxyfacts-mars.com) and use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.

* Use Pandas to convert the data to an HTML table string.


In [13]:
import pandas as pd

In [14]:
# Mars facts page that pandas reads tables from.
# NOTE: this rebinds `url`, which the featured-image cell also uses as its
# base URL; re-running that cell after this one would build the image link
# from this address instead.
url = 'https://galaxyfacts-mars.com/'

In [15]:
# read_html returns a list of DataFrames, one per table parsed from the page
# (two tables in the captured output below).
tables = pd.read_html(url)
tables

[                         0                1                2
 0  Mars - Earth Comparison             Mars            Earth
 1                Diameter:         6,779 km        12,742 km
 2                    Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 3                   Moons:                2                1
 4       Distance from Sun:   227,943,824 km   149,598,262 km
 5          Length of Year:   687 Earth days      365.24 days
 6             Temperature:     -87 to -5 °C      -88 to 58°C,
                       0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:          2 ( Phobos & Deimos )
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC

In [16]:
# Take the second table (index 1): the Mars-only facts table. Index 0 is the
# Mars/Earth comparison table.
# `df` refers to the same object as tables[1] (no copy is made), so later
# changes to `df` are also visible through `tables`.
df = tables[1]
df.head()

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 ( Phobos & Deimos )
4,Orbit Distance:,"227,943,824 km (1.38 AU)"


In [17]:
# Give the two unnamed columns (0 and 1) descriptive names.
df.columns = ['Parameter', 'Value']

# Remove the colon from each parameter label. regex=False makes this a plain
# substring replacement regardless of the installed pandas version's default
# for `regex`, and avoids the FutureWarning older versions emit.
df['Parameter'] = df['Parameter'].str.replace(':', '', regex=False)

# Show up to 10 rows, which covers the whole table in the captured output.
df.head(10)

Unnamed: 0,Parameter,Value
0,Equatorial Diameter,"6,792 km"
1,Polar Diameter,"6,752 km"
2,Mass,6.39 × 10^23 kg (0.11 Earths)
3,Moons,2 ( Phobos & Deimos )
4,Orbit Distance,"227,943,824 km (1.38 AU)"
5,Orbit Period,687 days (1.9 years)
6,Surface Temperature,-87 to -5 °C
7,First Record,2nd millennium BC
8,Recorded By,Egyptian astronomers


In [18]:
# Render the facts table as an HTML <table> string and print the raw markup.
# With the default arguments the row index is emitted as the first column.
df_html = df.to_html()
print(df_html)

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>Parameter</th>
      <th>Value</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>0</th>
      <td>Equatorial Diameter</td>
      <td>6,792 km</td>
    </tr>
    <tr>
      <th>1</th>
      <td>Polar Diameter</td>
      <td>6,752 km</td>
    </tr>
    <tr>
      <th>2</th>
      <td>Mass</td>
      <td>6.39 × 10^23 kg (0.11 Earths)</td>
    </tr>
    <tr>
      <th>3</th>
      <td>Moons</td>
      <td>2 ( Phobos &amp; Deimos )</td>
    </tr>
    <tr>
      <th>4</th>
      <td>Orbit Distance</td>
      <td>227,943,824 km (1.38 AU)</td>
    </tr>
    <tr>
      <th>5</th>
      <td>Orbit Period</td>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <th>6</th>
      <td>Surface Temperature</td>
      <td>-87 to -5 °C</td>
    </tr>
    <tr>
      <th>7</th>
      <td>First Record</td>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <th>8</th>
      <td>Re

### Mars Hemispheres

* Visit the astrogeology site [here](https://marshemispheres.com/) to obtain high-resolution images for each of Mars's hemispheres.

* You will need to click each of the links to the hemispheres in order to find the image url to the full resolution image.

* Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing the hemisphere name. Use a Python dictionary to store the data using the keys `img_url` and `title`.

* Append the dictionary with the image url string and the hemisphere title to a list. This list will contain one dictionary for each hemisphere.

In [35]:
# Start a new visible (non-headless) Chrome session through Splinter, pointing
# it at the driver path returned by webdriver_manager.
driver_binary = ChromeDriverManager().install()
executable_path = {'executable_path': driver_binary}
browser = Browser('chrome', headless=False, **executable_path)

# Open the Mars hemispheres landing page.
url = 'https://marshemispheres.com/'
browser.visit(url)

In [36]:
# Find the thumbnail image inside each hemisphere link on the landing page;
# only the number of matches is needed to drive the loop below.
thumbnail_css = 'a.product-item img'
items = browser.find_by_css(thumbnail_css)

# One {"title", "img_url"} dictionary per hemisphere is collected here.
hemisphere_image_url = []

for index in range(len(items)):
    # Look the thumbnails up again on every pass and click the index-th one
    # (presumably because handles found earlier do not survive navigating
    # away and back -- confirm).
    browser.find_by_css(thumbnail_css)[index].click()

    # On the detail page: read the heading text, then the target of the
    # 'Sample' link.
    title = browser.find_by_css('h2.title').text
    sample_link = browser.links.find_by_text('Sample').first

    # Record this hemisphere and go back to the landing page for the next one.
    hemisphere_image_url.append({"title": title, "img_url": sample_link['href']})
    browser.back()


In [37]:
# Show the collected list of {title, img_url} dictionaries, one per hemisphere.
print(hemisphere_image_url)

[{'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'https://marshemispheres.com/images/full.jpg'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'img_url': 'https://marshemispheres.com/images/schiaparelli_enhanced-full.jpg'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': 'https://marshemispheres.com/images/syrtis_major_enhanced-full.jpg'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': 'https://marshemispheres.com/images/valles_marineris_enhanced-full.jpg'}]


In [38]:
# Close the browser session used for the hemisphere scrape.
browser.quit()