In [1]:
import os
from bs4 import BeautifulSoup as bs
import pandas as pd
import requests
import pymongo
from splinter import Browser
import cssutils


In [2]:
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

### NASA Mars News
Scrape the NASA Mars News Site and collect the latest News Title and Paragraph Text. Assign the text to variables that you can reference later.

In [4]:
# use splinter to access web page
news_url = "https://mars.nasa.gov/news/"
browser.visit(news_url)

In [5]:
html = browser.html
news_soup = bs(html, "html.parser")
news_title = news_soup.find("div", class_="content_title").find("a").text
news_paragraph = news_soup.find("div", class_="article_teaser_body").text
print(news_title)
print(news_paragraph)

Curiosity's Mars Methane Mystery Continues
The Curiosity rover’s follow-on sampling shows last week’s methane levels have sharply decreased.


### JPL Mars Space Images - Featured Image
- Visit the url for JPL Featured Space Image here.
-  Use splinter to navigate the site and find the image url for the current Featured Mars Image and assign the url string to a variable called featured_image_url.
- Make sure to find the image url to the full size .jpg image.
- Make sure to save a complete url string for this image.

In [6]:
image_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(image_url)
image_html = browser.html
image_soup = bs(image_html, "html.parser")
image_soup_style = image_soup.find("article")["style"].replace("background-image: url('", "").replace("');", "")
print(image_soup_style)

/spaceimages/images/wallpaper/PIA19977-1920x1200.jpg


In [7]:
source_url = 'https://www.jpl.nasa.gov'
featured_image_url = source_url + image_soup_style
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA19977-1920x1200.jpg


### Mars Weather
Visit the Mars Weather twitter account here and scrape the latest Mars weather tweet from the page. Save the tweet text for the weather report as a variable called mars_weather.

In [8]:
html_weather = browser.html
soup = bs(html_weather,'html.parser')

# Find the elements that contain tweets
tweets = soup.find_all('div', class_='js-tweet-text-container')

# Search entries that shows weather related posts to exclude non-weather tweets
for tweet in tweets:
    mars_weather = tweet.find('p').text
    if 'Sol' and 'pressure' in mars_weather:
        print(mars_weather)
        break
    else:
        pass

### Mars Facts
- Visit the Mars Facts webpage here and use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.
- Use Pandas to convert the data to a HTML table string.

In [9]:
facts_url = "https://space-facts.com/mars/"
# browser.visit(facts_url)
# fact_html = browser.html
tables = pd.read_html(facts_url)
tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [10]:
df = tables[0]
df.columns = ["Description", "Values"]
df.set_index("Description", inplace=True)
df

Unnamed: 0_level_0,Values
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [11]:
html_table = df.to_html()
html_table.replace('\n', '')
html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Values</th>\n    </tr>\n    <tr>\n      <th>Description</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr

In [12]:
df.to_html("table.html")


### Mars Hemispheres
- Visit the USGS Astrogeology site here to obtain high resolution images for each of Mar's hemispheres.
- You will need to click each of the links to the hemispheres in order to find the image url to the full resolution image.
- Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing the hemisphere name. Use a Python dictionary to store the data using the keys img_url and title.
- Append the dictionary with the image url string and the hemisphere title to a list. This list will contain one dictionary for each hemisphere.

In [13]:
# Retrieve url for the high resolution url

hem_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(hem_url)
hem_html = browser.html
hem_soup = bs(hem_html, "html.parser")
hemisphere_image_url = []
all_hem = hem_soup.find_all("div", class_="item")
main_url = "https://astrogeology.usgs.gov/"

for hem in all_hem:
    title = hem.find("img", class_="thumb")["alt"]
    partial_img_url = hem.find("a", class_="itemLink product-item")["href"]
#     print(f"{main_url}{partial_img_url}")

# Retrieve image 
    browser.visit(f"{main_url}{partial_img_url}")
    indiv_img_html = browser.html
    indiv_img_soup = bs(indiv_img_html, "html.parser")
    indiv_partial_img_url = indiv_img_soup.find("img", class_="wide-image")["src"]
    img_url = main_url + indiv_partial_img_url
#     print(indiv_partial_img_url)
    print (img_url)
    hemisphere_image_url.append({"title": title, "img_url": img_url})
hemisphere_image_url


https://astrogeology.usgs.gov//cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg
https://astrogeology.usgs.gov//cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg
https://astrogeology.usgs.gov//cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg
https://astrogeology.usgs.gov//cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg


[{'title': 'Cerberus Hemisphere Enhanced thumbnail',
  'img_url': 'https://astrogeology.usgs.gov//cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced thumbnail',
  'img_url': 'https://astrogeology.usgs.gov//cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced thumbnail',
  'img_url': 'https://astrogeology.usgs.gov//cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced thumbnail',
  'img_url': 'https://astrogeology.usgs.gov//cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]