In [1]:
# Dependencies
from bs4 import BeautifulSoup as bs
import requests
import pymongo
import pandas as pd
from splinter import Browser
import time

In [2]:
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=True)

In [3]:
# Scrape the [NASA Mars News Site](https://mars.nasa.gov/news/) and collect the latest News Title and Paragraph Text. 

url = 'https://mars.nasa.gov/news/'
browser.visit(url)
time.sleep(1)
html = browser.html
soup = bs(html, "html.parser")
mars = {}

In [4]:
# Assign the text to variables that you can reference later.

mars['news_title'] = soup.find('div', class_='content_title').get_text().strip()
mars['news_p']= soup.find("div", class_="article_teaser_body").get_text().strip()
mars

{'news_title': "Robotic Toolkit Added to NASA's Mars 2020 Rover",
 'news_p': "The bit carousel, which lies at the heart of the rover's Sample Caching System, is now aboard NASA's newest rover."}

In [19]:
# Use splinter to navigate the site and find the image url for the current Featured Mars Image and assign the url string 
# to a variable called `featured_image_url`.

url2 = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars/assets/images/logo_nasa_trio_black@2x.png'
browser.visit(url2)
html = browser.html
soup = bs(html, "html.parser")

In [20]:
# Make sure to find the image url to the full size `.jpg` image.
# Make sure to save a complete url string for this image.
image_path = soup.find_all('img')[0]["src"]
mars['featured_image_url'] = url2 + image_path
featured_image_url
mars

{'news_title': "Robotic Toolkit Added to NASA's Mars 2020 Rover",
 'news_p': "The bit carousel, which lies at the heart of the rover's Sample Caching System, is now aboard NASA's newest rover.",
 'featured_image_url': 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars/assets/images/logo_nasa_trio_black@2x.png/assets/images/logo_nasa_trio_black@2x.png'}

In [7]:
# Visit the Mars Facts webpage [here](https://space-facts.com/mars/) and use Pandas to scrape the table containing facts 
# about the planet including Diameter, Mass, etc.

url3 = 'https://space-facts.com/mars/'
browser.visit(url3)
html = browser.html
soup = bs(html, "html.parser")

In [8]:
tables = pd.read_html(url3)
tables

[  Mars - Earth Comparison             Mars            Earth
 0               Diameter:         6,779 km        12,742 km
 1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 2                  Moons:                2                1
 3      Distance from Sun:   227,943,824 km   149,598,262 km
 4         Length of Year:   687 Earth days      365.24 days
 5            Temperature:    -153 to 20 °C      -88 to 58°C,
                       0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [11]:
df=tables[1]
df.head()

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"


In [37]:
renamed_df = df.rename(columns={
    0: "Description",
    1: "Value"
})
renamed_df.head()

Unnamed: 0,Description,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"


In [38]:
# Use Pandas to convert the data to a HTML table string.
df.to_html('mars_facts.html')

In [31]:
# Visit the USGS Astrogeology site to obtain high resolution images for each of Mar's hemispheres.
# You will need to click each of the links to the hemispheres in order to find the image url to the full resolution image.
# Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing the 
# hemisphere name. Use a Python dictionary to store the data using the keys `img_url` and `title`.


# url4='https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars' 
url4='https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced'
browser.visit(url4)
html = browser.html
soup = bs(html, "html.parser")
mars_title_and_images={}


In [32]:
html

'<html lang="en"><head>\n\t\t<link rel="stylesheet" type="text/css" href="//ajax.googleapis.com/ajax/libs/jqueryui/1.11.4/themes/smoothness/jquery-ui.css">\n<title>Cerberus Hemisphere Enhanced | USGS Astrogeology Science Center</title>\n\t\t<meta name="description" content="Mosaic of the Cerberus hemisphere of Mars projected into point perspective, a view similar to that which one would see from…">\n\t\t<meta name="keywords" content="USGS,Astrogeology Science Center,Cartography,Geology,Space,Geological Survey,Mapping">\n\t\t<meta http-equiv="X-UA-Compatible" content="IE=edge">\n\t\t<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n\t\t<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1">\n\t\t<meta name="google-site-verification" content="x61hXXVj7wtfBSNOPnTftajMsZ5yB2W-qRoyr7GtOKM">\n\t\t<!--<link rel="stylesheet" href="http://fonts.googleapis.com/css?family=Open+Sans:400italic,400,bold"/>-->\n\t\t<link rel="stylesheet" media="screen"

In [36]:
mars_title_and_images['cerberus_title'] = soup.find('h2', class_="title").get_text().strip()
# soup.select('h2.title')
mars_title_and_images

{'cerberus_title': 'Cerberus Hemisphere Enhanced',
 'cerberus_image': 'https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced/images/usgs_logo_main_2x.png'}

In [39]:
cerberus_image= soup.find_all('img')[0]["src"]
mars_title_and_images['cerberus_image']= url4 + cerberus_image
mars_title_and_images

{'cerberus_title': 'Cerberus Hemisphere Enhanced',
 'cerberus_image': 'https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced/images/usgs_logo_main_2x.png'}

In [44]:
url5='https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced'
browser.visit(url5)
html = browser.html
soup = bs(html, "html.parser")

mars_title_and_images['valles_marineris_title'] = soup.find('h2', class_="title").get_text().strip()

In [45]:
valles_marineris_image= soup.find_all('img')[0]["src"]
mars_title_and_images['valles_marineris_image']= url5 + valles_marineris_image

In [46]:
url6='https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced'
browser.visit(url6)
html = browser.html
soup = bs(html, "html.parser")

mars_title_and_images['schiaparelli_title'] = soup.find('h2', class_="title").get_text().strip()

In [47]:
schiaparelli_image= soup.find_all('img')[0]["src"]
mars_title_and_images['schiaparelli_image']= url6 + schiaparelli_image

In [48]:
url7='https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced'
browser.visit(url5)
html = browser.html
soup = bs(html, "html.parser")

mars_title_and_images['syrtis_major_title'] = soup.find('h2', class_="title").get_text().strip()

In [49]:
syrtis_major_image= soup.find_all('img')[0]["src"]
mars_title_and_images['syrtis_major_image']= url7 + syrtis_major_image

In [52]:
# Append the dictionary with the image url string and the hemisphere title to a list. 
# This list will contain one dictionary for each hemisphere.

hemisphere_image_urls =[]
hemisphere_image_urls.append(mars_title_and_images)
hemisphere_image_urls 

[{'cerberus_title': 'Cerberus Hemisphere Enhanced',
  'cerberus_image': 'https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced/images/usgs_logo_main_2x.png',
  'valles_marineris_title': 'Valles Marineris Hemisphere Enhanced',
  'valles_marineris_image': 'https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced/images/usgs_logo_main_2x.png',
  'schiaparelli_title': 'Schiaparelli Hemisphere Enhanced',
  'schiaparelli_image': 'https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced/images/usgs_logo_main_2x.png',
  'syrtis_major_title': 'Valles Marineris Hemisphere Enhanced',
  'syrtis_major_image': 'https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced/images/usgs_logo_main_2x.png'}]

In [53]:
browser.quit()

In [54]:
!jupyter nbconvert --to script scrape_mars.ipynb

This application is used to convert notebook files (*.ipynb) to various other
formats.


Options

-------



Arguments that take values are actually convenience aliases to full
Configurables, whose aliases are listed on the help line. For more information
on full configurables, see '--help-all'.


--debug

    set log level to logging.DEBUG (maximize logging output)

--generate-config

    generate default config file

-y

    Answer yes to any questions instead of prompting.

--execute

    Execute the notebook prior to export.

--allow-errors

    Continue notebook execution even if one of the cells throws an error and include the error message in the cell output (the default behaviour is to abort conversion). This flag is only relevant if '--execute' was specified, too.

--stdin

    read a single notebook file from stdin. Write the resulting notebook with default basename 'notebook.*'

--stdout

    Write notebook output to stdout instead of files.

--inplace

    Run nbconvert in 

