In [46]:
# import dependencies
import os
import pandas as pd
from splinter import Browser
from bs4 import BeautifulSoup
import requests
import re
from selenium import webdriver
from selenium.webdriver.common.keys import Keys


In [4]:
# shell escape: print the path of the chromedriver executable found on PATH
!which chromedriver

/usr/local/bin/chromedriver


In [5]:
# Location of the chromedriver binary. Defaults to the original hard-coded
# path, but can be overridden with the CHROMEDRIVER_PATH environment variable
# on machines where chromedriver is installed elsewhere.
chromedriver_path = os.environ.get('CHROMEDRIVER_PATH', '/usr/local/bin/chromedriver')
executable_path = {'executable_path': chromedriver_path}

# Launch a visible (non-headless) Chrome window driven by splinter; every
# scraping cell below reuses this single `browser` object.
browser = Browser('chrome', **executable_path, headless=False)

## Mars News

In [6]:
# NASA Mars news listing to be scraped for articles
# (query asks for 40 items per page, ordered by publish date descending)
article_url = (
    'https://mars.nasa.gov/news/'
    '?page=0&per_page=40'
    '&order=publish_date+desc%2Ccreated_at+desc'
    '&search=&category=19%2C165%2C184%2C204'
    '&blank_scope=Latest'
)


In [7]:
# use splinter to load the news page, then parse the browser's HTML with BeautifulSoup
browser.visit(article_url)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')


In [8]:
# the first element carrying the 'content_title' class holds the newest headline
title_tag = soup.find(class_='content_title')
news_title = title_tag.text
print(news_title)


Mars 2020 Rover: T-Minus One Year and Counting 


In [9]:
# the first element carrying the 'article_teaser_body' class holds the article teaser
teaser_tag = soup.find(class_='article_teaser_body')
news_p = teaser_tag.text
print(news_p)


The launch period for NASA's next rover, Mars 2020, opens exactly one year from today, July 17, 2020, and extends through Aug. 5, 2020.


## Featured Image

In [10]:
# JPL space-images listing filtered to the Mars category; scraped below for the featured image
image_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'


In [11]:
# use splinter to load the images page, then parse the browser's HTML with BeautifulSoup
browser.visit(image_url)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')


In [12]:
# find and save URL for the current featured image
article = soup.find('article')

# the image path is the text between the first pair of single quotes
# in the <article> element's inline style attribute
base_url = article['style'].split("'")[1]

# the path is site-relative, so prefix the JPL host to make it absolute
url = 'https://www.jpl.nasa.gov' + base_url
print(url)


https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA18273-1920x1200.jpg


## Mars Weather

In [13]:
# Twitter timeline of the marswxreport account, scraped below for the weather report
weather_url = 'https://twitter.com/marswxreport?lang=en'


In [14]:
# use splinter to load the weather timeline, then parse the browser's HTML with BeautifulSoup
browser.visit(weather_url)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')


In [15]:
# find and save the weather tweet text: take the first text node on the page
# that mentions "InSight" (presumably the most recent report — verify if the
# timeline layout changes)
insight_pattern = re.compile("InSight")
tweet = soup.find(string=insight_pattern)
print(tweet)


InSight sol 222 (2019-07-12) low -99.7ºC (-147.5ºF) high -24.8ºC (-12.6ºF)
winds from the SSE at 4.2 m/s (9.4 mph) gusting to 15.6 m/s (34.8 mph)
pressure at 7.60 hPa


## Mars Facts


In [16]:
# space-facts.com Mars page; its HTML tables are read below for the facts data
facts_url = 'https://space-facts.com/mars'
    

In [17]:
# pandas' `read_html` returns one DataFrame per <table> found at the URL
facts_tables = pd.read_html(facts_url)

# the table at index 1 is the one used as the Mars facts table
mars_df = facts_tables[1]
mars_df

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [18]:
# give the two scraped columns descriptive labels
column_labels = ["Description", "Values"]
mars_df.columns = column_labels
mars_df


Unnamed: 0,Description,Values
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [19]:
# Drop the first row, then make the Description column the index.
# set_index is chained and returns a new frame (no inplace=True), so the
# sliced frame produced by iloc is never mutated in place.
# NOTE(review): iloc[1:] removes the "Equatorial Diameter:" row, which looks
# like real data rather than a header row — confirm this is intended.
mars_df = mars_df.iloc[1:].set_index('Description')
mars_df


Unnamed: 0_level_0,Values
Description,Unnamed: 1_level_1
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [21]:
# convert the facts DataFrame to an HTML <table> string
mars_table = mars_df.to_html()
mars_table


'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Values</th>\n    </tr>\n    <tr>\n      <th>Description</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr>\n  </tbody>\n</table>'

## Mars Hemispheres

In [55]:
# USGS Astrogeology search results listing the enhanced Mars hemisphere images
hem_url = (
    'https://astrogeology.usgs.gov/search/results'
    '?q=hemisphere+enhanced&k1=target&v1=Mars'
)


In [56]:
# use splinter to load the hemisphere search results, then parse the browser's HTML with BeautifulSoup
browser.visit(hem_url)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')


In [66]:
# Each search result is a <div class="description"> whose link (<a href>)
# points to the hemisphere's detail page and whose <h3> holds its title.
image_div = soup.find_all("div", class_="description")

# site-relative detail-page links and hemisphere titles, in page order
hem_url_list = [div.a["href"] for div in image_div]
hem_title_list = [div.h3.text for div in image_div]

print(hem_url_list)
# print the list built above (the original printed `title_list`, a name this
# notebook never defines, which raises NameError on a fresh kernel)
print(hem_title_list)


['/search/map/Mars/Viking/cerberus_enhanced', '/search/map/Mars/Viking/schiaparelli_enhanced', '/search/map/Mars/Viking/syrtis_major_enhanced', '/search/map/Mars/Viking/valles_marineris_enhanced']
['Cerberus Hemisphere Enhanced', 'Schiaparelli Hemisphere Enhanced', 'Syrtis Major Hemisphere Enhanced', 'Valles Marineris Hemisphere Enhanced']


In [75]:
# create list to hold image urls
image_list = []

# Visit each hemisphere's detail page and pull out the full-size image URL.
# The loop uses its own names (`hem_path`, `hem_image_url`) so it does not
# overwrite the notebook-level `url` (featured image URL) or `image_url`
# (JPL images page URL) assigned in earlier cells.
for hem_path in hem_url_list:

    # the scraped hrefs are site-relative, so prefix the host to get a full URL
    url_path = ('https://astrogeology.usgs.gov' + hem_path)

    # use splinter to read and parse html
    browser.visit(url_path)
    html = browser.html
    soup = BeautifulSoup(html, 'html.parser')

    # drill down to find image URL: <div id="wide-image"> -> <img class="wide-image">
    image_div = soup.find("div", id="wide-image")
    image_line = image_div.find("img", class_="wide-image")
    image = image_line["src"]
    hem_image_url = ('https://astrogeology.usgs.gov' + image)

    # append to image list
    image_list.append(hem_image_url)

print(image_list)
    

['https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg', 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg', 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg', 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg']


In [77]:
# Pair each hemisphere title with its image URL, one dictionary per hemisphere.
# Uses `hem_title_list` (the list actually built earlier in this notebook;
# `title_list` is never defined) and a dict literal, so the builtin `dict`
# is not shadowed for later cells.
hemisphere_image_urls = [
    {'title': title, 'img_url': image}
    for title, image in zip(hem_title_list, image_list)
]

print(hemisphere_image_urls)


[{'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]


In [22]:
# Load the Cerberus detail page explicitly so this cell does not depend on
# whichever page an earlier cell happened to leave in `soup`.
cereberus_page_url = 'https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced'
browser.visit(cereberus_page_url)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# find and save URL for the cereberus image (needed when the hemisphere list
# is started below): <div id="wide-image"> -> <img class="wide-image">,
# whose site-relative src is prefixed with the host
image_div = soup.find("div", id="wide-image")
image_line = image_div.find("img", class_="wide-image")
cereberus_image = image_line["src"]
cereberus_url = ('https://astrogeology.usgs.gov' + cereberus_image)

# find and save title for the cereberus image
cereberus_title = soup.find("h2", class_="title").text
print(cereberus_title)


Cerberus Hemisphere Enhanced


In [23]:
# start the list of hemisphere dictionaries with the Cerberus entry
mars_list = [{'title': cereberus_title, 'img_url': cereberus_url}]


In [24]:
# detail page for the Schiaparelli hemisphere (host + site-relative path)
schiaparelli_url = ('https://astrogeology.usgs.gov'
                    + '/search/map/Mars/Viking/schiaparelli_enhanced')


In [25]:
# use splinter to load the Schiaparelli page, then parse the browser's HTML with BeautifulSoup
browser.visit(schiaparelli_url)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')


In [26]:
# find and save URL for the schiaparelli image:
# the <img class="wide-image"> inside <div id="wide-image"> carries a
# site-relative src, so the host is prefixed to make it absolute
image_div = soup.find("div", id="wide-image")
image_line = image_div.find("img", class_="wide-image")
schiaparelli_image = image_line["src"]
# NOTE: this rebinds schiaparelli_url from the detail-page URL to the image
# URL, so re-running the earlier browser.visit(schiaparelli_url) cell after
# this one would load the image rather than the page
schiaparelli_url = ('https://astrogeology.usgs.gov' + schiaparelli_image)
print(schiaparelli_url)


https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg


In [27]:
# the page's <h2 class="title"> holds the hemisphere name
title_tag = soup.find("h2", class_="title")
schiaparelli_title = title_tag.text
print(schiaparelli_title)


Schiaparelli Hemisphere Enhanced


In [28]:
# add the Schiaparelli entry to the list of hemisphere dictionaries
schiaparelli_entry = {'title': schiaparelli_title, 'img_url': schiaparelli_url}
mars_list.append(schiaparelli_entry)


In [29]:
# detail page for the Syrtis Major hemisphere (host + site-relative path)
syrtis_url = ('https://astrogeology.usgs.gov'
              + '/search/map/Mars/Viking/syrtis_major_enhanced')


In [30]:
# use splinter to load the Syrtis Major page, then parse the browser's HTML with BeautifulSoup
browser.visit(syrtis_url)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')


In [31]:
# find and save URL for the syrtis image:
# the <img class="wide-image"> inside <div id="wide-image"> carries a
# site-relative src, so the host is prefixed to make it absolute
image_div = soup.find("div", id="wide-image")
image_line = image_div.find("img", class_="wide-image")
syrtis_image = image_line["src"]
# NOTE: this rebinds syrtis_url from the detail-page URL to the image URL,
# so re-running the earlier browser.visit(syrtis_url) cell after this one
# would load the image rather than the page
syrtis_url = ('https://astrogeology.usgs.gov' + syrtis_image)
print(syrtis_url)


https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg


In [32]:
# the page's <h2 class="title"> holds the hemisphere name
title_tag = soup.find("h2", class_="title")
syrtis_title = title_tag.text
print(syrtis_title)


Syrtis Major Hemisphere Enhanced


In [33]:
# add the Syrtis Major entry to the list of hemisphere dictionaries
syrtis_entry = {'title': syrtis_title, 'img_url': syrtis_url}
mars_list.append(syrtis_entry)


In [34]:
# detail page for the Valles Marineris hemisphere (host + site-relative path)
valles_url = ('https://astrogeology.usgs.gov'
              + '/search/map/Mars/Viking/valles_marineris_enhanced')


In [35]:
# use splinter to load the Valles Marineris page, then parse the browser's HTML with BeautifulSoup
browser.visit(valles_url)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')


In [36]:
# find and save URL for the valles image:
# the <img class="wide-image"> inside <div id="wide-image"> carries a
# site-relative src, so the host is prefixed to make it absolute
image_div = soup.find("div", id="wide-image")
image_line = image_div.find("img", class_="wide-image")
valles_image = image_line["src"]
# NOTE: this rebinds valles_url from the detail-page URL to the image URL,
# so re-running the earlier browser.visit(valles_url) cell after this one
# would load the image rather than the page
valles_url = ('https://astrogeology.usgs.gov' + valles_image)
print(valles_url)


https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg


In [37]:
# the page's <h2 class="title"> holds the hemisphere name
title_tag = soup.find("h2", class_="title")
valles_title = title_tag.text
print(valles_title)


Valles Marineris Hemisphere Enhanced


In [38]:
# add the Valles Marineris entry, then display the completed list of hemisphere dictionaries
valles_entry = {'title': valles_title, 'img_url': valles_url}
mars_list.append(valles_entry)
mars_list

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]