In [1]:
# Set Dependencies
import pandas as pd
import requests
import os
from datetime import datetime
import time
import lxml.html as lh

In [2]:
# Import BeautifulSoup
from bs4 import BeautifulSoup as bs

In [3]:
# Import Browser
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist

In [4]:
# https://splinter.readthedocs.io/en/latest/drivers/chrome.html
!which chromedriver

/usr/local/bin/chromedriver


In [5]:
# Capture path to Chrome Driver & initialize browser
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

In [6]:
# Page to visit and scrape
news_url = "https://mars.nasa.gov/news/"
browser.visit(news_url)

In [7]:
html = browser.html

In [8]:
# Create a Beautiful Soup object
soup = bs(html, 'html.parser')

In [9]:
## NASA Mars News

In [10]:
# Extract title text
title = soup.title.text
print(title)

News  – NASA’s Mars Exploration Program 


In [11]:
news_title = soup.find("div", class_="content_title").text
print(f"Title: {news_title}")

Title: NASA's MAVEN Explores Mars to Understand Radio Interference at Earth


In [12]:
# Extract the paragraph text
news_p = soup.find("div", class_="article_teaser_body").text
print(f"Paragraph: {news_p}")

Paragraph: NASA’s MAVEN spacecraft has discovered “layers” and “rifts” in the electrically charged part of the upper atmosphere of Mars.


In [13]:
## Mars Space Images

In [14]:
image_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(image_url)
# Establish HTML object
html_image = browser.html
# Parse the HTML with BeautifulSoup
image_soup = bs(html_image, 'html.parser')

# Extract the background image url
featured_image_url = image_soup.find('article')['style'].replace('background-image: url(','').replace(';', '')[1:-1]

# Website's main url
main_url = 'https://www.jpl.nasa.gov'

featured_image_url = main_url + featured_image_url
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA17896-1920x1200.jpg'


In [15]:
## Mars Weather

In [16]:
#get mars weather's latest tweet from the website
url_weather = "https://twitter.com/marswxreport?lang=en"
browser.visit(url_weather)

In [23]:
html_weather = browser.html
soup = bs(html_weather, "html.parser")

mars_weather = soup.find("p", class_="text")
print(mars_weather)

None


In [24]:
## Mars Facts

In [25]:
# Read the Mars facts HTML
mars_facts_url = 'https://space-facts.com/mars/'
browser.visit(mars_facts_url)

mars_facts = pd.read_html(mars_facts_url)
# Convert to DataFrame
mars_facts_df = mars_facts[0]
# Preview the DataFrame
mars_facts_df.head()

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"


In [26]:
# Rename the columns
mars_facts_df.columns = ['Factoid', 'Description']
mars_facts_df.head()

Unnamed: 0,Factoid,Description
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"


In [27]:
# Set the index to 'Factoid' and save to HTML
mars_facts_df.set_index('Factoid', inplace=True)
mars_facts_df.to_html()

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Description</th>\n    </tr>\n    <tr>\n      <th>Factoid</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr

In [28]:
# Convert the data to a HTML table string
mars_data = mars_facts_df.to_dict(orient='records')
mars_facts_df.head()

Unnamed: 0_level_0,Description
Factoid,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"


In [29]:
## Mars Hemispheres
hemis_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(hemis_url)

html_hemis = browser.html
hemis_soup = bs(html_hemis, 'html.parser')

In [30]:
# Extract items containing Mars hemisphere information
items = soup.find_all('div', class_='item')

# Create list for hemisphere image url's
hemisphere_image_urls = []

# Store the main url
hemis_main_url = 'https://astrogeology.usgs.gov'

# Loop through the items to pull image url's
for i in items:
    #Store the title of the info snippet
    title = i.find('h3').text
    #Store the link that leads to the full image website
    partial_image_url = i.find('a', class_='itemLink product-item')['href']
    #Visit the link that has the full image
    browser.visit(hemis_main_url + partial_image_url)
    #HTML object of the individual hemisphere site
    partial_image_html = browser.html
    #Parse HTML w/ BS for each and every individual hemisphere website
    soup = bs(partial_image_html, 'html.parser')
    #Pull the source of the image
    img_url = hemis_main_url + soup.find('img', class_='full-image')['src']
    #Append the info into a list of dicts
    hemisphere_image_urls.append({'title': title, 'img_url': img_url})
    
# Display the hemisphere image url's
hemisphere_image_urls

[]

In [31]:
items

[]