In [1]:
# Dependencies
from bs4 import BeautifulSoup as bs
from splinter import Browser
import pandas as pd

In [2]:
## Start a visible (non-headless) Chrome session through the local chromedriver binary
executable_path = dict(executable_path='/usr/local/bin/chromedriver')
browser = Browser('chrome', headless=False, **executable_path)

In [3]:
# Step 1: Web Scraping

## Part 1) Scrape NASA Mars News for the latest headline

## URL of the page to be scraped (the query string asks for publish_date descending)
url = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'

browser.visit(url)

## The news items may be rendered after the initial page load, so give the
## first 'li.slide' element up to 10 seconds to appear before the markup is
## captured. The call only returns True/False, so if nothing shows up the
## notebook proceeds exactly as it did before.
browser.is_element_present_by_css('li.slide', wait_time=10)

html = browser.html


In [4]:
## Build the BeautifulSoup tree for the captured page, using the 'lxml' parser
soup = bs(html, features="lxml")


In [5]:
## First <li class="slide"> in the parsed page, i.e. the top entry of the news list
recent_news = soup.find('li', attrs={'class': 'slide'})
print(recent_news)

<li class="slide"><div class="image_and_description_container"><a href="/news/8759/nasas-new-mars-rover-will-use-x-rays-to-hunt-fossils/" target="_self"><div class="rollover_description"><div class="rollover_description_inner">PIXL, an instrument on the end of the Perseverance rover's arm, will search for chemical fingerprints left by ancient microbes.</div><div class="overlay_arrow"><img alt="More" src="/assets/overlay-arrow.png"/></div></div><div class="list_image"><img alt="Illustration of the PIXL instrument on board of Perseverance rover" src="/system/news_items/list_view_images/8759_PIA24092-Main-320.jpg"/></div><div class="bottom_gradient"><div><h3>NASA's New Mars Rover Will Use X-Rays to Hunt Fossils</h3></div></div></a><div class="list_text"><div class="list_date">September 22, 2020</div><div class="content_title"><a href="/news/8759/nasas-new-mars-rover-will-use-x-rays-to-hunt-fossils/" target="_self">NASA's New Mars Rover Will Use X-Rays to Hunt Fossils</a></div><div class="

In [6]:
## The headline is the text of the first <h3> inside the news item
news_title = recent_news.h3.get_text()
print(news_title)

NASA's New Mars Rover Will Use X-Rays to Hunt Fossils


In [7]:
## The teaser paragraph is the text of the 'rollover_description_inner' element
news_p = recent_news.find(attrs={'class': 'rollover_description_inner'}).get_text()
print(news_p)

PIXL, an instrument on the end of the Perseverance rover's arm, will search for chemical fingerprints left by ancient microbes.


In [8]:
## Part 2) Use Splinter to scrape JPL Mars Space Images - Featured Image

## Page that lists the JPL space images filtered to the Mars category
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

## Load the page in the browser and capture its current markup
browser.visit(url)
html = browser.html

## Re-build the BeautifulSoup tree for this page, using the 'lxml' parser
soup = bs(html, features="lxml")


In [9]:
## Find image URL for current featured Mars image
base_url = 'https://www.jpl.nasa.gov'

## The first <article> inside the carousel carries the image in its inline
## style attribute (presumably of the form  background-image: url('/path.jpg');).
style = soup.find('div', class_='carousel_items').article["style"]

## Take exactly what sits between "url(" and the next ")" and drop optional
## quotes/whitespace. Unlike split("url") + strip(";(')"), this still works when
## the path itself contains "url", when double quotes are used, or when further
## CSS declarations follow the url(...) value.
image_path = style.partition("url(")[2].partition(")")[0].strip(" '\"")
if not image_path:
    raise ValueError("no url(...) value found in featured image style: " + repr(style))

featured_image_url = base_url + image_path
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA15253-1920x1200.jpg


In [10]:
## Part 3) Use Pandas to scrape the table of planet facts (Diameter, Mass, etc.)

url = 'https://space-facts.com/mars/'

## read_html yields one DataFrame per table it finds; keep the first one
table = pd.read_html(io=url)[0]
table

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [11]:
## Give the two positional columns (0 and 1) descriptive names.
## The renamed frame is re-bound to `table` instead of using inplace=True, so
## no existing DataFrame object is mutated behind the back of earlier output.
table = table.rename(columns={0: "mars_facts", 1: "data"})
table

Unnamed: 0,mars_facts,data
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [12]:
## Render the facts frame as an HTML <table> string; buf=None makes to_html
## return the markup, and index=False leaves the row index out of it
html_table = table.to_html(buf=None, index=False)
html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th>mars_facts</th>\n      <th>data</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <td>Surface Temperature:</td>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <td>First Record:</td>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <td>Recorded By:</td>\n      <td>Egyptian astronomers</td>\n    </tr>\n  </tbody>\n</table>'

In [13]:
html_table.replace('\n', '')
!open table.html