### Import dependencies

In [1]:
from bs4 import BeautifulSoup
import requests
from splinter import Browser
import pandas as pd

## NASA Mars News

In [2]:
# Setup configuration variables to enable Splinter to interact with browser
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

In [3]:
# url to webpage that will be scraped & inform browser to visit that webpage
url = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'
browser.visit(url)

In [4]:
# create beautiful soup object
soup= BeautifulSoup(browser.html, "html.parser")

In [5]:
# return results as an iterable list
results = soup.find_all('li', class_="slide")
print(len(results))

40


In [6]:
# print first instance of results as it is the most recent news article since articles are in descending order
print(results[0].prettify())

<li class="slide">
 <div class="image_and_description_container">
  <a href="/news/8578/media-get-a-close-up-of-nasas-mars-2020-rover/" target="_self">
   <div class="rollover_description">
    <div class="rollover_description_inner">
     The clean room at NASA's Jet Propulsion Laboratory was open to the media to see NASA's next Mars explorer before it leaves for Florida in preparation for a summertime launch.
    </div>
    <div class="overlay_arrow">
     <img alt="More" src="/assets/overlay-arrow.png"/>
    </div>
   </div>
   <div class="list_image">
    <img alt="Members of the media interview the builders of the Mars 2020 mission inside JPL's clean room." src="/system/news_items/list_view_images/8578_24741_PIA23586-SAFfloor-MAIN-16_320x240px.jpg"/>
   </div>
   <div class="bottom_gradient">
    <div>
     <h3>
      Media Get a Close-Up of NASA's Mars 2020 Rover
     </h3>
    </div>
   </div>
  </a>
  <div class="list_text">
   <div class="list_date">
    December 27, 2019
   <

In [7]:
# set first instance as a variable for lastest news article
latest_news=results[0]

In [8]:
# create variable for articles title
title=latest_news.find("div", class_="content_title").text
title

"Media Get a Close-Up of NASA's Mars 2020 Rover"

In [9]:
# create variable for articles text
text=latest_news.find("div", class_="article_teaser_body").text
text

"The clean room at NASA's Jet Propulsion Laboratory was open to the media to see NASA's next Mars explorer before it leaves for Florida in preparation for a summertime launch."

## JPL Mars Space Images - Featured Image

In [10]:
# url to webpage that will be scraped & inform browser to visit that webpage
url2 = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url2)

In [11]:
# tell browser to click on full image button for the featured image
browser.click_link_by_partial_text('FULL')



In [12]:
# create beautiful soup object
soup = BeautifulSoup(browser.html, 'html.parser')

In [13]:
# find featured image 
image=soup.find_all('img', class_="fancybox-image")
image

[<img class="fancybox-image" src="/spaceimages/images/mediumsize/PIA14417_ip.jpg" style="display: inline;"/>]

In [14]:
# pull image source for feature image
img_src=image[0]['src']
img_src

'/spaceimages/images/mediumsize/PIA14417_ip.jpg'

In [15]:
# create url for featured image and store as a variable
ft_image_url = 'https://www.jpl.nasa.gov' + img_src
ft_image_url

'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA14417_ip.jpg'

## Mars Weather

In [16]:
# url to webpage that will be scraped & inform browser to visit that webpage
url3 = 'https://twitter.com/marswxreport?lang=en'
browser.visit(url3)

In [17]:
# create beautiful soup object
soup = BeautifulSoup(browser.html, 'html.parser')

In [18]:
# find list of all tweets context
tweets=soup.find_all('div', class_='js-tweet-text-container')
len(tweets)

20

In [19]:
# look at first tweet in list
tweets[0]

<div class="js-tweet-text-container">
<p class="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text" data-aria-label-part="0" lang="en">InSight sol 395 (2020-01-06) low -99.7ºC (-147.4ºF) high -17.8ºC (-0.1ºF)
winds from the SE at 6.2 m/s (13.8 mph) gusting to 20.3 m/s (45.3 mph)
pressure at 6.40 hPa<a class="twitter-timeline-link u-hidden" data-pre-embedded="true" dir="ltr" href="https://t.co/GFhr7zuVp2">pic.twitter.com/GFhr7zuVp2</a></p>
</div>

In [20]:
# pull the text from that tweet thats stored in the p tag
mars_weather=tweets[0].find('p').text
mars_weather

'InSight sol 395 (2020-01-06) low -99.7ºC (-147.4ºF) high -17.8ºC (-0.1ºF)\nwinds from the SE at 6.2 m/s (13.8 mph) gusting to 20.3 m/s (45.3 mph)\npressure at 6.40 hPapic.twitter.com/GFhr7zuVp2'

In [21]:
# clean up text pulled from tweet
mars_weather = mars_weather.replace('\n', ', ')
mars_weather

'InSight sol 395 (2020-01-06) low -99.7ºC (-147.4ºF) high -17.8ºC (-0.1ºF), winds from the SE at 6.2 m/s (13.8 mph) gusting to 20.3 m/s (45.3 mph), pressure at 6.40 hPapic.twitter.com/GFhr7zuVp2'

## Mars Facts

In [22]:
# url to webpage that will be scraped & inform browser to visit that webpage
url4 = 'https://space-facts.com/mars/'
browser.visit(url4)

In [23]:
# scrape tabular data from webpage using read_html pandas function
# assign first instance of tabular data to a variable
dfs = pd.read_html(url4)
mars_facts = dfs[0]
mars_facts

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [24]:
# clean df, rename columns 
mars_facts=mars_facts.rename(columns={0:'Description', 1:'Value'})
mars_facts

Unnamed: 0,Description,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [25]:
html_table=mars_facts.to_html()
html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Description</th>\n      <th>Value</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>Surface Temperature:</td>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>First Record:</td>\n      <td>2nd millennium 

In [26]:
html_table=html_table.replace('\n', '')
html_table

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Description</th>      <th>Value</th>    </tr>  </thead>  <tbody>    <tr>      <th>0</th>      <td>Equatorial Diameter:</td>      <td>6,792 km</td>    </tr>    <tr>      <th>1</th>      <td>Polar Diameter:</td>      <td>6,752 km</td>    </tr>    <tr>      <th>2</th>      <td>Mass:</td>      <td>6.39 × 10^23 kg (0.11 Earths)</td>    </tr>    <tr>      <th>3</th>      <td>Moons:</td>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>4</th>      <td>Orbit Distance:</td>      <td>227,943,824 km (1.38 AU)</td>    </tr>    <tr>      <th>5</th>      <td>Orbit Period:</td>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>6</th>      <td>Surface Temperature:</td>      <td>-87 to -5 °C</td>    </tr>    <tr>      <th>7</th>      <td>First Record:</td>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>8</th>      <td>Recorded By:</td>      <td>Egyptian astronom

## Mars Hemispheres

In [27]:
# url to webpage that will be scraped & inform browser to visit that webpage
url5 = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url5)

In [28]:
soup = BeautifulSoup(browser.html, 'html.parser')

In [29]:
results = soup.find_all('div', class_='description')
len(results)

4

In [30]:
# create empty list to store dictionaries for each hemispheres title and image url
hemisphere_image_urls = []

for result in results:
    browser.visit(url5)
    
    a=result.find('a')
    title=a.find('h3').text
    
    browser.click_link_by_partial_text(title)
    browser.click_link_by_partial_text('Open')
    soup = BeautifulSoup(browser.html, 'html.parser')
    img = soup.find_all('img', class_='wide-image')
    img_src = img[0]['src']
    img_url = 'https://astrogeology.usgs.gov' + img_src
    
    hems={'title':title, 'image_url':img_url}
    hemisphere_image_urls.append(hems)
    
    
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'image_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'image_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'image_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'image_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]