### NASA Mars News

Scrape the [NASA Mars News Site](https://mars.nasa.gov/news/) and collect the latest News Title and Paragraph Text. 

In [1]:
from bs4 import BeautifulSoup
import pandas as pd
import requests
from splinter import Browser

In [2]:
# Set the executable path and initialize the chrome browser in splinter
# Below two lines of code are for Mac users
# executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
# browser = Browser('chrome', **executable_path)

executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

In [3]:
url = "https://mars.nasa.gov/news/" 
browser.visit(url)

In [4]:
# convert the browser.html to a soup
html = browser.html
nasa_soup = BeautifulSoup(html, 'html.parser')

In [5]:
# Find the latest news title
first_title = nasa_soup.select_one("ul.item_list li.slide div.content_title").text
print(f'The latest news title is "{first_title}".')

AttributeError: 'NoneType' object has no attribute 'text'

In [None]:
# Alternate method to get the latest title 
# title = nasa_soup.find('div', class_="content_title").get_text()

In [None]:
# Find the latest paragraph text
latest_paragraph_text = nasa_soup.select_one("ul.item_list li.slide div.article_teaser_body").text
print(f'The latest news paragraph is:')
print(latest_paragraph_text)

### JPL Mars Space Images - Featured Image

* Visit the url for JPL Featured Space Image [here](https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars).

* Use splinter to navigate the site and find the image url for the current Featured Mars Image and assign the url string to a variable called `featured_image_url`.

* Make sure to find the image url to the full size `.jpg` image.

* Make sure to save a complete url string for this image.

```python
# Example:
featured_image_url = 'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA16225_hires.jpg'

In [6]:
# Visit URL
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)

In [7]:
# Find and click the full image button
full_image_id = browser.find_by_id('full_image')
full_image_id.click()


In [8]:
# Find the more info button and click that
browser.is_element_present_by_text('more info', wait_time=1)
more_info_elem = browser.find_link_by_partial_text('more info')
more_info_elem.click()


In [9]:
# Parse the resulting html with soup
html = browser.html
img_soup = BeautifulSoup(html, 'html.parser')

In [10]:
# find the relative image url
image_url = img_soup.select_one('figure.lede a img').get("src")
image_url

'/spaceimages/images/largesize/PIA22831_hires.jpg'

In [11]:
# get a complete URL
featured_image_url = f'https://www.jpl.nasa.gov{image_url}'
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA22831_hires.jpg'

### Mars Weather

* Visit the Mars Weather twitter account (https://twitter.com/marswxreport?lang=en) and scrape the latest Mars weather tweet from the page. Save the tweet text for the weather report as a variable called `mars_weather`.


In [12]:
# visit Mars Weather twitter
url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(url)

In [13]:
# convert the browser.html to a soup
html = browser.html
weather_soup = BeautifulSoup(html, 'html.parser')

In [14]:
# this only works if the 1st tweet is the weather which it usually is but not this time around
mars_weather = weather_soup.select_one("p.TweetTextSize").text
mars_weather

'InSight sol 303 (2019-10-03) low -103.9ºC (-155.1ºF) high -26.5ºC (-15.7ºF)\nwinds from the SSE at 4.9 m/s (11.0 mph) gusting to 19.1 m/s (42.8 mph)\npressure at 7.20 hPa'

In [15]:
# First, find a tweet with the data-name `Mars Weather`
mars_weather_tweet_container = weather_soup.find('ol', attrs={"id": "stream-items-id"})
mars_weather_tweet_container

<ol class="stream-items js-navigable-stream" id="stream-items-id">
<li class="js-stream-item stream-item stream-item" data-item-id="1180465422367371265" data-item-type="tweet" data-suggestion-json='{"suggestion_details":{},"tweet_ids":"1180465422367371265","scribe_component":"tweet"}' id="stream-item-tweet-1180465422367371265">
<div class="tweet js-stream-tweet js-actionable-tweet js-profile-popup-actionable dismissible-content original-tweet js-original-tweet" data-conversation-id="1180465422367371265" data-disclosure-type="" data-follows-you="false" data-item-id="1180465422367371265" data-name="Mars Weather" data-permalink-path="/MarsWxReport/status/1180465422367371265" data-reply-to-users-json='[{"id_str":"786939553","screen_name":"MarsWxReport","name":"Mars Weather","emojified_name":{"text":"Mars Weather","emojified_text_as_html":"Mars Weather"}}]' data-screen-name="MarsWxReport" data-tweet-id="1180465422367371265" data-tweet-nonce="1180465422367371265-6315a0c0-d4c0-4fd2-8a9f-9ac6b

In [16]:
# Next, search within the tweet for the p tag containing the tweet text
mars_weather_tweet_texts = mars_weather_tweet_container.findAll('p', 'TweetTextSize TweetTextSize--normal js-tweet-text tweet-text')
mars_weather_tweet_texts

[<p class="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text" data-aria-label-part="0" lang="en">InSight sol 303 (2019-10-03) low -103.9ºC (-155.1ºF) high -26.5ºC (-15.7ºF)
 winds from the SSE at 4.9 m/s (11.0 mph) gusting to 19.1 m/s (42.8 mph)
 pressure at 7.20 hPa</p>,
 <p class="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text" data-aria-label-part="4" lang="en">Our atmosphere is beautiful<a class="twitter-timeline-link u-hidden" data-expanded-url="https://twitter.com/rtphokie/status/1180266075289718784" dir="ltr" href="https://t.co/v9lJIuip53" rel="nofollow noopener" target="_blank" title="https://twitter.com/rtphokie/status/1180266075289718784"><span class="tco-ellipsis"></span><span class="invisible">https://</span><span class="js-display-url">twitter.com/rtphokie/statu</span><span class="invisible">s/1180266075289718784</span><span class="tco-ellipsis"><span class="invisible"> </span>…</span></a></p>,
 <p class="TweetTextSize TweetTextSize--normal js-twee

In [17]:
# Loop through our tweet texts to find 'InSight sol' 
weather_data = None

for t_text in mars_weather_tweet_texts:
    # get a list of all words in each tweet text
    words = t_text.get_text().split(' ')
    if words[0] == 'InSight':
        weather_data = t_text.get_text()
        break
    print('---')

In [18]:
weather_data

'InSight sol 303 (2019-10-03) low -103.9ºC (-155.1ºF) high -26.5ºC (-15.7ºF)\nwinds from the SSE at 4.9 m/s (11.0 mph) gusting to 19.1 m/s (42.8 mph)\npressure at 7.20 hPa'

### Mars Facts

* Visit the Mars Facts webpage (https://space-facts.com/mars/) and use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.

* Use Pandas to convert the data to a HTML table string.

In [19]:
# visit Mars Facts url
url = 'https://space-facts.com/mars/'
browser.visit(url)

In [20]:
# convert the browser.html to a soup
html = browser.html
facts_soup = BeautifulSoup(html, 'html.parser')

In [21]:
import numpy as np
import pandas as pd
from pandas import *

In [22]:
# convert the html to a dataframe
df = pd.read_html('https://space-facts.com/mars/')[0]
df

Unnamed: 0,Mars - Earth Comparison,Mars,Earth
0,Diameter:,"6,779 km","12,742 km"
1,Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
2,Moons:,2,1
3,Distance from Sun:,"227,943,824 km","149,598,262 km"
4,Length of Year:,687 Earth days,365.24 days
5,Temperature:,-153 to 20 °C,-88 to 58°C


In [23]:
# change the column names and make the Description the index
df.columns = ['Description', 'Mars', 'Earth']
df.set_index('Description', inplace=True)
df

Unnamed: 0_level_0,Mars,Earth
Description,Unnamed: 1_level_1,Unnamed: 2_level_1
Diameter:,"6,779 km","12,742 km"
Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
Moons:,2,1
Distance from Sun:,"227,943,824 km","149,598,262 km"
Length of Year:,687 Earth days,365.24 days
Temperature:,-153 to 20 °C,-88 to 58°C


In [24]:
# we are only interested in the Description & Mars columns
mars = df.iloc[:, 0]
mars.to_frame()


Unnamed: 0_level_0,Mars
Description,Unnamed: 1_level_1
Diameter:,"6,779 km"
Mass:,6.39 × 10^23 kg
Moons:,2
Distance from Sun:,"227,943,824 km"
Length of Year:,687 Earth days
Temperature:,-153 to 20 °C


In [25]:
# convert the mars series back to a dataframe
mars = pd.DataFrame(mars)
mars

Unnamed: 0_level_0,Mars
Description,Unnamed: 1_level_1
Diameter:,"6,779 km"
Mass:,6.39 × 10^23 kg
Moons:,2
Distance from Sun:,"227,943,824 km"
Length of Year:,687 Earth days
Temperature:,-153 to 20 °C


In [26]:
mars.to_html()

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Mars</th>\n    </tr>\n    <tr>\n      <th>Description</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Diameter:</th>\n      <td>6,779 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>Distance from Sun:</th>\n      <td>227,943,824 km</td>\n    </tr>\n    <tr>\n      <th>Length of Year:</th>\n      <td>687 Earth days</td>\n    </tr>\n    <tr>\n      <th>Temperature:</th>\n      <td>-153 to 20 °C</td>\n    </tr>\n  </tbody>\n</table>'

### Mars Hemispheres

* Visit the USGS Astrogeology site [here](https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars) to obtain high resolution images for each of Mar's hemispheres.

* You will need to click each of the links to the hemispheres in order to find the image url to the full resolution image.

* Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing the hemisphere name. Use a Python dictionary to store the data using the keys `img_url` and `title`.

* Append the dictionary with the image url string and the hemisphere title to a list. This list will contain one dictionary for each hemisphere.

```python
### Example:
hemisphere_image_urls = [
    {"title": "Valles Marineris Hemisphere", "img_url": "..."},
    {"title": "Cerberus Hemisphere", "img_url": "..."},
    {"title": "Schiaparelli Hemisphere", "img_url": "..."},
    {"title": "Syrtis Major Hemisphere", "img_url": "..."},
]

In [27]:
# visit the Mars Hemispheres site
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url)

In [28]:
# convert the browser.html to a soup
html = browser.html
hsoup = BeautifulSoup(html, 'html.parser')

In [29]:
links = [a['href'] for a in hsoup.find_all('a', href=True)]
links


['https://www.usgs.gov/centers/astrogeo-sc',
 'https://nasa.gov',
 'https://pds-imaging.jpl.nasa.gov/',
 '/search',
 '/search/map/Mars/Viking/cerberus_enhanced',
 '/search/map/Mars/Viking/cerberus_enhanced',
 '/search/map/Mars/Viking/schiaparelli_enhanced',
 '/search/map/Mars/Viking/schiaparelli_enhanced',
 '/search/map/Mars/Viking/syrtis_major_enhanced',
 '/search/map/Mars/Viking/syrtis_major_enhanced',
 '/search/map/Mars/Viking/valles_marineris_enhanced',
 '/search/map/Mars/Viking/valles_marineris_enhanced',
 'http://isis.astrogeology.usgs.gov',
 'http://planetarynames.wr.usgs.gov',
 'https://astrogeology.usgs.gov/tools/map-a-planet-2',
 'https://www.usgs.gov/centers/astrogeo-sc/science/cartography-and-imaging-sciences-node-nasa-planetary-data-system',
 'https://www.usgs.gov/centers/astrogeo-sc/science/regional-planetary-image-facility-rpif',
 'https://www.usgs.gov/centers/astrogeo-sc/science/usgsnasa-planetary-photogrammetry-guest-facility',
 'http://pilot.wr.usgs.gov',
 'https://ww

In [30]:
# hsoup.find('a').attrs['href']

In [31]:
base_url = 'https://astrogeology.usgs.gov'

In [32]:
cerebrus_url = f'{base_url}{links[4]}'
cerebrus_url

'https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced'

In [33]:
schiaparelli_url = f'{base_url}{links[6]}'
schiaparelli_url

'https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced'

In [34]:
syrtis_major_url = f'{base_url}{links[8]}'
syrtis_major_url

'https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced'

In [35]:
valles_marineris_url = f'{base_url}{links[10]}'
valles_marineris_url

'https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced'

In [36]:
# visit the Cerebrus site
url = cerebrus_url
browser.visit(url)

In [37]:
# convert the browser.html to a soup
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [38]:
cerberus = soup.select_one('li a')['href']
cerberus

'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'

In [39]:
# visit the schiaparelli_url 
url = schiaparelli_url 
browser.visit(url)

In [40]:
# convert the browser.html to a soup
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [41]:
schiaparelli = soup.select_one('li a')['href']
schiaparelli

'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'

In [42]:
# visit the syrtis_major_url 
url = syrtis_major_url 
browser.visit(url)

In [43]:
# convert the browser.html to a soup
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [44]:
syrtis_major  = soup.select_one('li a')['href']
syrtis_major

'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'

In [45]:
# visit the valles_marineris_url 
url = valles_marineris_url  
browser.visit(url)

In [46]:
# convert the browser.html to a soup
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [47]:
valles_marineris  = soup.select_one('li a')['href']
valles_marineris

'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'

In [48]:
# Append the dictionary with the image url & hemisphere title to a list.
# This list will contain one dictionary for each hemisphere.
hemisphere_image_urls = [
    {'title': 'Cerberus Hemisphere', 'img_url': cerberus},
    {'title': 'Schiaparelli Hemisphere', 'img_url': schiaparelli},
    {'title': 'Syrtis Major Hemisphere', 'img_url': syrtis_major},
    {'title': 'Valles Marineris Hemisphere', 'img_url': valles_marineris}
]
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]

In [49]:
import os

os.system('jupyter nbconvert --to html mission_to_mars.ipynb')

0