## Mission to Mars

* Scrape the NASA Mars News Site and collect the latest News Title and Paragragh Text. Assign the text to variables that you can reference later.

In [2]:
from bs4 import BeautifulSoup as bs
from splinter import Browser
from selenium import webdriver
import time

In [3]:
# initialize chrome browser
browser = Browser('chrome', headless=False)

In [4]:
# NASA website to be scrapped
url = "https://mars.nasa.gov/news"

In [5]:
# visit the page to be scrapped and wait for it to load
browser.visit(url)
time.sleep(2)

In [6]:
# Create BeautifulSoup object
html = browser.html
soup = bs(html, 'html.parser') 

In [7]:
# find the latest News Headline on the Nasa - mars - news page.
news_title = soup.find('div',class_="content_title").a.text

In [8]:
news_title

"NASA's First Deep-Space CubeSats Say: 'Polo!'"

In [9]:
# find the text for the latest news headline.
news_p = soup.find('div', class_='article_teaser_body').text
news_p

"MarCO is a pair of tiny spacecraft that launched with NASA's InSight lander today."

### JPL Mars Space Images - Featured Image

In [10]:
# url for the images website.
image_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
# visit the images page.  NOTE:- do not close the browser opened above. If so u have to initiate the browser again.
browser.visit(image_url)
# Create BeautifulSoup object
img_html = browser.html
soup = bs(img_html, 'html.parser')

In [11]:
# find the a tag for the featured image
href = soup.find('footer').a['data-fancybox-href']

In [12]:
# append with the base url to get the full URL for the featured image
featured_image_url = 'https://www.jpl.nasa.gov'+href
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA17838_ip.jpg'

In [13]:
browser.visit(featured_image_url) 
#visit the featured image using the URL above to verify that the featured image is retrieved accurately.

### Mars Weather

In [14]:
# URL for Mars Weather Twitter page.
url = 'https://twitter.com/marswxreport?lang=en'

In [15]:
# initialize the browser.
#browser = Browser('chrome', headless=False)
#visit the Mars Weather Twitter page
browser.visit(url)
# Create BeautifulSoup object
html = browser.html
soup = bs(html, 'html.parser')

In [16]:
#find the first tweet on the page and retrieve its text.
mars_weather = soup.find('p', class_=  'TweetTextSize TweetTextSize--normal js-tweet-text tweet-text').text

In [17]:
mars_weather

'Sol 2045 (May 08, 2018), Sunny, high -7C/19F, low -74C/-101F, pressure at 7.33 hPa, daylight 05:22-17:20'

### Mars Facts

In [18]:
# import pandas
import pandas as pd

In [47]:
#URL for mars facts.
url = 'https://space-facts.com/mars'

In [54]:
# use pandas to read the above page and retrieve the firt table on the page with mars facts.
table = pd.read_html(url)
table[0][0]

0    Equatorial Diameter:
1         Polar Diameter:
2                   Mass:
3                  Moons:
4         Orbit Distance:
5           Orbit Period:
6    Surface Temperature:
7           First Record:
8            Recorded By:
Name: 0, dtype: object

In [58]:
# convert the retrieved table into Data Frame
df = pd.DataFrame(table[0])
df.columns = ['Fact', 'Value']
df.head()
df.to_dict()

{'Fact': {0: 'Equatorial Diameter:',
  1: 'Polar Diameter:',
  2: 'Mass:',
  3: 'Moons:',
  4: 'Orbit Distance:',
  5: 'Orbit Period:',
  6: 'Surface Temperature:',
  7: 'First Record:',
  8: 'Recorded By:'},
 'Value': {0: '6,792 km',
  1: '6,752 km',
  2: '6.42 x 10^23 kg (10.7% Earth)',
  3: '2 (Phobos & Deimos)',
  4: '227,943,824 km (1.52 AU)',
  5: '687 days (1.9 years)',
  6: '-153 to 20 °C',
  7: '2nd millennium BC',
  8: 'Egyptian astronomers'}}

### Mars Hemispheres

In [26]:
# astrogeology website URL for mars hemispheres
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
base_url = 'https://astrogeology.usgs.gov'

In [27]:
#browser = Browser('chrome', headless=False) # opens a new browser, if u have already closed ur old browser
browser.visit(url) # visit the astrogeology website

In [28]:
html = browser.html
soup = bs(html, 'html.parser') # intiates soup.

In [29]:
# scrape the page to retrieve the mars hemispheres list displayed on the website
links = soup.find_all('div', class_='description')
# intialize a 'hemisphere_image_urls' list variable to hold the image dictionaries
hemisphere_image_urls =[] 
# loop thru the hemispheres list retrieved
for link in links:
    image_url = {} # initialize dictionary to hold the title and image urls for the hemisphere
    href = base_url+link.a['href'] # link to the hemisphere page
    #browser = Browser('chrome', headless=False) 
    browser.visit(href) # opens the mar hemisphere in browser.
    time.sleep(1) # let the page load
    html=browser.html
    soup = bs(html, 'html.parser') # create a soup object for the page
    title = (soup.title.text).split('|')[0] # title of the mars hemisphere
    image_url['title'] = title # add the title to the dictionary
    divs = soup.find_all('div', class_ = 'downloads') #find downloadable image links for the mars hemisphere
    list_items = divs[0].ul.find_all('li') # find all list items in the div 
    # loop thru the list_items and retrieve the image URL for the Original image.
    for item in list_items:
        if (item.a.text).lower() == 'original':
            print(item.a['href'])
            image_url['image_url'] = item.a['href'] # add the image URL to the image_url dictionary
    hemisphere_image_urls.append(image_url) # append the image_url dictionary to the the hemisphere list.

hemisphere_image_urls # print the hemisphere list

http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif
http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif
http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif
http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif


[{'image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif',
  'title': 'Cerberus Hemisphere Enhanced '},
 {'image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif',
  'title': 'Schiaparelli Hemisphere Enhanced '},
 {'image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif',
  'title': 'Syrtis Major Hemisphere Enhanced '},
 {'image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif',
  'title': 'Valles Marineris Hemisphere Enhanced '}]

In [31]:
mars_info_set = {"latest_news_title": news_title,
"latest_news_text": news_p,
"featured_image": featured_image_url,
"weather" : mars_weather,
"mars_facts": df,
"hemispheres": hemisphere_image_urls
}


In [32]:
mars_info_set

{'featured_image': 'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA17838_ip.jpg',
 'hemispheres': [{'image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif',
   'title': 'Cerberus Hemisphere Enhanced '},
  {'image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif',
   'title': 'Schiaparelli Hemisphere Enhanced '},
  {'image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif',
   'title': 'Syrtis Major Hemisphere Enhanced '},
  {'image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif',
   'title': 'Valles Marineris Hemisphere Enhanced '}],
 'latest_news_text': "MarCO is a pair of tiny spacecraft that launched with NASA's InSight lander today.",
 'latest_news_title': "NASA's First Deep-Space CubeSats Say: 'Polo!'",
 'mars_facts':                    Fact                          Value
 0  Equatorial Diam

In [34]:
keys = mars_info_set.keys()

In [35]:
keys

dict_keys(['latest_news_title', 'latest_news_text', 'featured_image', 'weather', 'mars_facts', 'hemispheres'])

In [41]:
df.dtypes
    

Fact     object
Value    object
dtype: object

In [44]:
df = df.astype(str)

In [45]:
df.dtypes

Fact     object
Value    object
dtype: object