# Web Scraping - Mission to Mars

In [1]:
#import dependencies
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist
from bs4 import BeautifulSoup
import pandas as pd
import time

### Mac Users

In [2]:
## executable path to driver for windows users
# executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
# browser = Browser('chrome', **executable_path, headless=False)

### Windows Users

In [3]:
#executable path to driver for windows users
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

### NASA Mars News

In [4]:
#url of page to be scraped 
url_news = 'https://mars.nasa.gov/news/'

In [5]:
#visit the url through the spinter module and add 5 sec delay in the execution 
browser.visit(url_news)
time.sleep(5)

In [6]:
# HTML Object
html = browser.html

# Parse HTML with Beautiful Soup
soup = BeautifulSoup(html, 'html.parser')


# Scrape the NASA Mars News Site and collect the latest News Title and Paragraph Text. 
# Assign the text to variables that you can reference later.

news_title = soup.find_all('div', class_='content_title')[1]
#used the location for news_title as it was not scraping the "a" element of html in the given class.
#simply using news_title = soup.find('div', class_='content_title').find('a').text
#other way of doing it would be to call the main 'div' for the first news article and then use it to pull title and paragraph
#article = soup.find("div", class_='list_text')
news_title = news_title.text

news_p = soup.find(class_='article_teaser_body').text

print(f'news_title = {news_title} \nnews_p = {news_p}')

news_title = NASA's Perseverance Rover Goes Through Trials by Fire, Ice, Light and Sound 
news_p = The agency's new Mars rover is put through a series of tests in vacuum chambers, acoustic chambers and more to get ready for the Red Planet.


### JPL Mars Space Images - Featured Image

In [7]:
#url of page to be scraped 
url_img = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

In [8]:
# Use splinter to navigate the site
browser.visit(url_img)
time.sleep(5)
html1 = browser.html
soup1 = BeautifulSoup(html1, 'html.parser')

In [9]:
# find the image url for the current Featured Mars Image and assign the url string to a variable called featured_image_url.
featured_image = soup1.find('article')['style'].replace("background-image: url('",'').replace("');",'')
featured_image

featured_image_url = (f'https://www.jpl.nasa.gov{featured_image}')
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA19347-1920x1200.jpg


### Mars Weather

In [10]:
# Visit the Mars Weather twitter account and scrape the latest Mars weather tweet from the page. 
url_weather = 'https://twitter.com/marswxreport?lang=en'
browser.visit(url_weather)
time.sleep(5)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [11]:
# Save the tweet text for the weather report as a variable called mars_weather.
#scrape the mars twitter page
mars_weather = soup.find_all("article", attrs={"role":"article"})[0].text
#Replace all \n with space for continuity
mars_weather = mars_weather.replace('\n', ' ')
#Split to get rid of the initial part of the statement "Mars Weather@MarsWxReport·9h InSight "
mars_weather= mars_weather.split("InSight ")[1]
#mars_weather = mars_weather[40:]
#print out the latest weather tweet
print(mars_weather)

# mars_weather = soup.find_all("article", attrs={"role":"article"})
# mars_weather = lets_try[0].find_all("div")[0].find_all("div")[3]
# mars_weather = lets_try.find_all("div")[9].text
# mars_weather

# Twitter frequently changes how information is presented on their website. If you are having difficulty getting the correct html tag data, consider researching Regular Expression Patterns and how they can be used in combination with the .find() method.

sol 524 (2020-05-18) low -92.5ºC (-134.4ºF) high 0.5ºC (32.9ºF) winds from the SW at 4.8 m/s (10.7 mph) gusting to 15.3 m/s (34.3 mph) pressure at 7.00 hPa1318


### Mars Facts

In [20]:
# * Visit the Mars Facts webpage and use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.
url_facts = 'https://space-facts.com/mars/'
browser.visit(url_facts)
# * Use Pandas to convert the data to a HTML table string.
table = pd.read_html(url_facts)[0]
#rename columns
table.columns=['Description', 'Values']
#set index
table.set_index('Description', inplace = True)
#convert to html
x = table.to_html()
table

Unnamed: 0_level_0,Values
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


### Mars Hemispheres

In [13]:
# * Visit the USGS Astrogeology site to obtain high resolution images for each of Mars hemispheres.
astro_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

browser.visit(astro_url)
time.sleep(5)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# * You will need to click each of the links to the hemispheres in order to find the image url to the full resolution image.
hemispheres = soup.find_all(class_="description")
#print(hemispheres)

In [14]:
hemisphere_image_urls = []

base_url = 'https://astrogeology.usgs.gov'

for hemisphere in hemispheres:
    #scrape and store the title of the hemispheres. Replace the word enhanced
    title = hemisphere.find('h3').text
    title = title.replace('Enhanced', '')

    #scrape and store the partial url and visit the full url link and parse the HTML
    url = hemisphere.find('a')['href']
    browser.visit(base_url+url)
    html = browser.html
    soup = BeautifulSoup(html, 'html.parser')

    #get the partial url of the image and join with the base url
    image_url = soup.find('img',class_='thumb')['src']
    img_url = base_url+image_url

    #Append the dictionary with the image url string and the hemisphere title to a list. This list will contain one dictionary for each hemisphere.
    hemisphere_image_urls.append({"title":title, "img_url":img_url})

hemisphere_image_urls



[{'title': 'Cerberus Hemisphere ',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/39d3266553462198bd2fbc4d18fbed17_cerberus_enhanced.tif_thumb.png'},
 {'title': 'Schiaparelli Hemisphere ',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/08eac6e22c07fb1fe72223a79252de20_schiaparelli_enhanced.tif_thumb.png'},
 {'title': 'Syrtis Major Hemisphere ',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/55a0a1e2796313fdeafb17c35925e8ac_syrtis_major_enhanced.tif_thumb.png'},
 {'title': 'Valles Marineris Hemisphere ',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/4e59980c1c57f89c680c0e1ccabbeff1_valles_marineris_enhanced.tif_thumb.png'}]