In [1]:
from bs4 import BeautifulSoup
import pandas as pd
from splinter import Browser

In [2]:
#Mac Users
# https://splinter.readthedocs.io/en/latest/drivers/chrome.html
!which chromedriver

executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

/usr/local/bin/chromedriver


In [3]:
#News
url = 'https://mars.nasa.gov/news/'
browser.visit(url)

html = browser.html
mars_news_soup = BeautifulSoup(html, 'html.parser')

In [4]:
#latest article title
news_title = mars_news_soup.find('div', class_='content_title').text
news_title

'Small Satellite Mission of the Year'

In [5]:
#latest article paragraph
news_p = mars_news_soup.find('div', class_='article_teaser_body').text
news_p

'The first interplanetary CubeSats were recognized by the engineering community with the 2019 Small Satellite Mission of the Year award.'

In [6]:
#Image
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)

In [7]:
#click "Full Image" button
browser.click_link_by_partial_text('FULL IMAGE')

In [8]:
#click "more info" button
browser.click_link_by_partial_text('more info')

In [9]:
html = browser.html
image_soup = BeautifulSoup(html, 'html.parser')

featured_image_route = image_soup.find('figure', class_='lede').a['href']
featured_image_route

'/spaceimages/images/largesize/PIA20063_hires.jpg'

In [10]:
featured_image_url = f'https://www.jpl.nasa.gov{featured_image_route}'
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA20063_hires.jpg'

In [11]:
#Twitter
url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(url)

html = browser.html
tweet_soup = BeautifulSoup(html, 'html.parser')

In [12]:
mars_weather = tweet_soup.find('p', class_='TweetTextSize').text
mars_weather

'InSight sol 250 (2019-08-10) low -100.0ºC (-148.1ºF) high -26.2ºC (-15.1ºF)\nwinds from the SSE at 4.4 m/s (9.8 mph) gusting to 16.2 m/s (36.2 mph)\npressure at 7.60 hPapic.twitter.com/9sZRRUi3dm'

In [13]:
#Facts
url = 'https://space-facts.com/mars/'

mars_df = pd.read_html(url)[1]
mars_df.columns = ['Property', 'Value']
mars_df.set_index('Property', inplace = True)
mars_df

Unnamed: 0_level_0,Value
Property,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [14]:
mars_df.to_html()

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Value</th>\n    </tr>\n    <tr>\n      <th>Property</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr>\n  

In [15]:
#Hemispheres
#The website listed was down at the time of the assignment. An archive link was used as an alternative. 

# url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

url = 'https://web.archive.org/web/20181114171728/https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url)

html = browser.html
hemi_soup = BeautifulSoup(html, 'html.parser')

In [16]:
# Using a loop and empty list to get the titles
# Then search for the image URLs based on the titles

links = hemi_soup.find_all('h3')

hemi_titles = [hemi.text for hemi in links]
hemi_titles

# hemi_titles = []
# for hemi in links:
#     hemi_titles.append(hemi.text)

['Cerberus Hemisphere Enhanced',
 'Schiaparelli Hemisphere Enhanced',
 'Syrtis Major Hemisphere Enhanced',
 'Valles Marineris Hemisphere Enhanced']

In [17]:
hemisphere_image_urls = []
counter = 0

print(f"Starting: \n{'-'*20}")

for hemi in hemi_titles:
    hemi_dict = {}
    
    browser.click_link_by_partial_text(hemi)
    hemi_dict['title'] = hemi
    hemi_dict['img_url'] = browser.find_by_text('Sample').first['href']
    hemisphere_image_urls.append(hemi_dict)
#     print(hemi_dict) --- this returns the same dictionary as the below line:
    print(f"{hemisphere_image_urls[counter]} \n{'-'*20}")
    
    counter += 1
    browser.back()

print(f"Done. {counter} hemispheres found.")

Starting: 
--------------------
{'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'https://web.archive.org/web/20181114182238/http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'} 
--------------------
{'title': 'Schiaparelli Hemisphere Enhanced', 'img_url': 'https://web.archive.org/web/20181114182242/http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'} 
--------------------
{'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': 'https://web.archive.org/web/20181114182245/http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'} 
--------------------
{'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': 'https://web.archive.org/web/20181114182248/http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'} 
--------------------
Done. 4 hemispheres found.


In [18]:
# Using Splinter's 'find_by_css'
# This is used in scrape_mars.py

hemisphere_image_urls = []
links = browser.find_by_css("a.product-item h3")

for item in range(len(links)):
    hemi_dict = {}
    
    browser.find_by_css("a.product-item h3")[item].click()
    hemi_dict["title"] = browser.find_by_css("h2.title").text
    hemi_dict["img_url"] = browser.find_link_by_text("Sample").first["href"]
    hemisphere_image_urls.append(hemi_dict)
    browser.back()

hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://web.archive.org/web/20181114182238/http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://web.archive.org/web/20181114182242/http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://web.archive.org/web/20181114182245/http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://web.archive.org/web/20181114182248/http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]