In [1]:
import re
import shutil
import time
from urllib.parse import urljoin

import pandas as pd
from bs4 import BeautifulSoup
from splinter import Browser

In [2]:
# Print where chromedriver is installed; the browser setup cell below points at this path.
!which chromedriver

/usr/local/bin/chromedriver


In [3]:
# Resolve chromedriver from PATH so the notebook is not tied to one machine's
# layout; fall back to the conventional install location when it is not on PATH.
executable_path = {
    'executable_path': shutil.which('chromedriver') or '/usr/local/bin/chromedriver'
}
# Start a Chrome session with headless mode switched off.
browser = Browser('chrome', **executable_path, headless=False)

### NASA Mars News
Scraping the NASA Mars News Site to collect the latest news title and paragraph text

In [4]:
# Open the NASA Mars News listing, then pause before the page HTML is read
# in the next cell.
page_load_pause = 5  # seconds
url = "https://mars.nasa.gov/news/"
browser.visit(url)
time.sleep(page_load_pause)

In [5]:
# Parse the HTML currently loaded in the browser (the NASA news page).
page_html = browser.html
soup = BeautifulSoup(page_html, features='html.parser')

In [6]:
# Take the first <li class="slide"> element; the cells below read the date,
# title and teaser from it.
news = soup.find('li', attrs={'class': 'slide'})

In [7]:
# Publication date text of the selected news item.
date_div = news.find('div', attrs={'class': 'list_date'})
news_date = date_div.get_text()
news_date

'May  7, 2020'

In [8]:
# Headline text of the selected news item.
title_div = news.find('div', attrs={'class': 'content_title'})
news_title = title_div.get_text()
news_title

"NASA's Perseverance Rover Mission Getting in Shape for Launch"

In [9]:
# Teaser paragraph text of the selected news item.
teaser_div = news.find('div', attrs={'class': 'article_teaser_body'})
news_p = teaser_div.get_text()
news_p

'Stacking spacecraft components on top of each other is one of the final assembly steps before a mission launches to the Red Planet. '

### JPL Mars Space Images - Featured Image
Using splinter to navigate the site and find the full-size '.jpg' image URL for the current Featured Mars Image

In [10]:
# Open the JPL space images page filtered to the Mars category, then pause
# before the page is parsed and navigated in the following cells.
page_load_pause = 5  # seconds
url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(url)
time.sleep(page_load_pause)

In [11]:
# Parse the HTML currently loaded in the browser (the JPL images landing page).
landing_html = browser.html
soup = BeautifulSoup(landing_html, features='html.parser')

In [12]:
# Click the link whose text contains 'FULL IMAGE'.
full_image_link = browser.links.find_by_partial_text('FULL IMAGE')
full_image_link.click()

In [13]:
# Click the link whose text contains 'more info'; the next cell reads the
# image source from the page the browser ends up on.
more_info_link = browser.links.find_by_partial_text('more info')
more_info_link.click()

In [14]:
# Parse the page reached via 'more info' and read the `src` of the first <img>
# inside the first <figure>.
soup = BeautifulSoup(browser.html, 'html.parser')
partial_url = soup.figure.img['src']
# urljoin gives the same result as plain concatenation for a root-relative src
# (the case shown in the output), and also copes with a src that is already
# absolute or that lacks a leading slash.
featured_image_url = urljoin('https://www.jpl.nasa.gov', partial_url)
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA17474_hires.jpg'

### Mars Weather
Scraping the latest Mars weather tweet from the Mars Weather Twitter account

In [15]:
# Load the Mars Weather Twitter account page in the browser.
url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(url)

In [16]:
# Pause for 5 seconds before the page HTML is captured in the next cell.
time.sleep(5)

In [17]:
# Parse the HTML currently loaded in the browser (the Twitter account page).
timeline_html = browser.html
soup = BeautifulSoup(timeline_html, features='html.parser')

In [18]:
# Pick the first <span> whose text matches 'InSight sol' and keep its text
# as the latest weather report.
pattern = re.compile(r'InSight sol')
weather_span = soup.find('span', text=pattern)
mars_weather = weather_span.get_text()
mars_weather

'InSight sol 517 (2020-05-10) low -92.6ºC (-134.7ºF) high 1.2ºC (34.2ºF)\nwinds from the SW at 4.9 m/s (11.0 mph) gusting to 17.5 m/s (39.2 mph)\npressure at 7.00 hPa'

### Mars Facts
Scraping the Mars Facts webpage to get the table containing the Mars planet profile

In [19]:
# Load the Mars facts page in the browser; the same `url` is later passed to
# pd.read_html to fetch the tables.
url = 'https://space-facts.com/mars/'
browser.visit(url)

In [20]:
# Parse the HTML currently loaded in the browser (the Mars facts page).
# NOTE(review): the facts cells below read the table via pd.read_html(url)
# and do not use this soup; confirm whether it is still needed.
facts_html = browser.html
soup = BeautifulSoup(facts_html, features='html.parser')

In [21]:
# Let pandas fetch the page at `url` and parse every HTML table it finds
# into a list of DataFrames.
tables = pd.read_html(io=url)

In [22]:
# Keep the first table returned by read_html; the displayed output shows it
# holds the Mars planet profile (diameter, mass, moons, orbit, ...).
mars_facts = tables[0]
mars_facts

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [23]:
# Render the facts as an HTML <table> string, leaving out both the header row
# and the index column.
mars_facts_table = mars_facts.to_html(header=False, index=False)
mars_facts_table

'<table border="1" class="dataframe">\n  <tbody>\n    <tr>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <td>Surface Temperature:</td>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <td>First Record:</td>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <td>Recorded By:</td>\n      <td>Egyptian astronomers</td>\n    </tr>\n  </tbody>\n</table>'

### Mars Hemispheres
Scraping the USGS Astrogeology site to obtain high-resolution images for each of Mars's hemispheres

In [24]:
# Open the USGS Astrogeology search results for the enhanced Mars hemisphere
# images, then pause before the page HTML is read in the next cell.
page_load_pause = 5  # seconds
url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(url)
time.sleep(page_load_pause)

In [25]:
# Parse the HTML currently loaded in the browser (the USGS search results).
results_html = browser.html
soup = BeautifulSoup(results_html, features='html.parser')

In [26]:
# Find all the four hemispheres of Mars
hemispheres = soup.find_all('div', class_='item')

In [27]:
# Declare variable to store result
hemisphere_image_urls = []

In [28]:
# Iterate over each hemisphere
for hemisphere in hemispheres:
    
    # Declare variable to store result
    hemisphere_url = {}
    
    # Find the title
    title = hemisphere.find('div', class_='description').h3.text
    hemisphere_url['title'] = title
    
    # Click on the title to navigate to page
    time.sleep(1)
    browser.find_by_text(title).click()
    
    # On the new page, get the html
    hemi_soup = BeautifulSoup(browser.html, 'html.parser')
    
    # Find the url to the full resolution '.jpg' image
    download = hemi_soup.find('div', class_="downloads")
    download_link = download.find('a')
    if download_link.text == 'Sample':
        img_url = download_link['href']
        hemisphere_url['img_url'] = img_url
    
    # Append hemisphere_url dict to a list of all hemisphere_image_urls
    hemisphere_image_urls.append(hemisphere_url)
  
        
    time.sleep(1)
    browser.back()

In [29]:
# Display the collected title / image-URL records.
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]

In [30]:
# Shut down the browser session created in the setup cell.
browser.quit()