In [1]:
# Dependencies 
from bs4 import BeautifulSoup
from splinter import Browser
import pandas as pd
import requests

In [2]:
# Location of the ChromeDriver binary, handed to splinter as a keyword argument
executable_path = {'executable_path': 'chromedriver.exe'}

# Start a visible (non-headless) Chrome session driven by splinter
browser = Browser('chrome', headless=False, **executable_path)

In [3]:
# Point the splinter-driven browser at the NASA Mars news listing; the
# following cell reads the rendered page back out of `browser.html`.
url = 'https://mars.nasa.gov/news/'
browser.visit(url)

In [4]:
# Take the rendered page source from the browser and parse it with lxml
html = browser.html
soup = BeautifulSoup(html, 'lxml')

# Headline: text of the link inside the first "content_title" div
titleDiv = soup.find('div', class_='content_title')
newsTitle = titleDiv.find('a').text

# Teaser paragraph: text of the first "article_teaser_body" div
teaserDiv = soup.find('div', class_='article_teaser_body')
newsP = teaserDiv.text

# Print both values, one per line, as a sanity check
print(newsTitle, newsP, sep='\n')

InSight Captures Sunrise and Sunset on Mars
InSight joins the rest of NASA's Red Planet surface missions, all of which have photographed either the start or end of a Martian day.


In [5]:
# Open the JPL space-images search page (query string selects category=Mars)
# in the splinter browser.
# NOTE(review): `featuredImageUrl` holds the search-page URL here and is
# reassigned to the scraped image URL in the following cell.
featuredImageUrl = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(featuredImageUrl)

In [6]:
# Rendered source of the image search page, parsed with Beautiful Soup
htmlImage = browser.html
soup = BeautifulSoup(htmlImage, 'lxml')

# The first <article> carries the image in its inline style attribute.
# Strip the CSS wrapper text, then drop the first and last remaining
# characters (presumably the quotes around the path).
articleStyle = soup.find('article')['style']
imagePath = articleStyle.replace('background-image: url(', '')
imagePath = imagePath.replace(');', '')
imagePath = imagePath[1:-1]

# Site root that the scraped path is appended to
mainUrl = 'https://www.jpl.nasa.gov'

# Link to the full-size image
featuredImageUrl = mainUrl + imagePath

# Show the resulting link
featuredImageUrl

'https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA18886-1920x1200.jpg'

In [7]:
# Load the @marswxreport Twitter timeline (requested with lang=en) in the
# splinter browser so the following cell can parse the rendered tweets.
weatherUrl = 'https://twitter.com/marswxreport?lang=en'
browser.visit(weatherUrl)

In [8]:
# Snapshot the rendered timeline HTML from the live browser session
htmlWeather = browser.html

# Parse HTML with Beautiful Soup
soup = BeautifulSoup(htmlWeather, 'lxml')

# Every tweet body on the timeline sits in one of these containers
latestTweet = soup.find_all('div', class_='js-tweet-text-container')

# Keep the first tweet that reads like a weather report.
# A test written as `'Sol' and 'pressure' in text` only checks 'pressure',
# because the non-empty literal 'Sol' is always truthy. Both words are tested
# explicitly here, and case-insensitively, since the reports spell it "sol".
weatherTweet = None
for tweet in latestTweet:
    paragraph = tweet.find('p')
    if paragraph is None:
        # Container without a text paragraph: nothing to inspect
        continue
    tweetText = paragraph.text
    loweredText = tweetText.lower()
    if 'sol' in loweredText and 'pressure' in loweredText:
        weatherTweet = tweetText
        break

# weatherTweet stays None when no tweet matched, instead of silently holding
# the text of the last non-weather tweet that was inspected.
if weatherTweet is None:
    print('No weather report found in the loaded tweets')
else:
    print(weatherTweet)

InSight sol 152 (2019-05-01) low -98.1ºC (-144.5ºF) high -17.2ºC (1.0ºF)
winds from the SW at 4.8 m/s (10.7 mph) gusting to 13.2 m/s (29.5 mph)
pressure at 7.40 hPapic.twitter.com/zJGo11uNF1


In [9]:
# Source page for the Mars fact table
factsUrl = 'http://space-facts.com/mars/'

# read_html returns a list of DataFrames parsed from the page; keep the first
marsFacts = pd.read_html(factsUrl)
marsDf = marsFacts[0]

# Label the two columns
marsDf.columns = ['Description', 'Value']

# Row-wise list of dicts built from the table
data = marsDf.to_dict(orient='records')

# Write the table as HTML, without the index column, to Mars_Data.html
# in the current working directory
marsDf.to_html('Mars_Data.html', index=False)

# Display
marsDf

Unnamed: 0,Description,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [10]:
# Open the USGS Astrogeology search results (query "hemisphere enhanced",
# target Mars) in the splinter browser; the following cell scrapes them.
hemispheresUrl = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(hemispheresUrl)

In [67]:
# Parse the rendered search-results page
htmlHemispheres = browser.html
soup = BeautifulSoup(htmlHemispheres, 'lxml')

# Site root prepended to the relative links found on the pages
hemispheresMainUrl = 'https://astrogeology.usgs.gov'

# One "item" div per hemisphere result
items = soup.find_all('div', class_='item')

# Collected {"title", "img_url"} records, one per hemisphere
hemisphereImageUrls = []

for item in items:
    # Hemisphere name and the relative link to its detail page
    title = item.find('h3').text
    partialUrl = item.find('a', class_='itemLink product-item')['href']

    # Load the detail page in the browser and parse it.
    # This rebinds `soup`; `items` was already collected from the results page.
    detailUrl = hemispheresMainUrl + partialUrl
    browser.visit(detailUrl)
    partialImgHtml = browser.html
    soup = BeautifulSoup(partialImgHtml, 'lxml')

    # Absolute URL of the image tagged "wide-image"
    imgSrc = soup.find('img', class_='wide-image')['src']
    imgUrl = hemispheresMainUrl + imgSrc

    # Store the retrieved pair as one record
    hemisphereImageUrls.append(dict(title=title, img_url=imgUrl))

# Display the collected records
hemisphereImageUrls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]