In [2]:
from bs4 import BeautifulSoup as bs
from splinter import Browser

import requests 
import pymongo
import pandas as pd
import re

In [3]:
#######################################################################
### NASA Mars News ###
#######################################################################

executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=True)

url_mars = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'
browser.visit(url_mars)

html = browser.html
soup = bs(html, 'html.parser')

# Iterate through each article found
# Tried this as well: response = requests.get(url_mars)
# But unles JavaScript is turned of for this website, 'press release' articles show up first, not the latest one (discussed with John and Bobby)
articles = soup.find_all('div', class_='list_text')

news_title = soup.find('div', class_='content_title').text
news_body = soup.find('div', class_='article_teaser_body').text

print("news_title: " + news_title)
print("news_body: " + news_body)

news_title: Media Get a Close-Up of NASA's Mars 2020 Rover
news_p: The clean room at NASA's Jet Propulsion Laboratory was open to the media to see NASA's next Mars explorer before it leaves for Florida in preparation for a summertime launch.


In [4]:
#######################################################################
### FEATURED IMAGE ###
#######################################################################

# URL of page to be scraped
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

# Retrieve page with the requests module
response = requests.get(url)
soup = bs(response.text, 'lxml')

space_image = soup.article.a['data-fancybox-href']
print("Largest Image found: " + space_image)

# unless the background image is needed:
space_image_background = soup.article['style']

split_str = str.split(space_image_background, ' url(\'')
split_str = split_str[1]
split_str = str.split(split_str,')\'')
space_image_background = split_str[0]

print("Background image found: " + space_image_background)

#This may have changed in the mean time... only a mediumsize jpg on website
featured_image_url = 'https://www.jpl.nasa.gov' + space_image
print("featured_image_url: " + featured_image_url)

Largest Image found: /spaceimages/images/mediumsize/PIA14934_ip.jpg
Background image found: /spaceimages/images/wallpaper/PIA14934-1920x1200.jpg');
featured_image_url: https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA14934_ip.jpg


In [6]:
#######################################################################
### Mars Weather ###
#######################################################################

executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=True)# Mars Weather
# browser = init_browser()

# URL of page to be scraped
url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(url)
html = browser.html
soup = bs(html, 'html.parser')

mars_weather = soup.find('p', class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text").text
print("mars_weather: " + mars_weather)

mars_weather: InSight sol 400 (2020-01-11) low -99.1ºC (-146.5ºF) high -15.7ºC (3.8ºF)
winds from the SSE at 5.5 m/s (12.3 mph) gusting to 22.3 m/s (49.9 mph)
pressure at 6.40 hPapic.twitter.com/xYQHT9cdn5


In [7]:
#######################################################################
### Mars Facts ###
#######################################################################

url = 'https://space-facts.com/mars/'
tables = pd.read_html(url)
facts = pd.DataFrame(tables[0])
facts.columns = ['Mars Profile:','']
facts = facts.set_index('Mars Profile:')
print("Mars Facts:")
print(facts)

facts.to_html("mars_facts.html")

Mars Facts:
                                                   
Mars Profile:                                      
Equatorial Diameter:                       6,792 km
Polar Diameter:                            6,752 km
Mass:                 6.39 × 10^23 kg (0.11 Earths)
Moons:                          2 (Phobos & Deimos)
Orbit Distance:            227,943,824 km (1.38 AU)
Orbit Period:                  687 days (1.9 years)
Surface Temperature:                   -87 to -5 °C
First Record:                     2nd millennium BC
Recorded By:                   Egyptian astronomers


In [9]:
#######################################################################
### Mars Hemispheres ###
#######################################################################

executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=True)
# browser = init_browser()

url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url)

html = browser.html
soup = bs(html, 'html.parser')

# Find all links to clicked to obtain large image
results = soup.find_all('a', class_='itemLink product-item')
collection = []
for r in results:
    if r.h3:
#         print(r['href'])
        url = "https://astrogeology.usgs.gov" + r['href']
        collection.append(url)

# for each item in collections - find the large image by viewing the href. Then append to image list.

image_list = []
image_dict = {}

for c in collection:
    print("Now processing: " + c)
    browser.visit(c)
    
    html = browser.html
    soup = bs(html, 'html.parser')
    
    image_url = soup.li.a['href']
    title = soup.h2.text

    image_dict = {'hem_titel':title , 'hem_image_url':image_url}
    image_list.append(image_dict)
    
    browser.back

print("Image List:")
print(image_list)

mars_data = {}

mars_data['news_title'] = news_title
mars_data['news_body'] = news_body
mars_data['featured_image_url'] = featured_image_url
mars_data['mars_weather'] = mars_weather
mars_data['mars_facts_url'] = 'mars_facts.html'
mars_data['image_list'] = image_list

print("Mars Dictionary: ")
print(mars_data)

Now processing: https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced
Now processing: https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced
Now processing: https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced
Now processing: https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced
Image List:
[{'hem_titel': 'Cerberus Hemisphere Enhanced', 'hem_image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}, {'hem_titel': 'Schiaparelli Hemisphere Enhanced', 'hem_image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}, {'hem_titel': 'Syrtis Major Hemisphere Enhanced', 'hem_image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}, {'hem_titel': 'Valles Marineris Hemisphere Enhanced', 'hem_image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viki