In [1]:
#Import Dependencies
import time
from splinter import Browser
from bs4 import BeautifulSoup
import pandas as pd
import requests
from selenium import webdriver
from sys import platform

In [2]:
# Browser factory used for Splinter-driven scraping
def init_browser():
    """Create a visible (non-headless) Splinter Chrome browser.

    The chromedriver location is picked from ``sys.platform``: on macOS
    ("darwin") the driver at ``/usr/local/bin/chromedriver`` is used; on
    every other platform ``chromedriver.exe`` is used.

    Returns:
        The object returned by ``Browser("chrome", ...)``.
    """
    on_mac = platform == "darwin"
    driver_path = "/usr/local/bin/chromedriver" if on_mac else 'chromedriver.exe'
    return Browser("chrome", executable_path=driver_path, headless=False)

In [3]:
# Pages scraped by the cells below, one constant per source.
facts_url = 'https://space-facts.com/mars/'  # Mars facts table
twit_url = 'https://twitter.com/marswxreport?lang=en'  # weather tweets
nasa_url = 'https://mars.nasa.gov/news/'  # latest news title / teaser
img_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'  # featured image
astro_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'  # hemisphere search results

In [4]:
# Fetch a page and hand back its parsed HTML
def getResponse(url, timeout=30):
    """Download ``url`` and parse the body with BeautifulSoup's 'html.parser'.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely and hang the
            notebook.

    Returns:
        A ``BeautifulSoup`` object built from the response text.

    Raises:
        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx
            status, so an error page is never parsed as if it were content.
        requests.exceptions.RequestException: On connection failures or
            timeouts.
    """
    response = requests.get(url, timeout=timeout)
    # Fail here with a clear HTTP error rather than later with an
    # AttributeError when an expected tag is missing from an error page.
    response.raise_for_status()
    return BeautifulSoup(response.text, 'html.parser')

In [5]:
# Latest NASA news item: headline text and teaser text, with
# leading/trailing newlines trimmed from each.
nasa_soup = getResponse(nasa_url)
title = (
    nasa_soup
    .find('div', class_='content_title')
    .get_text()
    .strip('\n')
)
desc = (
    nasa_soup
    .find('div', class_='rollover_description_inner')
    .get_text()
    .strip('\n')
)

In [6]:
# Featured image: read the path out of the carousel item's inline
# `background-image: url(...)` style and make it an absolute URL.
img_soup = getResponse(img_url)
carousel_style = img_soup.find(class_='carousel_item')['style']
# str.lstrip/rstrip treat their argument as a *set of characters*, not as a
# prefix/suffix, so they can eat leading or trailing characters of the path
# itself. Cut on the literal `url(` and `)` delimiters instead.
picture = carousel_style.partition('url(')[2].partition(')')[0].strip()
# The path may be wrapped in single or double quotes inside url(...).
fin_img_url = 'https://www.jpl.nasa.gov' + picture.strip('\'"')

In [7]:
# Weather report: text of the first <p> on the Twitter page whose class
# attribute matches the tweet-text class string below.
twit_soup = getResponse(twit_url)
result = twit_soup.find(
    'p',
    class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text",
)
mars_weather = result.get_text()

In [22]:
# Mars facts: take the first HTML table on the page, label its two columns,
# index it by category and render it back to an HTML string.
facts_df = pd.read_html(facts_url)[0]
facts_df.columns = ['Category', 'Measurement']
facts_df = facts_df.set_index('Category')
table_string = facts_df.to_html()
# Trailing expression so the cell displays the generated markup.
table_string

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Measurement</th>\n    </tr>\n    <tr>\n      <th>Category</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </

In [9]:
# Hemisphere images: pair each search-result title with its full-size image.
astro_soup = getResponse(astro_url)

# Full-resolution image links, listed in the same order as the search results.
# All four use https so they match each other and are not mixed content.
links = ['https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
        'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
        'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg',
        'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg']

# Query the result items once instead of re-running findAll on every pass.
hemisphere_items = astro_soup.findAll(class_="item")

# zip() stops at the shorter sequence, so a page listing more items than
# there are known links no longer raises IndexError.
hemisphere_image_urls = [
    {item.find('a').find('div').get_text(): link}
    for item, link in zip(hemisphere_items, links)
]

In [10]:
# Bundle every scraped value into a single dictionary.
mars_dict = dict(
    news_title=title,
    news_summary=desc,
    featured_image_url=fin_img_url,
    mars_weather=mars_weather,
    table_string=table_string,
    hemisphere_image_urls=hemisphere_image_urls,
)

In [None]:
# Display the assembled dictionary. `return` is only valid inside a function;
# at the top level of a cell it is a SyntaxError, so a bare trailing
# expression is used to show the result instead.
mars_dict