In [1]:
import os

import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
from splinter import Browser

In [2]:
# Pages scraped by this notebook: news listing, featured image, weather feed,
# facts table and hemisphere search results.
newsUrl = "https://mars.nasa.gov/news/"
picUrl = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
weatherUrl = "https://twitter.com/marswxreport?lang=en"
factsUrl = "https://space-facts.com/mars/"
hemisUrl = (
    "https://astrogeology.usgs.gov/search/results"
    "?q=hemisphere+enhanced&k1=target&v1=Mars"
)

In [3]:
# Location of the chromedriver binary. The original path stays the default,
# but it can be overridden with the CHROMEDRIVER_PATH environment variable so
# the notebook is not tied to one machine's filesystem layout.
executable_path = {
    'executable_path': os.environ.get('CHROMEDRIVER_PATH', '/usr/local/bin/chromedriver')
}
# headless=False runs Chrome with a visible window while pages are loaded.
browser = Browser('chrome', **executable_path, headless=False)

In [4]:
# Fetch the news page. The timeout keeps a stalled connection from hanging the
# kernel, and raise_for_status() surfaces an HTTP error here instead of letting
# an error page be parsed as if it were the news listing.
news_response = requests.get(newsUrl, timeout=30)
news_response.raise_for_status()

In [5]:
# Parse the downloaded news page with Python's built-in HTML parser.
news_soup = bs(markup=news_response.text, features='html.parser')

In [6]:
#print(news_soup.prettify())

In [7]:
# Every <div class="slide"> under <body> is one candidate article entry.
news_results = news_soup.body.find_all('div', attrs={'class': 'slide'})

# Filled with one record per article by the loop that follows.
news_list = []

In [8]:
# Rebuild the list inside this cell so that re-running it never appends a
# second copy of every article to a list created by an earlier cell.
news_list = []
counter = 1

for result in news_results:
    title_div = result.find('div', class_='content_title')
    title_link = title_div.find('a') if title_div is not None else None
    body_div = result.find('div', class_='rollover_description_inner')

    # find() returns None when an element is absent; skip slides that lack
    # either piece instead of aborting the whole scrape with AttributeError.
    if title_link is None or body_div is None:
        continue

    news_list.append({
        'article number': counter,
        'article title': title_link.text.strip(),
        # NOTE(review): 'articly' is a typo, but this key is part of the
        # emitted data, so it is left as-is to avoid breaking consumers.
        'articly summary': body_div.text.strip(),
    })
    # Only articles that were actually kept consume a number.
    counter += 1

print(news_list)

[{'article number': 1, 'article title': 'Opportunity Hunkers Down During Dust Storm', 'articly summary': "It's the beginning of the end for the planet-encircling dust storm on Mars. But it could still be weeks, or even months, before skies are clear enough for NASA's Opportunity rover to recharge its batteries and phone home."}, {'article number': 2, 'article title': 'NASA Finds Ancient Organic Material, Mysterious Methane on Mars', 'articly summary': 'NASA’s Curiosity rover has found evidence on Mars with implications for NASA’s search for life.'}, {'article number': 3, 'article title': 'NASA Invests in Visionary Technology', 'articly summary': 'NASA is investing in technology concepts, including several from JPL, that may one day be used for future space exploration missions.'}, {'article number': 4, 'article title': 'NASA is Ready to Study the Heart of Mars', 'articly summary': 'NASA is about to go on a journey to study the center of Mars.'}, {'article number': 5, 'article title': '

In [9]:
# Load the JPL space-images page in the browser so that its HTML can be read
# from browser.html afterwards.
browser.visit(picUrl)

In [10]:
# Parse whatever page the browser currently has loaded.
pic_html = browser.html
pic_soup = bs(pic_html, features='html.parser')

# Keep the inline style attribute of the first carousel <article>; the image
# path is extracted from it in a later step.
carousel_article = pic_soup.find('article', attrs={'class': 'carousel_item'})
pic_style = carousel_article['style']

In [11]:
# Pull the image path out of the CSS value: it is the text between the single
# quotes that follow "url(".
# - The intermediate result gets its own name, so pic_style is no longer
#   overwritten and this cell can be re-run without raising IndexError.
# - Splitting only once, on "url(", keeps a path that itself contains the
#   letters "url" from being cut short.
pic_css_url = pic_style.split("url(", 1)[1]
pic_link = pic_css_url.split("'")[1]

In [12]:
# Prefix the extracted image path with the JPL host to form the full URL.
jpl_host = 'https://www.jpl.nasa.gov'
pic_link_final = ''.join([jpl_host, pic_link])
print(pic_link_final)

https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA18432-1920x1200.jpg


In [13]:
# Fetch the weather feed page. The timeout keeps a stalled connection from
# hanging the kernel, and raise_for_status() reports an HTTP error here rather
# than letting an error page be parsed as if it were the feed.
weather_response = requests.get(weatherUrl, timeout=30)
weather_response.raise_for_status()
weather_soup = bs(weather_response.text, 'html.parser')

In [14]:
# Candidate tweet containers: every <div> carrying the "content" class.
weather_results = weather_soup.find_all('div', attrs={'class': 'content'})

In [15]:
# Find the first tweet that links to /MarsWxReport and mentions "Sol".
# weather_text stays empty when no such tweet is present.
weather_text = ''
for result in weather_results:
    anchor = result.find('a')
    # Tag.get() returns None for a missing attribute, so an anchor without an
    # href is skipped instead of raising an uncaught KeyError.
    if anchor is None or anchor.get('href') != '/MarsWxReport':
        continue

    # Explicit None checks replace the previous try/except AttributeError,
    # which printed an opaque message for every container that did not match.
    text_container = result.find('div', class_='js-tweet-text-container')
    paragraph = text_container.find('p') if text_container is not None else None
    if paragraph is None:
        continue

    content = paragraph.text.strip()
    if 'Sol' in content:
        weather_text = content
        break

print(weather_text)

Sol 2174 (2018-09-17), high -22C/-7F, low -68C/-90F, pressure at 8.96 hPa, daylight 05:45-18:01


In [16]:
# read_html fetches the facts page and returns a list with one DataFrame per
# HTML table it finds; the first entry is picked out in the next step.
facts = pd.read_html(factsUrl)

In [17]:
# Take the first parsed table, label its two positional columns and index the
# rows by the 'Variable' column.
facts_df = (
    facts[0]
    .rename(columns={0: 'Variable', 1: 'Value'})
    .set_index('Variable')
)

In [18]:
# Plain Python lists of the row labels and of the 'Value' column entries.
table_headers = [header for header in facts_df.index.values]
table_values = [value for value in facts_df['Value'].values]

In [19]:
# Write the table to facts.html on disk as well.
facts_df.to_html('facts.html')

# Single-line HTML string of the same table (newlines stripped), displayed as
# the cell's output.
html_facts_df = facts_df.to_html().replace('\n', '')
html_facts_df

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Value</th>    </tr>    <tr>      <th>Variable</th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter:</th>      <td>6,792 km</td>    </tr>    <tr>      <th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>Moons:</th>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>Orbit Distance:</th>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <th>Orbit Period:</th>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>Surface Temperature:</th>      <td>-153 to 20 °C</td>    </tr>    <tr>      <th>First Record:</th>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>Recorded By:</th>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>'

In [20]:
# Load the USGS Astrogeology search-results page for the enhanced Mars
# hemisphere images so its HTML can be read from browser.html afterwards.
browser.visit(hemisUrl)

In [21]:
# Parse the page the browser currently has loaded.
hemi_html = browser.html
hemi_soup = bs(hemi_html, features='html.parser')

# Every <div class="item"> is one search result.
hemi_results = hemi_soup.find_all('div', attrs={'class': 'item'})

In [22]:
# Collect, in matching order, each result's heading text and the absolute URL
# of the page its first link points to.
usgs_host = 'https://astrogeology.usgs.gov'
link_array, title_array = [], []
for item in hemi_results:
    heading = item.find('h3').text
    detail_url = usgs_host + item.find('a').attrs['href']
    link_array.append(detail_url)
    title_array.append(heading)

In [23]:
# Visit each collected page and record the href of the first link inside its
# <div class="downloads"> block.
final_hemi_img = []
for detail_url in link_array:
    browser.visit(detail_url)
    detail_soup = bs(browser.html, 'html.parser')
    downloads_div = detail_soup.find('div', class_='downloads')
    final_hemi_img.append(downloads_div.find('a').attrs['href'])

In [24]:
# Pair every title with its image link, one dict per hemisphere.
hemi_data = [
    {'Image Hemisphere Title': name, 'Image Hemisphere Link': url}
    for name, url in zip(title_array, final_hemi_img)
]
print(hemi_data)

[{'Image Hemisphere Title': 'Cerberus Hemisphere Enhanced', 'Image Hemisphere Link': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}, {'Image Hemisphere Title': 'Schiaparelli Hemisphere Enhanced', 'Image Hemisphere Link': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}, {'Image Hemisphere Title': 'Syrtis Major Hemisphere Enhanced', 'Image Hemisphere Link': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}, {'Image Hemisphere Title': 'Valles Marineris Hemisphere Enhanced', 'Image Hemisphere Link': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]


In [25]:
#news_list, weather_text, pic_link_final, html_facts_df, hemi_data

In [26]:
import json

In [27]:
# Gather every scraped piece under one dictionary, keyed by section name.
information = {}
information['News'] = news_list
information['Weather'] = weather_text
information['Main Image'] = pic_link_final
information['Table HTML'] = html_facts_df
information['Hemisphere Data'] = hemi_data

# Pretty-print the combined result as JSON with a four-space indent.
information_json = json.dumps(information, indent=4)
print(information_json)

{
    "News": [
        {
            "article number": 1,
            "article title": "Opportunity Hunkers Down During Dust Storm",
            "articly summary": "It's the beginning of the end for the planet-encircling dust storm on Mars. But it could still be weeks, or even months, before skies are clear enough for NASA's Opportunity rover to recharge its batteries and phone home."
        },
        {
            "article number": 2,
            "article title": "NASA Finds Ancient Organic Material, Mysterious Methane on Mars",
            "articly summary": "NASA\u2019s Curiosity rover has found evidence on Mars with implications for NASA\u2019s search for life."
        },
        {
            "article number": 3,
            "article title": "NASA Invests in Visionary Technology",
            "articly summary": "NASA is investing in technology concepts, including several from JPL, that may one day be used for future space exploration missions."
        },
        {
           