In [1]:
import pandas as pd
from lxml import html
from bs4 import BeautifulSoup
from splinter import Browser
import requests as req
import shutil
import matplotlib.pyplot as plt

In [2]:
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

### Nasa News Data

In [3]:
url = "https://mars.nasa.gov/news/"
browser.visit(url)

In [4]:
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [5]:
results = soup.find_all('ul',class_="item_list")


# There is a better way to not do this with a for loop, but it works and gives you the option to pull more records.

for result in results:
    title = result.find('div', class_='content_title')

    body = result.find('div', class_='article_teaser_body')

    date = result.find("div", class_="list_date")

    if (title and body and date):
        print(date.text)
        print('-------------')
        print(f'{title.text} \n')
        print(body.text)
    else:
        print('lol I messed up :(')

May 15, 2019
-------------
NASA's MRO Completes 60,000 Trips Around Mars 

The orbiting spacecraft is also about to set a record for data relayed from the Martian surface.


### Image Data

In [6]:
image_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(image_url)

image_html = browser.html
i_soup = BeautifulSoup(image_html, 'html.parser')

In [7]:
image = i_soup.find("img", class_="thumb")["src"]
img_link = "https://jpl.nasa.gov"+image
featured_image_url = img_link

In [8]:
# res = req.get(img_link, stream=True)
# with open('img.jpg', 'wb') as out_file:
#     shutil.copyfileobj(res.raw, out_file)
    
img_link 

'https://jpl.nasa.gov/spaceimages/images/wallpaper/PIA23221-640x350.jpg'

### Twitter Data

In [9]:
twitter_url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(twitter_url)

twitter_html = browser.html
t_soup = BeautifulSoup(twitter_html, 'html.parser')

In [10]:
twitter_res = t_soup.find_all('div', class_='js-tweet-text-container')[0].text

In [11]:
# This cleans text and removes the link from the image coming in.
twitter_res.replace('\n','').split('pic.twitter')[0]

'InSight sol 167 (2019-05-17) low -100.5ºC (-148.9ºF) high -20.4ºC (-4.6ºF)winds from the SW at 4.7 m/s (10.6 mph) gusting to 13.5 m/s (30.3 mph)pressure at 7.50 hPa'

### Mars Facts

In [12]:
facts_url = 'https://space-facts.com/mars/'

In [13]:
# we had the [0] at the end to remove the dataframe from the list
facts_df = pd.read_html(facts_url)[0]
facts_df.columns = ['Records:','Data']

In [14]:
facts_df.set_index('Records:')

Unnamed: 0_level_0,Data
Records:,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [15]:
facts_html = facts_df.to_html('facts_html')

In [16]:
facts_html

### Mars Hemispheres

In [17]:
hemi_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(hemi_url)

hemi_html = browser.html
h_soup = BeautifulSoup(hemi_html, 'html.parser')

# base for attaching hrefs to make a full url
base='https://astrogeology.usgs.gov'

In [18]:
image_paths = h_soup.find_all('div',class_='item')

In [19]:
paths = []
for path in image_paths:
    p = path.a['href']
    paths.append(base + p)

In [20]:
orginal_img_paths = []
titles = []
hemi_img_urls = []

for path in paths:

    browser.visit(path)
    path_html = browser.html
    p_soup = BeautifulSoup(path_html, 'html.parser')
    
    title = p_soup.find('h2',class_='title').text
    titles.append(title)
    
    for a in p_soup.find_all('a'):
        if a.text == 'Original':
            orginal_img_paths.append(a['href'])
            hemi_dict = {'title' :title,'img_url':a['href']}
            hemi_img_urls.append(hemi_dict)

In [21]:
hemi_img_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif'}]

In [24]:
browser.quit()