In [2]:
# Import Splinter and BeautifulSoup
from splinter import Browser
from bs4 import BeautifulSoup

In [3]:
# Locate the chromedriver binary on PATH; the path it prints is the one
# hard-coded into the browser setup cell.
!which chromedriver

/usr/local/bin/chromedriver


In [4]:
# Start a Chrome session through splinter, pointing it at the local chromedriver binary
executable_path = dict(executable_path='/usr/local/bin/chromedriver')
browser = Browser('chrome', **executable_path)
print(browser)

<splinter.driver.webdriver.chrome.WebDriver object at 0x111c4e2b0>


## Visit the NASA Mars News site


In [5]:
# Open the NASA Mars News listing in the splinter-driven browser
url = 'https://mars.nasa.gov/news/'
browser.visit(url)

# Check for the first news slide, passing wait_time=1 to give the page a moment
# to render; as the cell's last expression, the boolean result is displayed.
browser.is_element_present_by_css("ul.item_list li.slide", wait_time=1)

True

In [6]:
# Parse the browser's current HTML with BeautifulSoup. The browser is NOT quit
# here; a later cell still calls browser.visit().
html = browser.html
news_soup = BeautifulSoup(html, 'html.parser')

# First element matching `ul.item_list li.slide`; the following cells search inside it
slide_elem = news_soup.select_one('ul.item_list li.slide')

In [7]:
# Inspect the `div.content_title` element inside the slide
slide_elem.find("div", class_='content_title')

<div class="content_title"><a href="/news/8360/six-things-about-opportunitys-recovery-efforts/" target="_self">Six Things About Opportunity's Recovery Efforts</a></div>

In [8]:
# Find the `div.content_title` inside the slide and keep that element as `news_title`;
# the headline text itself is only displayed by the last line, not stored.
news_title = slide_elem.find("div", class_='content_title')
news_title.get_text()

"Six Things About Opportunity's Recovery Efforts"

In [9]:
# Pull the headline and teaser elements out of the slide's `div.list_text` container
list_text = slide_elem.find("div", class_='list_text')
title = list_text.find("div", class_='content_title')
teaser = list_text.find("div", class_='article_teaser_body')

# Only a cell's last expression is displayed, so a bare mid-cell `.get_text()`
# call is thrown away. Store the teaser text in a variable and display that.
news_p = teaser.get_text()
news_p

'The global dust storm on Mars could soon let in enough sunlight for the Opportunity rover to recharge.'

## JPL Space Images Featured Image

### Define make_soup / Mars Hemispheres

In [4]:
import urllib
import urllib.request
from bs4 import BeautifulSoup

In [5]:
def make_soup(url):
    """Download `url` and parse the response body with BeautifulSoup.

    Parameters
    ----------
    url : str or urllib.request.Request
        Anything accepted by `urllib.request.urlopen`.

    Returns
    -------
    BeautifulSoup
        The response parsed with the built-in "html.parser".

    Raises
    ------
    urllib.error.URLError
        Propagated from `urlopen` when the request fails.
    """
    # The context manager closes the HTTP response as soon as it has been parsed,
    # so repeated calls do not leave connections open.
    with urllib.request.urlopen(url) as thepage:
        soupdata = BeautifulSoup(thepage, "html.parser")
    return soupdata

In [31]:
# Fetch and parse the JPL space-images listing (Mars category) with make_soup
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
soup = make_soup(url)

In [37]:
# Find the full image button (an <a> with class="button fancybox") in the parsed
# page and read its `data-fancybox-href` attribute. Nothing is clicked: the page
# was fetched with make_soup, not the browser.
# Note: `result` first holds the tag and is then rebound to the attribute string.
result = soup.find('a', class_="button fancybox")
result = result.get("data-fancybox-href")
# The attribute value is appended to the site origin to form the full URL
featured_image_url = "https://www.jpl.nasa.gov" + result
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA09113_ip.jpg'

In [40]:
# Download the featured image and save it as featuredImg.jpeg in the working directory.
# The download happens first so a failed request does not leave an empty file behind,
# and both handles are closed by their context managers even if an error occurs.
with urllib.request.urlopen(featured_image_url) as response:
    image_bytes = response.read()
with open("featuredImg.jpeg", 'wb') as imagefile:
    imagefile.write(image_bytes)

## Mars Weather


In [22]:
import requests

# Open the Mars weather Twitter feed in the splinter browser
url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(url)

In [23]:
# Fetch the same Twitter URL directly with requests (independent of the browser session)
response = requests.get(url)

In [24]:
# Parse the response body; this rebinds `soup`, which an earlier cell used for another page
soup = BeautifulSoup(response.text, 'html.parser')

In [25]:
# Preview only the start of the prettified page: the full document is a wall of
# inline scripts that buries the rest of the notebook.
print(soup.prettify()[:2000])

<!DOCTYPE html>
<html data-scribe-reduced-action-queue="true" lang="en">
 <head>
  <meta charset="utf-8"/>
  <script nonce="ha1F5bSI5JZ3FmaQVnveyQ==">
   !function(){window.initErrorstack||(window.initErrorstack=[]),window.onerror=function(r,i,n,o,t){r.indexOf("Script error.")>-1||window.initErrorstack.push({errorMsg:r,url:i,lineNumber:n,column:o,errorObj:t})}}();
  </script>
  <script id="bouncer_terminate_iframe" nonce="ha1F5bSI5JZ3FmaQVnveyQ==">
   if (window.top != window) {
  window.top.postMessage({'bouncer': true, 'event': 'complete'}, '*');
}
  </script>
  <script id="swift_action_queue" nonce="ha1F5bSI5JZ3FmaQVnveyQ==">
   !function(){function e(e){if(e||(e=window.event),!e)return!1;if(e.timestamp=(new Date).getTime(),!e.target&&e.srcElement&&(e.target=e.srcElement),document.documentElement.getAttribute("data-scribe-reduced-action-queue"))for(var t=e.target;t&&t!=document.body;){if("A"==t.tagName)return;t=t.parentNode}return i("all",o(e)),a(e)?(document.addEventListener||(e=o(

In [26]:
# Collect every div with class "js-tweet-text-container"; note that `results`
# is a generic name that later cells rebind to other data.
results = soup.find_all('div', class_="js-tweet-text-container")

In [27]:
# Use the stripped text of the first matched container as the weather report
# (raises IndexError if nothing matched).
mars_weather = results[0].get_text().strip()
mars_weather

'Sol 2142 (2018-08-15), high -10C/14F, low -71C/-95F, pressure at 8.65 hPa, daylight 05:28-17:41'

## Mars Facts

In [43]:
import pandas as pd

In [44]:
# Page whose HTML tables are read with pandas in the next cell
url = 'https://space-facts.com/mars/'

In [45]:
# pd.read_html returns a list of DataFrames parsed from the page's tables
tables = pd.read_html(url)
tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [46]:
# Confirm the result is a plain list before indexing into it
type(tables)

list

#### Pandas DataFrame / Add column names

In [47]:
# Work on a copy of the first parsed table: assigning `df.columns` on an alias
# would also rename the columns of `tables[0]`, making the `tables` output
# shown earlier stale and the cell dependent on how often it has been run.
df = tables[0].copy()
df.columns = ['Fields', 'Data']
df.head()

Unnamed: 0,Fields,Data
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"


#### Table to HTML

In [48]:
# Render the facts table as an HTML string; with default arguments the markup
# includes the row index column and the "dataframe" CSS class.
html_table = df.to_html()
html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Fields</th>\n      <th>Data</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Mass:</td>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>Surface Temperature:</td>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>First Record:</td>\n      <td>2nd millennium BC</t

In [49]:
# Fetch and parse the USGS Astrogeology search results for "hemisphere enhanced" (target: Mars)
soup = make_soup("https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars")

In [42]:
# Build one {"title": ...} dict per product link found on the search page,
# using the stripped text of each link's <h3> as the title.
results = soup.find_all('a', class_="itemLink product-item")
hemisphere_image_urls = [
    {"title": link.h3.text.strip()}
    for link in results
]
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced'},
 {'title': 'Schiaparelli Hemisphere Enhanced'},
 {'title': 'Syrtis Major Hemisphere Enhanced'},
 {'title': 'Valles Marineris Hemisphere Enhanced'}]

#### Getting img urls

In [77]:
# Detail pages for the four hemispheres, in the same order as the titles list
urls = [
    "https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced",
    "https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced",
    "https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced",
    "https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced",
]

# For each page, read the `src` of its img.wide-image and prefix the site origin
results = []
for url in urls:
    soup = make_soup(url)
    wide_image = soup.find('img', class_="wide-image")
    results.append("https://astrogeology.usgs.gov" + wide_image.get('src'))
results

<img class="wide-image" src="/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg"/>
<img class="wide-image" src="/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg"/>
<img class="wide-image" src="/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg"/>
<img class="wide-image" src="/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg"/>


#### Combining with hemisphere_image_urls

In [72]:
# Attach each image URL to the hemisphere entry at the same position.
# `results` is rebound by several cells to different data, so fail loudly if it
# does not line up with the title list instead of pairing entries silently.
assert len(results) == len(hemisphere_image_urls), (
    "results and hemisphere_image_urls differ in length; re-run the image URL cell"
)
for item, img_url in zip(hemisphere_image_urls, results):
    item["img_url"] = img_url
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]

#### Downloading images

In [84]:
# Save every hemisphere image as image<i>.jpeg in the working directory.
# Iterate over the list that is actually read from (hemisphere_image_urls), so the
# loop bound cannot drift from the data it indexes.
for i, hemisphere in enumerate(hemisphere_image_urls):
    # Download first, then open the file, so a failed request leaves no empty file;
    # both handles are closed by their context managers even if an error occurs.
    with urllib.request.urlopen(hemisphere["img_url"]) as response:
        image_bytes = response.read()
    with open(f"image{i}.jpeg", 'wb') as imagefile:
        imagefile.write(image_bytes)