In [1]:
from bs4 import BeautifulSoup
import time
import requests
import pymongo
import pandas as pd
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist

In [2]:
# Launch a visible (non-headless) Chrome session driven by the local chromedriver binary.
executable_path = dict(executable_path='chromedriver.exe')
browser = Browser('chrome', headless=False, **executable_path)

In [3]:
# Scrape news title and paragraph
url_1 = 'https://mars.nasa.gov/news/'
browser.visit(url_1)
# The following cells snapshot browser.html and look up `li.slide`; give the
# page up to 10 seconds to put that element in the DOM so the snapshot is not
# taken too early. Returns as soon as the element is present. The trailing
# semicolon suppresses the True/False cell output.
browser.is_element_present_by_css('li.slide', wait_time=10);

In [4]:
# Snapshot the page currently loaded in the browser and parse it.
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

In [5]:
# Narrow the search to the first <li class="slide"> element; the title and
# teaser lookups below are made relative to it.
slide = soup.find('li', attrs={'class': 'slide'})

In [6]:
# Headline text of the first news slide.
title_div = slide.find('div', class_='content_title')
title = title_div.get_text()
title

"NASA's MAVEN Observes Martian Night Sky Pulsing in Ultraviolet Light"

In [7]:
# Teaser paragraph of the same news slide.
teaser_div = slide.find('div', class_='article_teaser_body')
paragraph = teaser_div.get_text()
paragraph

'Vast areas of the Martian night sky pulse in ultraviolet light, according to images from NASA’s MAVEN spacecraft. The results are being used to illuminate complex circulation patterns in the Martian atmosphere.'

In [8]:
# Scrape the featured image and its URL: start from the JPL Mars image search page.
url_2 = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(url_2)

In [9]:
# Click the link whose text contains 'FULL IMAGE' on the page loaded from url_2.
browser.click_link_by_partial_text('FULL IMAGE')



In [10]:
# The 'more info' link only becomes available after the previous click has been
# processed; poll briefly for it instead of clicking immediately. If the wait
# times out the click is still attempted, so a genuinely missing link fails
# here exactly as it did before.
browser.is_element_present_by_text('more info', wait_time=2)
browser.click_link_by_partial_text('more info')

In [11]:
# Re-parse the page the browser landed on after the two clicks above.
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

In [12]:
# Site-relative path of the main image, read from the <img class="main_image"> tag.
main_image_tag = soup.find('img', class_='main_image')
featured_image = main_image_tag['src']
featured_image

'/spaceimages/images/largesize/PIA17009_hires.jpg'

In [13]:
# Prefix the site-relative path with the JPL host to get an absolute URL.
featured_image_url = "https://www.jpl.nasa.gov" + featured_image
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA17009_hires.jpg'

In [14]:
# Check: show the Python type of the scraped `src` value.
type(featured_image)

str

In [28]:
# Scrape weather
# Fetched with requests rather than the Chrome-driven browser used elsewhere
# in this notebook.
url_3 = 'https://twitter.com/marswxreport?lang=en'
# Without a timeout, requests waits forever on a stalled connection and the
# kernel hangs; fail after 10 seconds instead.
response = requests.get(url_3, timeout=10)
# Set up the parser on the raw response body.
soup = BeautifulSoup(response.text, 'lxml')

In [29]:
# NOTE: Twitter has changed its page markup, so the lookup below no longer works.
# Tweets used to be wrapped in div elements with the class 'tweet':
#soup.find_all('div', class_="tweet")[2]

In [30]:
# Select tweet containers authored by "Mars Weather"; the newest tweet is
# expected to be the first match.
soup.find_all("div", class_="tweet", attrs={"data-name": "Mars Weather"})

[]

In [31]:
# Find text that mentions "sol" (the weather reports are keyed by sol number);
# the most recent report is expected to be the first result.
#
# The previous filter, attrs={"text": "sol"}, asked for <div> tags carrying an
# HTML attribute literally named "text", so it could never match tweet text.
# A `string=` filter is applied to text nodes instead. It matches "sol" as a
# whole word (case-insensitive) so strings such as "console" or "resolve" are
# not picked up. The filter may be called with None, hence the guard.
# NOTE: the result is now a list of matching text nodes rather than <div> tags.
twitter_search = soup.find_all(
    string=lambda text: text is not None and "sol" in text.lower().split()
)
twitter_search

[]

In [17]:
# Mars facts: page whose HTML tables are read with pandas below.
url_4 = "https://space-facts.com/mars/"

In [18]:
# Parse every <table> on the facts page into a list of DataFrames.
tables = pd.read_html(io=url_4)

In [19]:
# Keep the first table found on the page and preview up to ten rows.
df = tables[0]
df.head(n=10)

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [20]:
# Render the facts table as an HTML string (displayed as cell output, not stored).
df.to_html()

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>0</th>\n      <th>1</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>Surface Temperature:</td>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>First Record:</td>\n      <td>2nd millennium BC</td>\n    <

In [21]:
# Mars hemispheres: open the USGS search results in the browser and parse the
# page it currently shows.
url_5 = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(url_5)
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

In [22]:
# Fetch the same search-results page with plain requests.
# The timeout stops a stalled connection from hanging the kernel, and
# raise_for_status() surfaces an HTTP error here instead of letting an error
# page be parsed and fail later as a confusing `None` lookup.
response = requests.get(url_5, timeout=10)
response.raise_for_status()
# Set up the parser on the raw response body.
py_soup = BeautifulSoup(response.text, 'lxml')

In [23]:
# Container <div> holding the search results; the hemisphere links are read from it.
items = py_soup.find("div", attrs={"class": "collapsible results"})

In [24]:
# List the href of every anchor inside the results container.
for a in items.find_all(name='a', href=True):
    print("Found the URL:", a.get('href'))

Found the URL: /search/map/Mars/Viking/cerberus_enhanced
Found the URL: /search/map/Mars/Viking/schiaparelli_enhanced
Found the URL: /search/map/Mars/Viking/syrtis_major_enhanced
Found the URL: /search/map/Mars/Viking/valles_marineris_enhanced


In [25]:
# Visit each hemisphere detail page and record its title and the URL behind
# its 'Sample' link as {'title': ..., 'img_url': ...}.
hemisphere_image_urls = []

# dict.fromkeys keeps first-seen order while dropping repeated hrefs, so a
# result that is linked more than once is only scraped once.
detail_paths = list(dict.fromkeys(a['href'] for a in items.find_all('a', href=True)))

for detail_path in detail_paths:
    detail_url = "https://astrogeology.usgs.gov" + detail_path
    browser.visit(detail_url)
    # Wait (up to 5 s) for the heading to be present instead of sleeping for a
    # fixed second and hoping the page has finished loading.
    browser.is_element_present_by_css("h2.title", wait_time=5)
    sample_link = browser.find_link_by_text('Sample')
    hemisphere_image_urls.append({
        'title': browser.find_by_css("h2.title").text,
        'img_url': sample_link['href'],
    })
    # Short pause between page loads to avoid hammering the server.
    time.sleep(1)



In [26]:
# Display the collected list of hemisphere title / image-URL dicts.
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]

In [27]:
# Close the browser session that was opened at the top of the notebook.
browser.quit()