In [1]:
# Dependencies

from bs4 import BeautifulSoup as bs
import pandas as pd
import requests
import urllib.request
from splinter import Browser
import lxml.html
import re

In [43]:
### URL's to be scraped

# NASA Mars News Site - collect the latest News Title and Paragraph Text
news_url = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'
nasa_homeurl = 'https://mars.nasa.gov'

# JPL Mars Space Images
jpl_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
PIAStart = "https://www.jpl.nasa.gov/spaceimages/images/largesize/"
PIAEnd = "_hires.jpg"

# Mars Weather - grab latest weather from Mars Weather twitter account
weather_url = 'https://twitter.com/marswxreport?lang=en'

# Mars Facts - use Pandas to scrape the table containing facts (more details below)
facts_url = 'https://space-facts.com/mars/'

# Mars Hemisphere Image - create a dictionary with the image url string and the hemisphere title to a list (more details below)
Hems_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'


# Splinter Path
# Finally figured out why we needed splinter vs. requests.get, the target we were looking for isn't in the html
# source code, it is dynamically loaded as JS and isn't searchable unless the browser is opened with splinter.
executable_path = {'executable_path': 'chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

In [40]:
##### NASA Mars News
##### Using new_url, find the top featured title and teaser for the story




browser.visit(news_url)
html = browser.html
soup = bs(html, 'lxml-xml')

featured_title =  soup.find('div', class_='content_title').a.text
featured_teaser = soup.find('div', class_='article_teaser_body').text
nasa_titlelink = re.findall('href="([^ ]*)', str(soup.find('div', class_='content_title')))
featured_link = nasa_homeurl+nasa_titlelink[0]

print("Nasa Featured Title is: ",featured_title)
print("Nasa Featured Title Teaser is: ",featured_teaser)
print("Link to the Nasa Featured Title is: ",featured_link)

Nasa Featured Title is:  NASA to Host Media Call on Next Mars Landing Site
Nasa Featured Title Teaser is:  NASA will host a media teleconference at 9 a.m. PST (noon EST) Monday, Nov. 19, to provide details about the Mars 2020 rover’s landing site on the Red Planet.
Link to the Nasa Featured Title is:  https://mars.nasa.gov/news/8386/nasa-to-host-media-call-on-next-mars-landing-site/"


In [156]:
######### JPL Mars Space Image URL (Largesize jpg)
browser.visit(jpl_url)
html = browser.html
soup = bs(html, 'lxml-xml')
picurlraw = soup.find('div', class_='carousel_items')

for link in picurlraw.find_all('a'):
    test99 = (link.get('data-fancybox-href'))

aspos = test99.find('PIA')
PIA = test99[aspos : (test99.find('_', aspos))]

featured_image_url = PIAStart + PIA + PIAEnd
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA16919_hires.jpg'

In [116]:
######### Mars Twitter Weather URL
browser.visit(weather_url)
html = browser.html
soup = bs(html, 'lxml-xml')
tweetraw = soup.find('div', class_='js-tweet-text-container')

tweetfix = tweetraw.find('p')
tweetwords = []
for x in tweetfix:
    tweetwords.append(str(x))
mars_weather = tweetwords[0]
mars_weather

'Sol 2230 (2018-11-14), high -5C/23F, low -72C/-97F, pressure at 8.59 hPa, daylight 06:22-18:39'

In [127]:
######### Mars Facts - Scrape table with pandas

browser.visit(facts_url)
html = browser.html
soup = bs(html, 'lxml-xml')
tableraw = soup.find('table', attrs={'id':'tablepress-mars'})
table_rows = tableraw.find_all('tr')

dataList = []
for tr in table_rows:
    td = tr.find_all('td')
    row = [tr.text for tr in td]
    dataList.append(row)
pd.DataFrame(dataList, columns=["A", "B"])

Unnamed: 0,A,B
0,Equatorial Diameter:,"6,792 km\n"
1,Polar Diameter:,"6,752 km\n"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)\n
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [190]:
######### Images of Mars Hemispheres ###############
HemisphereTitles = []
HemisphereLinks = []

# Cerverus
cerberusUrl = "https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced"
browser.visit(cerberusUrl)
html = browser.html
soup = bs(html, 'lxml-xml')
cerberusHome = soup.find('img', class_='wide-image')
cerberusStrRaw = (str(cerberusHome))
cerberusPart = cerberusStrRaw[(cerberusStrRaw.find('/cache')):(cerberusStrRaw.find('jpg')+3)]
cerberusLink = usgsStart + cerberusPart
cerberusTitle = soup.find('h2', class_='title').text
HemisphereTitles.append(cerberusTitle)
HemisphereLinks.append(cerberusLink)

#Schiaparelli
schiaparelliUrl = "https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced"
browser.visit(schiaparelliUrl)
html = browser.html
soup = bs(html, 'lxml-xml')
schiaparelliHome = soup.find('img', class_='wide-image')
schiapStrRaw = (str(schiaparelliHome))
schiapPart = schiapStrRaw[(schiapStrRaw.find('/cache')):(schiapStrRaw.find('jpg')+3)]
schiapLink = usgsStart + schiapPart
schiapTitle = soup.find('h2', class_='title').text
HemisphereTitles.append(schiapTitle)
HemisphereLinks.append(schiapLink)

#Syrtis
SyrtisUrl = "https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced"
browser.visit(SyrtisUrl)
html = browser.html
soup = bs(html, 'lxml-xml')
SyrtisHome = soup.find('img', class_='wide-image')
SyrtisStrRaw = (str(SyrtisHome))
SyrtisPart = SyrtisStrRaw[(SyrtisStrRaw.find('/cache')):(SyrtisStrRaw.find('jpg')+3)]
SyrtisLink = usgsStart + SyrtisPart
SyrtisTitle = soup.find('h2', class_='title').text
HemisphereTitles.append(SyrtisTitle)
HemisphereLinks.append(SyrtisLink)

#Valles
VallesUrl = "https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced"
browser.visit(VallesUrl)
html = browser.html
soup = bs(html, 'lxml-xml')
VallesHome = soup.find('img', class_='wide-image')
VallesStrRaw = (str(VallesHome))
VallesPart = VallesStrRaw[(VallesStrRaw.find('/cache')):(VallesStrRaw.find('jpg')+3)]
VallesLink = usgsStart + VallesPart
VallesTitle = soup.find('h2', class_='title').text
HemisphereTitles.append(VallesTitle)
HemisphereLinks.append(VallesLink)

HemisphereDict = dict(zip(HemisphereTitles, HemisphereLinks))
HemisphereDict

{'Cerberus Hemisphere Enhanced': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg',
 'Schiaparelli Hemisphere Enhanced': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg',
 'Syrtis Major Hemisphere Enhanced': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg',
 'Valles Marineris Hemisphere Enhanced': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}