In [1]:
# ------------------------------------------------------------
#  Step 1: Setup
# ------------------------------------------------------------
import os

import pandas as pd
from bs4 import BeautifulSoup as soup
from splinter import Browser

import aux_func as aux


# initialize splinter browser
# The chromedriver location is machine-specific, so it can be overridden with the
# CHROMEDRIVER_PATH environment variable; when that is unset the original local
# path is used, which keeps the previous behavior.
CHROMEDRIVER_PATH = os.environ.get(
    'CHROMEDRIVER_PATH',
    "C:/Users/Drew's Surface/AppData/Local/chromedriver",
)
browser = Browser('chrome', executable_path=CHROMEDRIVER_PATH, headless=False)

In [2]:
# ------------------------------------------------------------
#  Step 2: NASA Mars News Scrape from NASA
# ------------------------------------------------------------
url = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'
# NOTE(review): aux.getParsedWebpage presumably loads `url` in the browser and
# returns a BeautifulSoup document - confirm against aux_func.
webpage = aux.getParsedWebpage(browser, url)

# collect every headline (<h3>) and every teaser paragraph on the page
headlines_grouped = webpage.find_all('h3', class_=None)
text_grouped = webpage.find_all('div', class_='article_teaser_body')

# show the first headline, then the first teaser
for grouped in (headlines_grouped, text_grouped):
    print(aux.getParsedTextList(grouped)[0])

Mars InSight's 'Mole' Is Moving Again
The NASA lander's robotic arm seems to have helped its heat probe burrow almost 2 centimeters (3/4 of an inch) since last week.


In [3]:
# ------------------------------------------------------------
#  Step 3: JPL Mars Space Images - Featured Image
# ------------------------------------------------------------
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
webpage = aux.getParsedWebpage(browser, url)

# find the featured-image button and read the image path it carries
featured_button = webpage.find('a', class_='button fancybox')
featured_href = featured_button.get('data-fancybox-href') if featured_button else None
if not featured_href:
    # fail with a clear message instead of an AttributeError on None
    raise ValueError(f'No featured image link found on {url}')

# The image id is the part of the file name before the first underscore
# (e.g. PIA17356). Take the last path segment rather than a fixed index so
# the lookup still works if the href becomes absolute or changes depth.
featured_filename = featured_href.rsplit('/', 1)[-1].split('_')[0]

# construct url for largest size of featured image available
featured_url = f'https://www.jpl.nasa.gov/spaceimages/images/largesize/{featured_filename}_hires.jpg'
print(featured_url)
print(featured_filename)

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA17356_hires.jpg
PIA17356


In [4]:
# ------------------------------------------------------------
#  Step 4: Mars Weather from Twitter Scrape
# ------------------------------------------------------------
std_tweet_class = 'TweetTextSize TweetTextSize--normal js-tweet-text tweet-text'
url = 'https://twitter.com/marswxreport?lang=en'
webpage = aux.getParsedWebpage(browser, url)

# gather every tweet-text paragraph; the first match is taken as the most
# recent weather tweet
tweet_paragraphs = webpage.find_all('p', class_=std_tweet_class)
recent_weather = tweet_paragraphs[0].get_text()
recent_weather

'InSight sol 316 (2019-10-16) low -101.8ºC (-151.3ºF) high -25.7ºC (-14.3ºF)\nwinds from the SSE at 4.7 m/s (10.5 mph) gusting to 18.2 m/s (40.8 mph)\npressure at 7.10 hPapic.twitter.com/tXtGZA6IPW'

In [5]:
# ------------------------------------------------------------
#  Step 5: Mars Facts scrape from Space Facts
# ------------------------------------------------------------
url = 'https://space-facts.com/mars/'
webpage = aux.getParsedWebpage(browser, url)

# get all rows in the facts table
facts_table = webpage.find('table', class_='tablepress tablepress-id-p-mars')
facts_all = facts_table.find_all('tr')

# map each row's bold label to the text of its second column
fact_dict = {
    row.find('strong').get_text(): row.find(class_='column-2').get_text()
    for row in facts_all
}

# build a one-column DataFrame indexed by fact label, then render it as HTML
fact_df = (
    pd.DataFrame.from_dict(fact_dict, orient='index')
    .rename(columns={0: 'Facts about Mars'})
)
fact_html = fact_df.to_html()

fact_df

Unnamed: 0,Facts about Mars
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [6]:
# ------------------------------------------------------------
#  Step 6: Mars Hemispheres scrape from USGS Astrogeology site
# ------------------------------------------------------------
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
webpage = aux.getParsedWebpage(browser, url)

# get all unique links to the photo pages first
# dict.fromkeys drops repeated hrefs while keeping the order they appear on
# the page, so the scrape order is the same on every run (a set would not
# guarantee any order).
page_links = webpage.find_all('a', class_='itemLink product-item')
page_links_list = list(dict.fromkeys(page.get('href') for page in page_links))

image_list = []

# iterate through links and pull URL for full size images
for link in page_links_list:
    url = f'https://astrogeology.usgs.gov{link}'
    webpage = aux.getParsedWebpage(browser, url)

    # get image title
    title = webpage.find('h2', class_='title').get_text()

    # get full size image link: the first anchor inside the downloads section
    downloads_section = webpage.find('div', class_='downloads')
    image_link = downloads_section.find('a').get('href')

    # add title and full-size image url to dict
    image_list.append({'title': title, 'image_url': image_link})

# create dataframe
image_df = pd.DataFrame(image_list, columns=['title', 'image_url'])

# only the last expression of a cell is displayed, so show the list once here
image_list

[{'title': 'Syrtis Major Hemisphere Enhanced',
  'image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Cerberus Hemisphere Enhanced',
  'image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]