In [1]:
from splinter import Browser
from bs4 import BeautifulSoup as bs
import requests
import time

import pandas as pd

In [2]:
# Pages visited by the scraping functions. Each server root is declared first
# and the page URL is derived from it so the two cannot drift apart.

# NASA Mars news listing (headline + teaser).
nasa_news_url = "https://mars.nasa.gov/news/"

# JPL space-images gallery; the server root is also used to absolutise image links.
jpl_imgs_server = "https://www.jpl.nasa.gov"
jpl_imgs_url = jpl_imgs_server + "/spaceimages/?search=&category=Mars"

# Twitter timeline read for the latest weather tweet.
twitter_mars_url = "https://twitter.com/marswxreport?lang=en"

# Page whose first HTML table is loaded as the Mars profile.
mars_fact_url = "http://space-facts.com/mars/"

# USGS Astrogeology search results for the enhanced hemisphere images.
mars_astro_server = "https://astrogeology.usgs.gov"
mars_astro_url = mars_astro_server + "/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

In [3]:
# Global Functions
def init_browser(executable_path="E:/chromeDriver/chromedriver.exe", headless=False):
    """Create a splinter ``Browser`` that drives Chrome.

    Parameters
    ----------
    executable_path : str, optional
        Location of the chromedriver binary. The default is the path this
        notebook was originally written against; pass your own path instead
        of editing the function.
    headless : bool, optional
        Forwarded to ``Browser``; ``False`` (the default) matches the
        original behaviour.

    Returns
    -------
    The object returned by ``Browser("chrome", ...)``. The scraping functions
    in this notebook use it as a context manager.
    """
    return Browser("chrome", executable_path=executable_path, headless=headless)

In [4]:
def get_latest_marsNews():
    """Fetch the first headline and its teaser text from the NASA Mars news page.

    Returns
    -------
    tuple of (str, str)
        ``(news_title, news_subtxt)`` when the page is fetched with status 200.
    requests.exceptions.RequestException
        When the request fails or the status is not 200, the exception object
        is *returned* rather than raised (the original contract of this
        function). Callers that unpack the result into two names should check
        for this case first.
    """
    try:
        # get html from NASA news URL
        mars_news = requests.get(nasa_news_url)
        if mars_news.status_code != 200:
            # Only the ``requests`` module is imported, so the exception class
            # has to be referenced through it; a bare ``RequestException``
            # would be a NameError that skips the handler below.
            raise requests.exceptions.RequestException(
                "Unexpected HTTP status {} from {}".format(
                    mars_news.status_code, nasa_news_url
                )
            )

        # parse html file using BS4
        mars_cnt = bs(mars_news.content, "lxml")
        news_title = mars_cnt.find("div", class_="content_title").a.text.strip()
        news_subtxt = mars_cnt.find("div", class_="rollover_description_inner").text.strip()

        return (news_title, news_subtxt)
    except requests.exceptions.RequestException as e:
        return e

In [5]:
# Get the latest Mars image from the JPL website
def scrape_jps_image():
    """Return the image URL and description of the first gallery slide on the JPL page.

    Opens ``jpl_imgs_url`` in a browser from ``init_browser()``, parses the
    rendered HTML and reads the anchor inside the first ``<li class="slide">``.

    Returns
    -------
    tuple of (str, str)
        ``jpl_imgs_server`` joined with the anchor's ``data-fancybox-href``,
        and the anchor's ``data-description``.
    """
    with init_browser() as browser:
        browser.visit(jpl_imgs_url)

        # Fixed one-second pause before the page source is read.
        time.sleep(1)

        gallery_soup = bs(browser.html, "html.parser")

        # First slide in the gallery; its <a> carries both attributes we need.
        slide_anchor = gallery_soup.find("li", class_="slide").a

        full_img_url = jpl_imgs_server + slide_anchor['data-fancybox-href']
        img_description = slide_anchor['data-description']

        # No explicit quit call: the browser is only used inside this `with` block.
        return (full_img_url, img_description)

In [6]:
#testing
#mars_img_link = scrape_jps_image()
#mars_img_link

In [7]:
def get_latest_marsWeather():
    """Fetch the text of the first "Mars Weather" tweet from the Twitter page.

    Returns
    -------
    str
        The tweet text with any trailing ``pic.twitter.com/...`` link removed,
        when the page is fetched with status 200.
    requests.exceptions.RequestException
        When the request fails or the status is not 200, the exception object
        is *returned* rather than raised (the original contract of this
        function).
    """
    try:
        # get html from NASA Twitter URL
        twit_resp = requests.get(twitter_mars_url)
        if twit_resp.status_code != 200:
            # Reference the class through the imported ``requests`` module;
            # a bare ``RequestException`` is undefined here and would raise
            # NameError instead of reaching the handler below.
            raise requests.exceptions.RequestException(
                "Unexpected HTTP status {} from {}".format(
                    twit_resp.status_code, twitter_mars_url
                )
            )

        # parse html file using BS4
        mars_wthr_twit = bs(twit_resp.content, "lxml")
        wthr_twt = mars_wthr_twit.find(
            "div",
            {"data-name": "Mars Weather"},
            class_="tweet js-stream-tweet js-actionable-tweet js-profile-popup-actionable dismissible-content original-tweet js-original-tweet has-cards has-content",
        )
        mars_weather = wthr_twt.p.text.strip()

        # Drop the picture link appended to the tweet text. ``str.rstrip``
        # takes a *set of characters*, so stripping one specific link both
        # failed for every other tweet and could eat trailing letters of the
        # report itself; cutting at the link marker works for any tweet.
        mars_weather = mars_weather.partition("pic.twitter.com/")[0].rstrip()

        return mars_weather
    except requests.exceptions.RequestException as e:
        return e

In [8]:
#testing
#get_latest_marsWeather()

In [9]:
def get_mars_profile():
    """Load the first HTML table found at ``mars_fact_url`` as a DataFrame.

    The table's columns ``0`` and ``1`` are renamed to "Profile" and "Value",
    and "Profile" becomes the index.

    Returns
    -------
    pandas.DataFrame
        Indexed by "Profile".
    """
    first_table = pd.read_html(mars_fact_url)[0]
    labelled = first_table.rename(columns={0: "Profile", 1: "Value"})
    return labelled.set_index("Profile")

In [10]:
#testing
#df = get_mars_profile()
#df.to_html()



In [11]:
def get_mars_hemis_imgs():
    """Collect a title and image link for every ``<h3>`` entry on the USGS results page.

    For each heading found on ``mars_astro_url`` the browser clicks the link
    whose text contains the heading text, reads the first anchor inside the
    ``<div class="downloads">`` element of the resulting page, and then
    clicks the "Back" link before moving on.

    Returns
    -------
    list of dict
        One ``{'Title': ..., 'ImgURL': ...}`` entry per heading; empty when
        the results page exposes no ``<h3>`` elements.
    """
    collected = []
    with init_browser() as browser:
        browser.visit(mars_astro_url)
        # Fixed pause before the results page source is read.
        time.sleep(1)

        results_soup = bs(browser.html, "html.parser")

        for heading in results_soup.find_all("h3"):
            # Open the detail page for this hemisphere.
            browser.click_link_by_partial_text(heading.text)
            time.sleep(2)

            detail_soup = bs(browser.html, "lxml")
            download_anchor = detail_soup.find("div", class_="downloads").a

            collected.append({'Title': heading.text, 'ImgURL': download_anchor['href']})

            # Return to the listing so the next heading's link can be clicked.
            browser.click_link_by_partial_text("Back")

    return collected
            

In [15]:
#testing
#get_mars_hemis_imgs()

[]

In [17]:
def scrape():
    """Run every scraper in this notebook and gather the results in one dict.

    The scrapers are called in this order: news, JPL image, weather, profile
    table, hemisphere images. When the hemisphere scraper returns an empty
    result, a fixed list of four hemisphere entries is used in its place.

    Returns
    -------
    dict
        Keys: 'NewsTitle', 'News_subTitle', 'mars_LatestImg',
        'mars_LatestImg_desc', 'mars_latestWthr', 'mars_profile' (a
        one-element list holding the profile table rendered as HTML) and
        'mars_hemis_imgs'.
    """
    headline, teaser = get_latest_marsNews()
    latest_img, latest_img_desc = scrape_jps_image()
    weather = get_latest_marsWeather()
    profile_html = get_mars_profile().to_html(classes=['table table-striped'])
    hemispheres = get_mars_hemis_imgs()

    if not hemispheres:
        # Fixed stand-in used when the live scrape produced nothing.
        hemispheres = [
            {
                "Title": "Cerberus Hemisphere Enhanced",
                "ImgURL": "http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg",
            },
            {
                "Title": "Schiaparelli Hemisphere Enhanced",
                "ImgURL": "http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg",
            },
            {
                "Title": "Syrtis Major Hemisphere Enhanced",
                "ImgURL": "http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg",
            },
            {
                "Title": "Valles Marineris Hemisphere Enhanced",
                "ImgURL": "http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg",
            },
        ]

    return {
        'NewsTitle': headline,
        'News_subTitle': teaser,
        'mars_LatestImg': latest_img,
        'mars_LatestImg_desc': latest_img_desc,
        'mars_latestWthr': weather,
        'mars_profile': [profile_html],
        'mars_hemis_imgs': hemispheres,
    }

In [18]:
# Run every scraper once and keep the combined dictionary in `marsinfo`.
marsinfo = scrape()
# Bare name on the last line so the notebook displays the dictionary as the cell output.
marsinfo 

{'NewsTitle': "NASA's Opportunity Rover Mission on Mars Comes to End",
 'News_subTitle': "NASA's Opportunity Mars rover mission is complete after 15 years on Mars. Opportunity's record-breaking exploration laid the groundwork for future missions to the Red Planet.",
 'mars_LatestImg': 'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA22928_hires.jpg',
 'mars_LatestImg_desc': 'In this navigation camera raw image, NASAs Opportunity Rover looks back over its own tracks on Aug. 4, 2010.',
 'mars_latestWthr': 'Sol 2319 (2019-02-13), high -17C/1F, low -72C/-97F, pressure at 8.12 hPa, daylight 06:46-18:52',
 'mars_profile': ['<table border="1" class="dataframe table table-striped">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Value</th>\n    </tr>\n    <tr>\n      <th>Profile_param</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:

In [None]:
#[{'Title': 'Cerberus Hemisphere Enhanced',
#  'ImgURL': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
#  {'Title': 'Schiaparelli Hemisphere Enhanced',
#   'ImgURL': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
#  {'Title': 'Syrtis Major Hemisphere Enhanced',
#   'ImgURL': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
#  {'Title': 'Valles Marineris Hemisphere Enhanced',
#   'ImgURL': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]

#### Output from scraping - saving it for future use
{'NewsTitle': "NASA's Opportunity Rover Mission on Mars Comes to End",
 'News_subTitle': "NASA's Opportunity Mars rover mission is complete after 15 years on Mars. Opportunity's record-breaking exploration laid the groundwork for future missions to the Red Planet.",
 'mars_LatestImg': 'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA22928_hires.jpg',
 'mars_LatestImg_desc': 'In this navigation camera raw image, NASAs Opportunity Rover looks back over its own tracks on Aug. 4, 2010.',
 'mars_latestWthr': 'Sol 2319 (2019-02-13), high -17C/1F, low -72C/-97F, pressure at 8.12 hPa, daylight 06:46-18:52',
 'mars_profile': ['<table border="1" class="dataframe table table-striped">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Value</th>\n    </tr>\n    <tr>\n      <th>Profile_param</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr>\n  </tbody>\n</table>'],
 'mars_hemis_imgs': [{'Title': 'Cerberus Hemisphere Enhanced',
   'ImgURL': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
  {'Title': 'Schiaparelli Hemisphere Enhanced',
   'ImgURL': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
  {'Title': 'Syrtis Major Hemisphere Enhanced',
   'ImgURL': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
  {'Title': 'Valles Marineris Hemisphere Enhanced',
   'ImgURL': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]}