In [6]:
# Import Dependencies
import pandas as pd # Import pandas to read the html page
import time

from splinter import Browser
from bs4 import BeautifulSoup

import tweepy
from config import (consumer_key, consumer_secret, 
                    access_token, access_token_secret)

# Setup Tweepy API Authentication
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, parser=tweepy.parsers.JSONParser())

# Function to initialize the browser for chrome
def init_browser():
    # @NOTE: Replace the path with your actual path to the chromedriver
    executable_path = {"executable_path": "chromedriver"}
    return Browser("chrome", **executable_path, headless=False)

Mars_Data = {}

def scrape():
    
### NASA Mars News
    
    # Initialize browser
    browser = init_browser()
    
    NASA_Mars_News = {}

    # Visit the Mars Nasa site for news
    url = 'https://mars.nasa.gov/news/'
    
    # Using url attribute to access the visited page’s url
    browser.visit(url)
    time.sleep(2)

    # Using the html attribute to get the html content of the visited page
    html = browser.html
    
    # Quitting the browser 
    browser.quit()
    
    # Create a soup object to find the latest news from the URL
    MarsNewsData = BeautifulSoup(html, "html.parser")

    Result = MarsNewsData.select_one('ul.item_list li.slide')

    news_title = Result.find('div',class_="content_title").text

    news_p = Result.find('div',class_="article_teaser_body").text
  
    Mars_Data['News_Title'] = news_title
    Mars_Data['News_Para'] = news_p
    
### JPL Mars Space Images - Featured Image
   
    # Initialize browser
    browser = init_browser()

    # Visit jpl NASA site for the featured image
    url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

    # Using url attribute to access the visited page’s url
    browser.visit(url)
    time.sleep(2)
    
    # Using the html attribute to get the html content of the visited page
    html = browser.html
    
    # Quitting the browser 
    browser.quit()

    # Create a soup object to find the featured image from the URL
    MarsImageData = BeautifulSoup(html,"html.parser")

    Mars_Img_Art = MarsImageData.find_all('ul',class_='articles')
    for all in Mars_Img_Art:
        img = all.find_all('li',class_='slide')
    for all in img:
        if 'Mars' in (all.a['data-description'].split()):
            image = all.a["data-fancybox-href"]  

    featured_image_url = 'https://www.jpl.nasa.gov'+ image
    
    Mars_Data['Featured_Image_Url'] = featured_image_url
    
### Mars Weather

    # Target User Account
    target_user = "@MarsWxReport"

    # Get all tweets from home feed
    public_tweets = api.user_timeline(target_user)

    mars_weather = ""
    flag = False

    for tweet in public_tweets:
        if flag == True:
            mars_weather = tweet["text"]
            flag = False
            break
        else:  
            if 'Sol' in tweet["text"]:
                flag = True
    
    Mars_Data['Weather'] = mars_weather
    
### Mars Facts
    
    # Initialize browser
    browser = init_browser()
    
    # Visit space facts site for the Mars facts
    url = 'http://space-facts.com/mars/'

    # Using url attribute to access the visited page’s url
    browser.visit(url)
    time.sleep(2)
    
    # Using the html attribute to get the html content of the visited page
    html = browser.html

    # Quitting the browser 
    browser.quit()
    
    # Create a soup object to find the Mars facts from the URL
    MarsFactsData = BeautifulSoup(html,"html.parser")

    MarsFactsData.find_all('table')

    Marstab = MarsFactsData.find_all('table')

    df = pd.read_html(str(Marstab))

    df=df[0].set_index([0])

    df=df.rename(columns={1:"Values"})

    marsfactshtml = df.to_html()

    Mars_Data['Facts Table'] = marsfactshtml
    
### Mars Hemispheres
    
    # Initialize browser
    browser = init_browser()
    
    # Visit the US govt.'s astrology site for the Mars hemispheres
    url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

    # Using url attribute to access the visited page’s url
    browser.visit(url)
    time.sleep(2)
    
    # Using the html attribute to get the html content of the visited page
    html = browser.html

    # Quitting the browser 
    browser.quit()

    # Create a soup object to find the Mars hemispheres from the URL
    MarsHemisphereImage = BeautifulSoup(html, "html.parser")

    Img4 = MarsHemisphereImage.find('div', class_='collapsible').find_all('a', class_='product-item')

    url = "https://astrogeology.usgs.gov"

    index = 0
    mars_hemispheres_list = []
    
    for img in Img4:
        if index%2!=0:
            title = img.find('h3').text

            url1 = url+img.get('href')

            #Initialize the browser    
            browser = init_browser()

            # Using url attribute to access the visited page’s url    
            browser.visit(url1)
            time.sleep(2)
            
            # Using the html attribute to get the html content of the visited page
            html = browser.html

            # Quitting the browser 
            browser.quit()

             # Create a soup object to find the enlarged image of the Mars hemispheres from the URL
            MarsHemisphereImages = BeautifulSoup(html, "html.parser")

            MarshemisImage = MarsHemisphereImages.find('img', class_='wide-image')

            link = MarshemisImage.get('src')

            Image_url = url+link

            mars_hemispheres_list.append({'title':title,'img_url':Image_url})

        index=index+1
        
    Mars_Data['Mars_Hemispheres'] = mars_hemispheres_list
                                                            
    return(Mars_Data)                                                    

In [7]:
scrape()

{'News_Title': 'Curiosity Surveys a Mystery Under Dusty Skies',
 'News_Para': "NASA's Curiosity rover surveyed its surroundings on Mars, producing a 360-degree panorama of its current location on Vera Rubin Ridge.",
 'Featured_Image_Url': 'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA22614_hires.jpg',
 'Weather': 'Sol 2160 (2018-09-03), high -8C/17F, low -68C/-90F, pressure at 8.85 hPa, daylight 05:36-17:52',
 'Facts Table': '<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Values</th>\n    </tr>\n    <tr>\n      <th>0</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>