In [9]:
# declare dependencies
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
from splinter import Browser
import time

from pymongo import MongoClient
# MongoDB client created with no arguments, so the driver's default
# connection settings apply; used in the last cell to store the scraped data
client = MongoClient()

# single splinter browser shared by every scraping function in this notebook
# NOTE(review): headless=False presumably opens a visible Chrome window while
# scraping -- confirm against the splinter documentation
browser = Browser('chrome',headless=False)

# Mars News

In [10]:
def get_mars_headline():
    """Scrape the latest headline and teaser text from the NASA Mars news page.

    Loads the news page in the module-level splinter ``browser``, pauses two
    seconds before reading the HTML, and parses it with BeautifulSoup.

    Returns:
        dict: ``{'news_title': <link text inside the 'content_title' div>,
        'news_paragraph': <text of the 'article_teaser_body' div>}``.

    NOTE(review): there is no guard for missing elements; if either lookup
    finds nothing the attribute access below will presumably fail.
    """
    browser.visit('https://mars.nasa.gov/news/')
    time.sleep(2)

    # parse whatever the browser currently has loaded
    page = BeautifulSoup(browser.html, 'html.parser')

    # headline text lives in the <a> nested inside the title container
    title_container = page.find('div', {'class': 'content_title'})
    headline_text = title_container.find('a').text

    # teaser paragraph sits in its own container
    teaser_text = page.find('div', {'class': 'article_teaser_body'}).text

    result = {}
    result['news_title'] = headline_text
    result['news_paragraph'] = teaser_text
    return result

# call get_mars_headline() and keep the dict it returns; the bare name on
# the last line makes the notebook display that value
headlines_dict = get_mars_headline()
headlines_dict

{'news_title': 'After a Reset, Curiosity Is Operating Normally',
 'news_paragraph': 'Curiosity has returned to science operations and is once again exploring the clay unit. '}

# JPL Mars Space Images - Featured Image

In [11]:
def get_featured_image_url():
    """Return the full URL of the featured image on JPL's Mars space-images page.

    Loads the page in the module-level splinter ``browser``, pauses two
    seconds before reading the HTML, and reads the ``style`` attribute of
    the first ``<article>`` element. That attribute is expected to carry a
    CSS ``background-image: url('<path>');`` declaration; the path inside
    ``url(...)`` is extracted with a regular expression rather than fixed
    character offsets, so quoting or spacing differences do not corrupt
    the result.

    Returns:
        dict: ``{'image_url': 'https://www.jpl.nasa.gov' + <path>}``.

    Raises:
        ValueError: if no ``<article>`` with a ``style`` attribute is found,
            or the style contains no ``url(...)`` value.
    """
    page_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
    browser.visit(page_url)
    time.sleep(2)

    # parse whatever the browser currently has loaded
    soup = BeautifulSoup(browser.html, 'html.parser')

    # the featured image is set as a CSS background on the <article> tag
    article = soup.find('article')
    style = article.get('style') if article is not None else None
    if not style:
        raise ValueError('no <article> element with a style attribute was found')

    # pull the path out of url(...), tolerating optional single/double quotes
    match = re.search(r"url\(\s*(['\"]?)(.*?)\1\s*\)", style)
    if match is None:
        raise ValueError('no url(...) value found in article style: ' + style)
    image_path = match.group(2)

    # the path is site-relative, so prefix it with the site root
    main_url = 'https://www.jpl.nasa.gov'
    featured_image_url = main_url + image_path

    return {'image_url': featured_image_url}

# call get_featured_image_url() and keep the dict it returns; the bare name
# on the last line makes the notebook display that value
featured_image_dict = get_featured_image_url()
featured_image_dict

{'image_url': 'https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA16711-1920x1200.jpg'}

# Mars Weather

In [12]:
def get_mars_weather():
    """Return the most recent weather tweet from the Mars weather Twitter feed.

    Loads the feed in the module-level splinter ``browser``, pauses two
    seconds before reading the HTML, and walks the tweet containers in page
    order. The first tweet whose text contains BOTH ``'sol'`` and ``'low'``
    is treated as the weather report.

    Returns:
        dict | None: ``{'mars_weather': <tweet text>}`` for the first
        matching tweet, or ``None`` when no tweet on the page matches.
    """
    weather_url = "https://twitter.com/marswxreport?lang=en"
    browser.visit(weather_url)
    time.sleep(2)

    # parse whatever the browser currently has loaded
    soup = BeautifulSoup(browser.html, 'html.parser')

    # each tweet's text sits inside one of these containers
    latest_tweets = soup.find_all('div', {'class': 'js-tweet-text-container'})

    for tweet in latest_tweets:
        paragraph = tweet.find('p')
        if paragraph is None:
            # container without a text paragraph: nothing to inspect
            continue

        weather_tweet = paragraph.text

        # both markers must be present; note that `'sol' and 'low' in text`
        # would only test 'low', because the literal 'sol' is always truthy
        if 'sol' in weather_tweet and 'low' in weather_tweet:
            return {'mars_weather': weather_tweet}

    # no weather tweet was found on the page
    return None

# call get_mars_weather() and keep what it returns; the bare name on the
# last line makes the notebook display that value
# NOTE: the function returns None when no tweet on the page matches
mars_weather_dict = get_mars_weather()
mars_weather_dict

{'mars_weather': 'InSight sol 92 (2019-03-01) low -94.4ºC (-137.9ºF) high -12.9ºC (8.8ºF)\nwinds from the SW at 4.6 m/s (10.2 mph) gusting to 10.4 m/s (23.2 mph)\npressure at 7.20 hPapic.twitter.com/zxXhRFOwTo'}

# Mars Facts

In [13]:
def get_mars_facts():
    """Build an HTML table of Mars facts from space-facts.com.

    The page is opened in the module-level splinter ``browser`` (followed by
    a two-second pause), but the table itself is read by handing the URL
    directly to ``pd.read_html``. The first table found is relabelled with
    the columns ``Description`` / ``Measurement`` and rendered to HTML with
    the CSS classes ``table-dark`` and ``table-hover``.

    Returns:
        dict: ``{'html_table_facts': <html string of the table>}``.
    """
    fact_url = "https://space-facts.com/mars/"
    browser.visit(fact_url)
    time.sleep(2)

    # pandas reads the tables on the page from the URL; keep the first one
    facts_table = pd.read_html(fact_url)[0]
    facts_table.columns = ['Description', 'Measurement']

    # render with the CSS classes used for styling the table
    table_html = facts_table.to_html(classes=["table-dark", "table-hover"])

    return {"html_table_facts": table_html}

# call get_mars_facts() and keep the dict it returns; the bare name on the
# last line makes the notebook display that value
mars_facts_dict = get_mars_facts()
mars_facts_dict

{'html_table_facts': '<table border="1" class="dataframe table-dark table-hover">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Description</th>\n      <th>Measurement</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Mass:</td>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>Surface Temperature:</td>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <th>7</th>\n    

# Mars Hemispheres

In [14]:
def get_mars_hemispheres():
    """Collect the title and image URL of each Mars hemisphere from USGS.

    Opens the USGS Astrogeology search-results page in the module-level
    splinter ``browser`` (followed by a two-second pause), then for every
    ``div.item`` result visits the linked detail page and reads the ``src``
    of its ``wide-image`` ``<img>``.

    Returns:
        dict: ``{'hemi_image_urls': [{'title': <str>, 'img_url': <str>}, ...]}``
        with one entry per result, in page order.
    """
    browser.visit('https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars')
    time.sleep(2)

    # parse the results listing once; the loop below navigates away from it
    listing = BeautifulSoup(browser.html, 'html.parser')

    # hrefs and image srcs on this site are relative to this root
    site_root = 'https://astrogeology.usgs.gov'

    hemispheres = []
    for item in listing.find_all('div', {'class': 'item'}):
        # hemisphere name as shown in the listing
        name = item.find('h3').text

        # relative link to the detail page that holds the full image
        detail_path = item.find('a', {'class': 'itemLink product-item'})['href']

        # load the detail page and parse it separately from the listing
        browser.visit(site_root + detail_path)
        detail_page = BeautifulSoup(browser.html, 'html.parser')

        # full-size image source, made absolute
        image_src = detail_page.find('img', {'class': 'wide-image'})['src']
        hemispheres.append({"title": name, "img_url": site_root + image_src})

    return {"hemi_image_urls": hemispheres}


# call get_mars_hemispheres() and keep the dict it returns; the bare name
# on the last line makes the notebook display that value
mars_hemi_dict = get_mars_hemispheres()
mars_hemi_dict

{'hemi_image_urls': [{'title': 'Cerberus Hemisphere Enhanced',
   'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
  {'title': 'Schiaparelli Hemisphere Enhanced',
   'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
  {'title': 'Syrtis Major Hemisphere Enhanced',
   'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
  {'title': 'Valles Marineris Hemisphere Enhanced',
   'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]}

In [15]:
# Merge the five scraper results into one flat dict so they can be stored in
# Mongo as a single document. The scrapers use distinct keys (news_title,
# news_paragraph, image_url, html_table_facts, mars_weather, hemi_image_urls),
# so nothing is overwritten by the unpacking.
# NOTE: ** unpacking needs a mapping; mars_weather_dict is None when no
# weather tweet was found, which would make this cell fail.
merged_dict = {**headlines_dict, **featured_image_dict,
               **mars_facts_dict, **mars_weather_dict, **mars_hemi_dict}
merged_dict

{'news_title': 'After a Reset, Curiosity Is Operating Normally',
 'news_paragraph': 'Curiosity has returned to science operations and is once again exploring the clay unit. ',
 'image_url': 'https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA16711-1920x1200.jpg',
 'html_table_facts': '<table border="1" class="dataframe table-dark table-hover">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Description</th>\n      <th>Measurement</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Mass:</td>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (

In [16]:
# select the 'mission_to_mars' database and its 'mars_collection' collection
# through attribute access on the client
db = client.mission_to_mars
collection = db.mars_collection
# store the merged scrape results as one document
# NOTE(review): this is an unconditional insert, so re-running the cell
# presumably adds another document rather than replacing the previous one --
# confirm whether an upsert is wanted instead
collection.insert_one(merged_dict)

<pymongo.results.InsertOneResult at 0x116988048>