In [69]:
# Dependencies
from bs4 import BeautifulSoup
from splinter import Browser
import requests
import pymongo
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd

In [2]:
# Initialize PyMongo to work with MongoDBs
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

In [3]:
# Define database and collection
db = client.mars_db
collection = db.articles

# NASA Mars News

In [4]:
# URL of page to be scraped
url = 'https://mars.nasa.gov/news/'

# Retrieve page with the requests module
response = requests.get(url)

# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(response.text, 'html.parser')

In [5]:
# Retrieve the parent divs for all articles
results = soup.find_all('div', class_='slide')

# loop over results to get article data
for result in results:
    
    # scrape the article title 
    news_title = result.find('div', class_='content_title').text
    
    # scrape the article paragraph text
    news_p = result.find('div', class_='rollover_description_inner').text
    
    # print article data
    print('-----------------')
    print(news_title)
    print(news_p)

    # Dictionary to be inserted into MongoDB
    post = {
        'title': news_title,
        'paragraph': news_p
    }

    # Insert dictionary into MongoDB as a document
    collection.insert_one(post)

-----------------


NASA to Broadcast Mars 2020 Perseverance Launch, Prelaunch Activities



Starting July 27, news activities will cover everything from mission engineering and science to returning samples from Mars to, of course, the launch itself.

-----------------


The Launch Is Approaching for NASA's Next Mars Rover, Perseverance



The Red Planet's surface has been visited by eight NASA spacecraft. The ninth will be the first that includes a roundtrip ticket in its flight plan. 

-----------------


NASA to Hold Mars 2020 Perseverance Rover Launch Briefing



Learn more about the agency's next Red Planet mission during a live event on June 17.

-----------------


Alabama High School Student Names NASA's Mars Helicopter



Vaneeza Rupani's essay was chosen as the name for the small spacecraft, which will mark NASA's first attempt at powered flight on another planet.

-----------------


Mars Helicopter Attached to NASA's Perseverance Rover



The team also fueled the rover's sk

# JPL Mars Space Images

In [91]:
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

[WDM] - Current google-chrome version is 86.0.4240
[WDM] - Get LATEST driver version for 86.0.4240
[WDM] - Driver [C:\Users\Max Browning\.wdm\drivers\chromedriver\win32\86.0.4240.22\chromedriver.exe] found in cache


 


In [92]:
# URL of page to be scraped
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

# Retrieve page with the requests module
browser.visit(url)

# Navigate to featured image page
browser.links.find_by_partial_text('FULL IMAGE').click()
browser.links.find_by_partial_text('more info').click()

# HTML Object
html = browser.html

# Parse HTML with Beautiful Soup
soup = BeautifulSoup(html, 'html.parser')

In [93]:
# Retrieve all elements that contain image information
image = soup.find('img', class_='main_image')['src']
print(image)

/spaceimages/images/largesize/PIA18816_hires.jpg


# Mars Facts

In [70]:
url = 'https://space-facts.com/mars/'

In [71]:
# Use Panda's 'read_html' to parse the url
tables = pd.read_html(url)
tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers,
   Mars - Earth Comparison             Mars            Earth
 0               Diameter:         6,779 km        12,742 km
 1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 2                  Moons:                2                1
 3      Distance from Sun:   227,943,824 km   149,598,262 km
 4         Length of Year:   687 Earth days      365.24 days
 5            Temperature:     -87 to -5 °C      -88 to 58°C,
           

In [75]:
# Find the Mars Facts table in the list and assign it to 'mars_df'
df = tables[0]
df.columns = ['Description', 'Mars']
df.set_index('Description', inplace=True)
df

Unnamed: 0_level_0,Mars
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


# Mars Hemispheres

In [85]:
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

[WDM] - Current google-chrome version is 86.0.4240
[WDM] - Get LATEST driver version for 86.0.4240
[WDM] - Driver [C:\Users\Max Browning\.wdm\drivers\chromedriver\win32\86.0.4240.22\chromedriver.exe] found in cache


 


In [86]:
# Create list to store dictionaries
hemisphere_image_urls = []

In [87]:
# Iterate through all images
for x in range(4):

    # URL of page to be scraped
    url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

    # Retrieve page with the requests module
    browser.visit(url)

    # Navigate to featured image page
    browser.links.find_by_partial_text('Hemisphere Enhanced')[x].click()

    # HTML Object
    html = browser.html

    # Parse HTML with Beautiful Soup
    soup = BeautifulSoup(html, 'html.parser')
    
    # Scrape image URL
    img_url = soup.find('img', class_='wide-image')['src']
    title = soup.find('h2', class_='title').text
    
    # Create dictionary
    hemisphere_dictionary = {
        'title': title,
        'img_url': img_url
    }
    
    # Append list with dictionary
    hemisphere_image_urls.append(hemisphere_dictionary)
    
print(hemisphere_image_urls)

[{'title': 'Cerberus Hemisphere Enhanced', 'img_url': '/cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'img_url': '/cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': '/cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': '/cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}]
