# NASA Mars News
## Scraping

In [None]:
# Dependencies
from splinter import Browser
from bs4 import BeautifulSoup
import pymongo
from webdriver_manager.chrome import ChromeDriverManager
import time

In [None]:
# Create the PyMongo client for the MongoDB server at the URI below
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(host=conn)

In [None]:
# ChromeDriverManager().install() supplies the chromedriver path that is
# handed to splinter; headless=False keeps the Chrome window visible.
executable_path = dict(executable_path=ChromeDriverManager().install())
browser = Browser('chrome', headless=False, **executable_path)

In [None]:
# Select the 'mars_db' database and, inside it, the 'articles' collection
db = client['mars_db']
collection = db['articles']

In [None]:
# URL of the NASA Mars News page to be scraped; load it in the
# splinter-controlled browser so its rendered HTML can be parsed afterwards
url = 'https://mars.nasa.gov/news'
browser.visit(url)

In [None]:
# Parse the HTML currently loaded in the browser with the built-in
# 'html.parser' backend, then display the parsed document as cell output
soup = BeautifulSoup(browser.html, features='html.parser')
soup

In [None]:
# Extract the title, teaser paragraph and date of each article listing on the
# parsed news page, print them, and store each one as a MongoDB document.

# Retrieve the parent divs for all article listings
results = soup.find_all('div', class_='list_text')

# NOTE(review): the [1:] slice skips the first 'list_text' div; the reason is
# not evident from this notebook -- confirm that it is intentional.
for result in results[1:]:
    title_tag = result.find('div', class_='content_title')
    teaser_tag = result.find('div', class_='article_teaser_body')
    date_tag = result.find('div', class_='list_date')

    # find() returns None when an element is missing; skip incomplete
    # listings instead of failing with AttributeError on `.text`.
    if title_tag is None or teaser_tag is None or date_tag is None:
        continue

    title = title_tag.text
    paragraph = teaser_tag.text
    date = date_tag.text

    # Print article data
    print('-----------------')
    print(title)
    print(paragraph)
    print(date)

    # Dictionary to be inserted into MongoDB
    post = {
        'title': title,
        'paragraph': paragraph,
        'date': date,
    }

    # Insert dictionary into MongoDB as a document
    collection.insert_one(post)

In [None]:
# Fetch every document in the 'articles' collection and print them one by one
articles = db['articles'].find()
for record in articles:
    print(record)

# JPL Mars Space Images - Featured Image

In [None]:
# URL of the featured-image page; load it in the browser for parsing
url = 'https://spaceimages-mars.com/'
browser.visit(url)

In [None]:
# Parse the featured-image page now loaded in the browser ('html.parser'
# backend) and display the parsed document as cell output
soup = BeautifulSoup(browser.html, features='html.parser')
soup

In [None]:

# Collect the absolute URL of every featured image linked from the parsed page.
featured_image_url = []

# Retrieve the parent div(s) that hold the featured-image link
results = soup.find_all('div', class_='floating_text_area')

# Iterate through the floating text areas
for result in results:
    link = result.find('a')

    # Skip areas without a usable anchor instead of failing on None / a
    # missing href attribute.
    if link is None or not link.get('href'):
        continue

    # The href is joined onto the site root to form the full URL
    image_url = 'https://spaceimages-mars.com/' + link['href']
    print('-----------')
    print(image_url)
    featured_image_url.append(image_url)

# Click the 'FULL IMAGE' button once, after the loop. This is best-effort:
# the URLs above were already taken from the parsed HTML, so a missing or
# unclickable button is reported rather than treated as fatal.
try:
    browser.links.find_by_partial_text('FULL IMAGE').click()
except Exception as err:
    print(f"Could not click the 'FULL IMAGE' button: {err}")

In [None]:
# Show the first collected featured-image URL as the cell output
featured_image_url[0]

# Mars Facts

In [None]:
# Import dependencies
import pandas as pd

In [None]:
# URL of the Mars facts page whose HTML tables are read with pandas below
url = 'https://galaxyfacts-mars.com/'

In [None]:
# Use pandas' read_html to load every HTML table found at the URL
tables = pd.read_html(io=url)
# Display the table at index 1, the one used in the following cells
tables[1]

In [None]:
# Take the table at index 1 and label its two columns, then display it.
# mars_df is the same object as tables[1], so the new labels apply to both.
mars_df = tables[1]
mars_df.columns = [
    'Feature',
    'Measurement',
]
mars_df

In [None]:
# Write the facts table to 'mars_data.html' without the index column,
# tagging the <table> with the CSS classes 'table table-striped'
mars_df.to_html(
    'mars_data.html',
    index=False,
    classes='table table-striped',
)

# Mars Hemispheres

In [None]:
# URL of the Mars hemispheres index page; load it in the browser for parsing
url = 'https://marshemispheres.com/'
browser.visit(url)

In [None]:
# Parse the hemispheres page now loaded in the browser ('html.parser'
# backend) and display the parsed document as cell output
soup = BeautifulSoup(browser.html, features='html.parser')
soup

In [None]:
# List the href of every anchor tag on the page (None when a tag has no href)
anchors = soup.find_all('a')
for anchor in anchors:
    print(anchor.get('href'))

In [None]:
# Collect the title and full-size image URL of each Mars hemisphere, print
# them, and store each one as a MongoDB document.
#
# The previous version looked up <a> tags by a `src` attribute (which matched
# nothing, so `.text` raised AttributeError), printed an undefined `img_url`,
# and re-inserted the stale `post` dictionary left over from the news cell.
base_url = 'https://marshemispheres.com/'

# Retrieve the parent divs for all hemisphere entries
results = soup.find_all('div', class_='item')

hemisphere_image_urls = []

for result in results:
    heading = result.find('h3')
    link = result.find('a', href=True)

    # Skip entries that lack the expected heading or detail-page link
    if heading is None or link is None:
        continue

    title = heading.get_text(strip=True)

    # NOTE(review): the index page appears to expose only thumbnails, so the
    # full-size image is read from each hemisphere's detail page -- confirm
    # against the live site markup.
    browser.visit(base_url + link['href'])
    time.sleep(0.5)  # brief pause after each page load
    detail_soup = BeautifulSoup(browser.html, 'html.parser')

    # NOTE(review): assumes the full-size JPEG is the anchor whose text is
    # exactly 'Sample' on the detail page -- verify.
    sample = detail_soup.find('a', string='Sample')
    if sample is None or not sample.get('href'):
        print(f'No full-size image link found for {title}')
        continue

    img_url = base_url + sample['href']

    # Print hemisphere data
    print('-----------------')
    print(title)
    print(img_url)

    hemisphere = {'title': title, 'img_url': img_url}
    hemisphere_image_urls.append(hemisphere)

    # Insert a copy into MongoDB as a document: insert_one() adds an '_id'
    # key to the dictionary it is given, and the list above should stay clean.
    # NOTE(review): this writes to the same `collection` the news articles
    # use, as the original cell did -- confirm that is the intended target.
    collection.insert_one(dict(hemisphere))

In [None]:
# Shut down the browser session once all scraping is finished
browser.quit()