# Module 12 Challenge
## Deliverable 1: Scrape Titles and Preview Text from Mars News

In [69]:
# Import Splinter and BeautifulSoup
from splinter import Browser
from bs4 import BeautifulSoup as bs
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd

In [70]:
# Resolve a ChromeDriver binary via webdriver_manager, then open a visible
# (non-headless) Chrome window that Splinter drives for the rest of the notebook.
executable_path = dict(executable_path=ChromeDriverManager().install())
browser = Browser('chrome', headless=False, **executable_path)

### Step 1: Visit the Website

1. Use automated browsing to visit the [Mars NASA news site](https://redplanetscience.com). Inspect the page to identify which elements to scrape.

      > **Hint** To identify which elements to scrape, you might want to inspect the page by using Chrome DevTools.

In [71]:
# Visit the Mars NASA news site: https://redplanetscience.com
url = 'https://redplanetscience.com'
browser.visit(url)
# The article list may not be in the DOM the instant visit() returns, and the
# next cell snapshots browser.html right away. Wait (up to 5 seconds) for the
# first 'div.list_text' element so the scrape does not silently come back empty.
# The trailing semicolon keeps the True/False result out of the cell output.
browser.is_element_present_by_css('div.list_text', wait_time=5);

### Step 2: Scrape the Website

Create a Beautiful Soup object and use it to extract text elements from the website.

In [72]:
# Snapshot the page source currently held by the browser and parse it with
# Python's built-in HTML parser; the article fields are extracted from `soup`
# in the cells that follow.
html = browser.html
soup = bs(markup=html, features='html.parser')

In [73]:
# Every <div> carrying the "list_text" class; each one is treated as a single
# article entry by the extraction loop below.
all_stories = soup.find_all('div', attrs={'class': 'list_text'})

In [51]:
# Create an empty list to store the dictionaries (reset on every run so the
# cell can be re-executed without accumulating duplicates)
stories = []
# Loop through the article elements
for article in all_stories:
    # Extract the date, title and preview text from the element. The raw text
    # can carry stray leading/trailing whitespace (e.g. a title ending in a
    # space), so trim it before storing.
    title = article.find(class_='content_title').text.strip()
    preview = article.find(class_='article_teaser_body').text.strip()
    date = article.find(class_='list_date').text.strip()
    # Store the date, title and preview of this article in one dictionary and
    # add it to the list. A separate loop variable name (`article`) is used so
    # the element being read is never rebound to the dictionary being built.
    stories.append({"date": date, "title": title, "preview": preview})
# Print the list to confirm success
print(stories)

[{'date': 'December 15, 2022', 'title': "NASA Readies Perseverance Mars Rover's Earthly Twin ", 'preview': "Did you know NASA's next Mars rover has a nearly identical sibling on Earth for testing? Even better, it's about to roll for the first time through a replica Martian landscape."}, {'date': 'December 14, 2022', 'title': "NASA's Perseverance Rover Is Midway to Mars ", 'preview': "Sometimes half measures can be a good thing – especially on a journey this long. The agency's latest rover only has about 146 million miles left to reach its destination."}, {'date': 'December 11, 2022', 'title': "8 Martian Postcards to Celebrate Curiosity's Landing Anniversary", 'preview': 'The NASA rover touched down eight years ago, on Aug. 5, 2012, and will soon be joined by a second rover, Perseverance.'}, {'date': 'December 10, 2022', 'title': "NASA's New Mars Rover Will Use X-Rays to Hunt Fossils", 'preview': "PIXL, an instrument on the end of the Perseverance rover's arm, will search for chemical f

In [55]:
# Convert the list of story dictionaries into a pandas DataFrame for easier
# viewing; each dictionary becomes one row and its keys become the columns.
stories_df = pd.DataFrame.from_records(stories)
stories_df

Unnamed: 0,date,title,preview
0,"December 15, 2022",NASA Readies Perseverance Mars Rover's Earthly...,Did you know NASA's next Mars rover has a near...
1,"December 14, 2022",NASA's Perseverance Rover Is Midway to Mars,Sometimes half measures can be a good thing – ...
2,"December 11, 2022",8 Martian Postcards to Celebrate Curiosity's L...,"The NASA rover touched down eight years ago, o..."
3,"December 10, 2022",NASA's New Mars Rover Will Use X-Rays to Hunt ...,"PIXL, an instrument on the end of the Persever..."
4,"December 10, 2022",The Detective Aboard NASA's Perseverance Rover,"An instrument called SHERLOC will, with the he..."
5,"December 8, 2022",Meet the People Behind NASA's Perseverance Rover,These are the scientists and engineers who bui...
6,"December 8, 2022",Three New Views of Mars' Moon Phobos,Taken with the infrared camera aboard NASA's O...
7,"December 5, 2022",NASA's MAVEN Explores Mars to Understand Radio...,NASA’s MAVEN spacecraft has discovered “layers...
8,"November 28, 2022",5 Hidden Gems Are Riding Aboard NASA's Perseve...,"The symbols, mottos, and small objects added t..."
9,"November 26, 2022",NASA's Perseverance Rover Mission Getting in S...,Stacking spacecraft components on top of each ...


In [67]:
# The page source has already been captured and parsed, so the automated
# browser session is no longer needed; close it.
browser.quit()

### (Optional) Step 4: Export the Data

Optionally, store the scraped data in a file or database (to ease sharing the data with others). To do so, export the scraped data to either a JSON file or a MongoDB database.

In [66]:
# Export data to JSON: write the DataFrame to 'mars_stories.json' in the
# current working directory, using pandas' default JSON layout.
stories_df.to_json(path_or_buf='mars_stories.json')

In [64]:
# Export data to MongoDB
import pymongo
# Connect to a MongoDB server on the local machine
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)
db = client.mars_news_db

# Collection that receives one document per scraped story
mars_stories = db.stories

# insert_many() adds an '_id' key to every dictionary it is given. Insert
# shallow copies so the scraped `stories` list is left untouched; otherwise the
# list is polluted with ObjectIds and re-running this cell fails with a
# duplicate-key error. Note that each run still appends a fresh copy of every
# story to the collection.
mars_stories.insert_many([dict(story) for story in stories])



<pymongo.results.InsertManyResult at 0x2ae532b1708>

In [65]:
# Read back every document in the collection and print it to confirm that the
# export succeeded.
for document in mars_stories.find():
    print(document)

{'_id': ObjectId('639b58a907448a35796ee170'), 'date': 'December 15, 2022', 'title': "NASA Readies Perseverance Mars Rover's Earthly Twin ", 'preview': "Did you know NASA's next Mars rover has a nearly identical sibling on Earth for testing? Even better, it's about to roll for the first time through a replica Martian landscape."}
{'_id': ObjectId('639b58a907448a35796ee171'), 'date': 'December 14, 2022', 'title': "NASA's Perseverance Rover Is Midway to Mars ", 'preview': "Sometimes half measures can be a good thing – especially on a journey this long. The agency's latest rover only has about 146 million miles left to reach its destination."}
{'_id': ObjectId('639b58a907448a35796ee172'), 'date': 'December 11, 2022', 'title': "8 Martian Postcards to Celebrate Curiosity's Landing Anniversary", 'preview': 'The NASA rover touched down eight years ago, on Aug. 5, 2012, and will soon be joined by a second rover, Perseverance.'}
{'_id': ObjectId('639b58a907448a35796ee173'), 'date': 'December 10,