# Module 12 Challenge | Web Scraping

## Part 1 | Scrape Titles & Preview Text from Mars News

In [34]:
# Import Splinter, BeautifulSoup, and supporting libraries
import json
import time

import pandas as pd
from bs4 import BeautifulSoup
from bs4 import BeautifulSoup as soup
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager

In [21]:
# Download (or reuse a cached) ChromeDriver binary and hand its path to Splinter.
executable_path = dict(executable_path=ChromeDriverManager().install())

# Launch a visible (non-headless) Chrome window driven by Splinter.
browser = Browser('chrome', headless=False, **executable_path)

### Visit the Website

In [22]:
# Point the automated browser at the Mars news site.
# Note: the address below uses plain http, exactly as originally written.
url = "http://redplanetscience.com"
browser.visit(url)

### Scrape the Website

In [23]:
# Create a Beautiful Soup object
# Wait (up to 5 seconds) for the article containers to appear before taking the
# HTML snapshot; `div.list_text` is the container class the scraping loop
# further down searches for. The return value is ignored on purpose: if the
# element never shows up we still parse whatever the page currently holds.
browser.is_element_present_by_css("div.list_text", wait_time=5)

html = browser.html

# Call the parser class by its own name rather than through the `soup` import
# alias: the original `soup = soup(...)` rebound the alias to the parsed
# document, so running this cell a second time raised a TypeError.
soup = BeautifulSoup(html, "html.parser")

In [24]:
# Extract all the text elements
# Sanity check: show one article's title and teaser before scraping them all.
# The earlier version first read the title at index 0 and then immediately
# overwrote it with index 1; only the index-1 values were ever printed, so the
# dead first lookup has been removed.
PREVIEW_INDEX = 1  # position of the article to preview

title_elements = soup.find_all("div", class_="content_title")
teaser_elements = soup.find_all("div", class_="article_teaser_body")

news_title = title_elements[PREVIEW_INDEX].text
print(f"Title: {news_title}")

news_teaser = teaser_elements[PREVIEW_INDEX].text
print(f"Teaser Preview: {news_teaser}")

Title: Global Storms on Mars Launch Dust Towers Into the Sky
Teaser Preview: A Mars Dust Tower Stands Out Dust storms are common on Mars. But every decade or so, something unpredictable happens: a series of runaway storms break out, covering the entire planet in a dusty haze.


### Store the Results

In [25]:
# Start with an empty container; one dict per article will be collected in it.
scraped_data = list()

In [26]:
# Adding a waiting limit to allow the webpage to load
# `time` is imported in the notebook's import cell instead of here, so that
# all dependencies are declared in one place.
# NOTE(review): `browser.html` is read in an earlier cell, so this pause does
# not change the HTML that was already parsed into `soup`; it only delays the
# cells that follow.
time.sleep(5)

In [27]:
# Loop through the text elements
# Extract the title and preview text from the elements
# Store each title and preview pair in a dictionary
# Add the dictionary to the list

# Each `div.list_text` wraps one article's title and teaser.
articles = soup.find_all('div', class_='list_text')

# Rebuild the list from scratch rather than appending to an existing one, so
# re-running this cell cannot duplicate entries.
scraped_data = [
    {
        'title': article.find('div', class_='content_title').text.strip(),
        'preview': article.find('div', class_='article_teaser_body').text.strip(),
    }
    for article in articles
]

In [30]:
# Display the collected title/preview dictionaries to verify the scrape worked.
scraped_data

[{'title': "Media Get a Close-Up of NASA's Mars 2020 Rover",
  'preview': "The clean room at NASA's Jet Propulsion Laboratory was open to the media to see NASA's next Mars explorer before it leaves for Florida in preparation for a summertime launch."},
 {'title': 'Global Storms on Mars Launch Dust Towers Into the Sky',
  'preview': 'A Mars Dust Tower Stands Out Dust storms are common on Mars. But every decade or so, something unpredictable happens: a series of runaway storms break out, covering the entire planet in a dusty haze.'},
 {'title': 'The MarCO Mission Comes to an End',
  'preview': 'The pair of briefcase-sized satellites made history when they sailed past Mars in 2019.'},
 {'title': "NASA's MAVEN Explores Mars to Understand Radio Interference at Earth",
  'preview': 'NASA’s MAVEN spacecraft has discovered “layers” and “rifts” in the electrically charged part of the upper atmosphere of Mars.'},
 {'title': "Robotic Toolkit Added to NASA's Mars 2020 Rover",
  'preview': "The bit

In [31]:
# Shut down the automated browser session now that scraping is finished.
browser.quit()

### Export the Data

In [35]:
# Export the scraped data
# Keep a JSON-encoded copy of the records as a string (no JSON file is written).
json_mars = json.dumps(scraped_data)

# Build the table straight from the list of dicts. The earlier version passed
# the JSON string to `pd.read_json`, which is a needless round trip and relies
# on literal-string input that recent pandas releases deprecate.
df = pd.DataFrame(scraped_data)

# Write one row per article, without the numeric index column.
df.to_csv('mars_news.csv', index=False)