Deliverable 1: Web scraping Mars News

In [1]:
# Import dependencies

import pandas as pd
from splinter import Browser
from bs4 import BeautifulSoup as soup
from webdriver_manager.chrome import ChromeDriverManager
import json

In [2]:
# Start a visible (non-headless) Chrome session driven by Splinter.
# The chromedriver location comes from webdriver_manager's install() call
# and is forwarded to Browser as the `executable_path` keyword.
executable_path = dict(executable_path=ChromeDriverManager().install())
browser = Browser('chrome', headless=False, **executable_path)

In [3]:
# Open the Mars news site in the Splinter-controlled browser
url = 'https://redplanetscience.com'
browser.visit(url)

# Give the page a moment to render: check (with wait_time=1) whether an
# article container matching 'div.list_text' is present. The boolean result
# is the last expression, so it is shown as the cell output.
browser.is_element_present_by_css('div.list_text', wait_time=1)

True

In [4]:
# Snapshot the rendered page source and hand it to BeautifulSoup
html = browser.html
news_soup = soup(html, 'html.parser')

# Every 'div.list_text' match is treated as one news article container
slide_elems = news_soup.select('div.list_text')

In [5]:
# Build one {'title', 'preview'} record per article container.
# The title is read from the nested 'content_title' div and the preview
# from the nested 'article_teaser_body' div; text is kept exactly as scraped.
news_list = [
    {
        'title': article.find('div', class_='content_title').text,
        'preview': article.find('div', class_='article_teaser_body').text,
    }
    for article in slide_elems
]

In [6]:
# Show the scraped list of title/preview dictionaries
print(news_list)

[{'title': 'The Man Who Wanted to Fly on Mars', 'preview': "The Mars Helicopter is riding to the Red Planet this summer with NASA's Perseverance rover. The helicopter's chief engineer, Bob Balaram, shares the saga of how it came into being."}, {'title': "NASA's Perseverance Rover 100 Days Out", 'preview': "Mark your calendars: The agency's latest rover has only about 8,640,000 seconds to go before it touches down on the Red Planet, becoming history's next Mars car."}, {'title': "NASA's Mars 2020 Rover Closer to Getting Its Name", 'preview': "155 students from across the U.S. have been chosen as semifinalists in NASA's essay contest to name the Mars 2020 rover, and see it launch from Cape Canaveral this July."}, {'title': 'Mars 2020 Stands on Its Own Six Wheels', 'preview': "In time-lapse video, taken at JPL, captures the first time NASA's Mars 2020 rover carries its full weight on its legs and wheels."}, {'title': 'Scientists Explore Outback as Testbed for Mars ', 'preview': "Australia

In [7]:
# Save to JSON

# Serialize the scraped articles once; `final` holds the compact JSON text
# and is printed by the following cell.
final = json.dumps(news_list)

# Write the already-serialized text as-is. Passing `final` (a str) to
# json.dump would encode it a second time, so the file would contain a single
# quoted, backslash-escaped JSON string rather than an array of article
# objects, and json.load would hand back a str instead of a list.
with open('mars_news.json', 'w') as f:
    f.write(final)

In [8]:
# Show the compact JSON text produced by json.dumps
print(final)

[{"title": "The Man Who Wanted to Fly on Mars", "preview": "The Mars Helicopter is riding to the Red Planet this summer with NASA's Perseverance rover. The helicopter's chief engineer, Bob Balaram, shares the saga of how it came into being."}, {"title": "NASA's Perseverance Rover 100 Days Out", "preview": "Mark your calendars: The agency's latest rover has only about 8,640,000 seconds to go before it touches down on the Red Planet, becoming history's next Mars car."}, {"title": "NASA's Mars 2020 Rover Closer to Getting Its Name", "preview": "155 students from across the U.S. have been chosen as semifinalists in NASA's essay contest to name the Mars 2020 rover, and see it launch from Cape Canaveral this July."}, {"title": "Mars 2020 Stands on Its Own Six Wheels", "preview": "In time-lapse video, taken at JPL, captures the first time NASA's Mars 2020 rover carries its full weight on its legs and wheels."}, {"title": "Scientists Explore Outback as Testbed for Mars ", "preview": "Australia

In [9]:
# Re-serialize the article list with 2-space indentation so it is easier to
# read when printed; this rebinds `final` to the indented text.
final = json.dumps(news_list, indent=2)

In [10]:
# Show the indented JSON text
print(final)

[
  {
    "title": "The Man Who Wanted to Fly on Mars",
    "preview": "The Mars Helicopter is riding to the Red Planet this summer with NASA's Perseverance rover. The helicopter's chief engineer, Bob Balaram, shares the saga of how it came into being."
  },
  {
    "title": "NASA's Perseverance Rover 100 Days Out",
    "preview": "Mark your calendars: The agency's latest rover has only about 8,640,000 seconds to go before it touches down on the Red Planet, becoming history's next Mars car."
  },
  {
    "title": "NASA's Mars 2020 Rover Closer to Getting Its Name",
    "preview": "155 students from across the U.S. have been chosen as semifinalists in NASA's essay contest to name the Mars 2020 rover, and see it launch from Cape Canaveral this July."
  },
  {
    "title": "Mars 2020 Stands on Its Own Six Wheels",
    "preview": "In time-lapse video, taken at JPL, captures the first time NASA's Mars 2020 rover carries its full weight on its legs and wheels."
  },
  {
    "title": "Scienti

In [11]:
# Close the automated browser session now that scraping is finished
browser.quit()