# Deliverable 1: 
Scrape titles and preview text from Mars news articles. Optionally export the data into a JSON file or a MongoDB database.

In [1]:
# Scrape the Mars News website by using Splinter and Beautiful Soup.
# Specifically, scrape the title and preview text, or summary text, of each article on the landing page.

In [1]:
import json

from bs4 import BeautifulSoup as soup
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager

# Start a visible (non-headless) Chrome session driven by Splinter, using the
# chromedriver path returned by webdriver_manager.
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

url = 'https://redplanetscience.com/'
browser.visit(url)

# Wait for the first article container before snapshotting the page: reading
# `browser.html` straight after `visit()` may capture markup in which the
# article list has not been rendered yet.
browser.is_element_present_by_css('div.list_text', wait_time=1)

# Parse the snapshot of the rendered page with Beautiful Soup.
html = browser.html
mars_soup = soup(html, 'html.parser')

[WDM] - Downloading: 100%|██████████| 6.46M/6.46M [00:00<00:00, 35.4MB/s]


In [2]:
# Confirm that the article container identified with Chrome DevTools
# (`div.list_text`) is present in the live page (wait_time=1).
browser.is_element_present_by_css('div.list_text', wait_time=1)

True

In [3]:
# Grab the first article container on the page; it is the parent element of the
# date, title and teaser divs.
article = mars_soup.find('div', class_='list_text')

In [10]:
# Show the markup of the selected article element to inspect its child divs.
print(article)

<div class="list_text">
<div class="list_date">November 24, 2022</div>
<div class="content_title">Two Rovers to Roll on Mars Again: Curiosity and Mars 2020</div>
<div class="article_teaser_body">They look like twins. But under the hood, the rover currently exploring the Red Planet and the one launching there this summer have distinct science tools and roles to play.</div>
</div>


In [5]:
# Pull the headline text out of the selected article element.
art_title = article.select_one('div.content_title').get_text()
print(art_title)

Two Rovers to Roll on Mars Again: Curiosity and Mars 2020


In [6]:
# Pull the preview (teaser) text out of the selected article element.
art_content = article.select_one('div.article_teaser_body').get_text()
print(art_content)

They look like twins. But under the hood, the rover currently exploring the Red Planet and the one launching there this summer have distinct science tools and roles to play.


In [7]:
# Single-entry mapping: article title -> preview text.
mars_article = {art_title: art_content}

In [8]:
# Display the title -> preview mapping built for the first article.
mars_article

{'Two Rovers to Roll on Mars Again: Curiosity and Mars 2020': 'They look like twins. But under the hood, the rover currently exploring the Red Planet and the one launching there this summer have distinct science tools and roles to play.'}

In [9]:
# Alternative layout: fixed 'title' / 'preview' keys instead of using the
# title itself as the key.
mars_dic = dict(title=art_title, preview=art_content)
print(mars_dic)

{'title': 'Two Rovers to Roll on Mars Again: Curiosity and Mars 2020', 'preview': 'They look like twins. But under the hood, the rover currently exploring the Red Planet and the one launching there this summer have distinct science tools and roles to play.'}


## Retrieving all first-page news articles into a dictionary (title: preview)

In [42]:
# Build a {title: preview} mapping covering every article container found in
# the parsed page.
mars_news = {}
news = mars_soup.select('div.list_text')
for element in news:
    news_title = element.select_one('div.content_title').get_text()
    news_preview = element.select_one('div.article_teaser_body').get_text()
    mars_news.update({news_title: news_preview})


In [43]:
# Display the full title -> preview mapping collected from the landing page.
mars_news

{'Two Rovers to Roll on Mars Again: Curiosity and Mars 2020': 'They look like twins. But under the hood, the rover currently exploring the Red Planet and the one launching there this summer have distinct science tools and roles to play.',
 'Naming a NASA Mars Rover Can Change Your Life': 'Want to name the robotic scientist NASA is sending to Mars in 2020? The student who named Curiosity — the rover currently exploring Mars — will tell you this is an opportunity worth taking.',
 'Global Storms on Mars Launch Dust Towers Into the Sky': 'A Mars Dust Tower Stands Out Dust storms are common on Mars. But every decade or so, something unpredictable happens: a series of runaway storms break out, covering the entire planet in a dusty haze.',
 'Two of a Space Kind: Apollo 12 and Mars 2020': 'Apollo 12 and the upcoming Mars 2020 mission may be separated by half a century, but they share several goals unique in the annals of space exploration.',
 "NASA's Perseverance Rover Will Look at Mars Throug

## BONUS

In [54]:
# Serialise the scraped {title: preview} mapping to a JSON-formatted string.
# NOTE: this only builds the string in memory; nothing is written to a JSON
# file or to MongoDB here.
mars_news_json = json.dumps(mars_news)


In [55]:
# Print the JSON string produced by json.dumps for the scraped articles.
print(mars_news_json)

{"Two Rovers to Roll on Mars Again: Curiosity and Mars 2020": "They look like twins. But under the hood, the rover currently exploring the Red Planet and the one launching there this summer have distinct science tools and roles to play.", "Naming a NASA Mars Rover Can Change Your Life": "Want to name the robotic scientist NASA is sending to Mars in 2020? The student who named Curiosity \u2014 the rover currently exploring Mars \u2014 will tell you this is an opportunity worth taking.", "Global Storms on Mars Launch Dust Towers Into the Sky": "A Mars Dust Tower Stands Out Dust storms are common on Mars. But every decade or so, something unpredictable happens: a series of runaway storms break out, covering the entire planet in a dusty haze.", "Two of a Space Kind: Apollo 12 and Mars 2020": "Apollo 12 and the upcoming Mars 2020 mission may be separated by half a century, but they share several goals unique in the annals of space exploration.", "NASA's Perseverance Rover Will Look at Mars 