# STEP 1:  Web Scraping

In [7]:
# Dependencies
from bs4 import BeautifulSoup
import requests
import pymongo

In [14]:
# Initialize PyMongo to work with MongoDBs
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

In [15]:
# Define database and collection
db = client.nasa_db
collection = db.items

In [16]:
# URL of page to be scraped
url = 'https://mars.nasa.gov/news/'

# Retrieve page with the requests module
response = requests.get(url)

# Create BeautifulSoup object; parse with 'lxml'
soup = BeautifulSoup(response.text, 'lxml')

In [17]:
# Examine the results, then determine element that contains sought info (News Title)
news_title = soup.find('div', class_='content_title').find('a').text

# Print the scraped information
news_title

"\nMars Helicopter Attached to NASA's Perseverance Rover\n"

In [20]:
# Examine the results, then determine element that contains sought info (News Paragraph)
news_paragraph = soup.find('div', class_='article_teaser_body').text

# Print the scraped information
news_paragraph

AttributeError: 'NoneType' object has no attribute 'text'

In [19]:
# Examine the results, then determine element that contains sought info
# Results are returned as an iterable list
results = soup.find_all('li', class_='slide')

# Loop through returned results
for result in results:
    # Error handling
    try:
        # Identify and return title of listing
        news_title = result.find('div', class_='content_title').find('a').text
        
        # Identify and return paragraph of listing
        news_paragraph = result.find('div', class_='article_teaser_body').text
        
        # Identify and return link to listing
        link = result.a['href']

        # Run only if title, paragraph, and link are available
        if (news_title and news_paragraph and link):
            # Print results
            print('--------------------------------------------------')
            print(news_title)
            print(news_paragraph)
            print(link)

            # Dictionary to be inserted as a MongoDB document
            post = {
                'title': news_title,
                'paragraph': news_paragraph,
                'link': link,
                
            }

            collection.insert_one(post)

    except Exception as e:
        print(e)

In [48]:
# Display items in MongoDB collection
listings = db.items.find()

for listing in listings:
    print(listing)