In [1]:
# Dependencies
from bs4 import BeautifulSoup
import requests
import pymongo

In [2]:
# Open a PyMongo client against the MongoDB server at the URI below
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(host=conn)

In [3]:
# Select the `nhl_db` database and its `articles` collection
# (subscript access is equivalent to attribute access on PyMongo objects)
db = client['nhl_db']
collection = db['articles']

In [4]:
# URL of page to be scraped
url = 'https://www.nhl.com/news'

# Retrieve page with the requests module.
# requests applies no timeout by default, so pass one explicitly to keep the
# cell from hanging forever if the server never answers.
response = requests.get(url, timeout=30)
# Fail loudly on a 4xx/5xx response instead of parsing an error page
# as if it were the news listing.
response.raise_for_status()

# Create BeautifulSoup object; parse with 'lxml'
soup = BeautifulSoup(response.text, 'lxml')

In [5]:
# Retrieve the parent divs for all articles
results = soup.find_all('div', class_='article-item__top')

# Loop over results to get article data
for result in results:
    # Locate the elements that carry the article data. find() returns None
    # when an element is absent, so look them up before dereferencing.
    header_tag = result.find('h1', class_='article-item__headline')
    subheader_tag = result.find('h2', class_='article-item__subheader')
    date_tag = result.find('span', class_='article-item__date')

    # The timestamp is read from the span's `data-date` attribute; .get()
    # yields None instead of raising KeyError when the attribute is missing.
    timestamp = date_tag.get('data-date') if date_tag is not None else None

    # Skip incomplete entries rather than aborting the loop halfway through
    # (which would leave only part of the page stored in MongoDB).
    if header_tag is None or subheader_tag is None or not timestamp:
        print('-----------------')
        print('Skipping article with missing headline, subheader or date')
        continue

    # scrape the article header and subheader text
    header = header_tag.text
    subheader = subheader_tag.text

    # keep only the part of the timestamp before the 'T' separator (the date)
    date = timestamp.split('T')[0]

    # print article data
    print('-----------------')
    print(header)
    print(subheader)
    print(date)

    # Dictionary to be stored in MongoDB
    post = {
        'header': header,
        'subheader': subheader,
        'date': date,
    }

    # Upsert instead of a plain insert so that re-running this cell does not
    # create duplicate documents: an identical article is matched and left
    # as-is, a new one is inserted.
    collection.update_one(post, {'$set': post}, upsert=True)

-----------------
Stars seek 'different level' in Game 4 of Cup Final
Top players know they need to be better to even series against Lightning
2020-09-24
-----------------
Lightning, Stars eyeing Game 4 of Cup Final rather than back-to-back
Game 5 thoughts can wait with Tampa Bay aiming to extend series lead against Dallas
2020-09-24
-----------------
Hornqvist traded to Panthers by Penguins for Matheson, Sceviour
Forward, two-time Cup winner with Pittsburgh, acquired for defenseman, center
2020-09-24
-----------------
Hedman closing in on history for Lightning entering Game 4 of Cup Final
Two away from goals record for defenseman in single postseason, has grown into 'beast'
2020-09-24
-----------------
Stamkos' brief stint enough to inspire Lightning in Game 3 win
Scores in return before leaving in first to help take lead in Cup Final against Stars
2020-09-24
-----------------
Stanley Cup Final schedule
Will be played in Edmonton, includes back-to-back Games 4-5
2020-09-23
-----------

In [6]:
# Query every document in the `articles` collection and print each one
articles = db['articles'].find()
for article in articles:
    print(article)

{'_id': ObjectId('5f6d338f0a5cbc8bf1fbb919'), 'header': "Stars seek 'different level' in Game 4 of Cup Final", 'subheader': 'Top players know they need to be better to even series against Lightning', 'date': '2020-09-24'}
{'_id': ObjectId('5f6d338f0a5cbc8bf1fbb91a'), 'header': 'Lightning, Stars eyeing Game 4 of Cup Final rather than back-to-back', 'subheader': 'Game 5 thoughts can wait with Tampa Bay aiming to extend series lead against Dallas', 'date': '2020-09-24'}
{'_id': ObjectId('5f6d338f0a5cbc8bf1fbb91b'), 'header': 'Hornqvist traded to Panthers by Penguins for Matheson, Sceviour', 'subheader': 'Forward, two-time Cup winner with Pittsburgh, acquired for defenseman, center', 'date': '2020-09-24'}
{'_id': ObjectId('5f6d338f0a5cbc8bf1fbb91c'), 'header': 'Hedman closing in on history for Lightning entering Game 4 of Cup Final', 'subheader': "Two away from goals record for defenseman in single postseason, has grown into 'beast'", 'date': '2020-09-24'}
{'_id': ObjectId('5f6d338f0a5cbc8