In [1]:
# Dependencies
from bs4 import BeautifulSoup
import requests
import pymongo

In [2]:
# Open a PyMongo client against the local MongoDB server.
# `conn` holds the connection URI (default MongoDB port on localhost).
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(host=conn)

In [3]:
# Select the database and the collection that will hold the scraped articles.
# Subscript access is equivalent to attribute access on PyMongo objects.
db = client['nhl_db']
collection = db['articles']

In [4]:
# URL of page to be scraped
url = 'https://www.nhl.com/news'

# Retrieve page with the requests module. requests has no default timeout,
# so set one explicitly to keep the cell from hanging if the server stalls.
response = requests.get(url, timeout=30)
# Raise on 4xx/5xx responses instead of silently parsing an error page
response.raise_for_status()
# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(response.text, 'html.parser')

In [5]:
# Retrieve the parent divs for all articles
results = soup.find_all('div', class_='article-item__top')

# Loop over results to get article data
for result in results:
    # Locate the elements holding the header, subheader and publication date
    header_tag = result.find('h1', class_='article-item__headline')
    subheader_tag = result.find('h2', class_='article-item__subheader')
    date_tag = result.find('span', class_='article-item__date')

    # find() returns None when an element is absent; skip incomplete items
    # rather than letting one malformed card abort the whole loop.
    if header_tag is None or subheader_tag is None or date_tag is None:
        continue

    # The timestamp lives in the span's 'data-date' attribute
    # (named `timestamp` to avoid shadowing the stdlib `datetime` module).
    timestamp = date_tag.get('data-date')
    if timestamp is None:
        continue

    header = header_tag.text
    subheader = subheader_tag.text

    # Keep only the date portion that precedes the 'T' separator
    date = timestamp.split('T')[0]

    # Print article data
    print('-----------------')
    print(header)
    print(subheader)
    print(date)

    # Dictionary to be stored in MongoDB
    post = {
        'header': header,
        'subheader': subheader,
        'date': date,
    }

    # Upsert instead of insert so that re-running this cell does not store
    # the same article twice: an identical document is matched and left
    # as-is, otherwise a new one is created.
    collection.update_one(post, {'$set': post}, upsert=True)

-----------------
Inside look at Colorado Avalanche
Hope for long playoff run with Saad joining core of MacKinnon, Landeskog, Rantanen
2020-11-23
-----------------
Ovechkin can break Gretzky's NHL goals record, Jagr says
Capitals forward, 188 shy of mark, must 'stay healthy and keep working as hard'
2020-11-23
-----------------
DeBrusk signs two-year, $7.35 million contract with Bruins
Forward was restricted free agent, scored 35 points for Boston last season
2020-11-23
-----------------
Julien heart healthy for Canadiens, wants to coach as long as possible
60-year-old left playoffs in August to have procedure
2020-11-23
-----------------
Lundqvist joins Capitals teammates for first time at informal skate
Goalie signed one-year contract Oct. 9 after playing 15 seasons with Rangers
2020-11-23
-----------------
Reverse Retro alternate jerseys for all 31 teams unveiled by NHL, adidas
Will be worn multiple times this season; available for purchase starting Dec. 1
2020-11-16
---------------

In [6]:
# Query every document in the articles collection and print each one
articles = db['articles'].find()
for article in articles:
    print(article)

{'_id': ObjectId('5fbc519b90aa569e493eacb4'), 'header': 'Inside look at Colorado Avalanche', 'subheader': 'Hope for long playoff run with Saad joining core of MacKinnon, Landeskog, Rantanen', 'date': '2020-11-23'}
{'_id': ObjectId('5fbc519b90aa569e493eacb5'), 'header': "Ovechkin can break Gretzky's NHL goals record, Jagr says", 'subheader': "Capitals forward, 188 shy of mark, must 'stay healthy and keep working as hard'", 'date': '2020-11-23'}
{'_id': ObjectId('5fbc519b90aa569e493eacb6'), 'header': 'DeBrusk signs two-year, $7.35 million contract with Bruins', 'subheader': 'Forward was restricted free agent, scored 35 points for Boston last season', 'date': '2020-11-23'}
{'_id': ObjectId('5fbc519b90aa569e493eacb7'), 'header': 'Julien heart healthy for Canadiens, wants to coach as long as possible', 'subheader': '60-year-old left playoffs in August to have procedure', 'date': '2020-11-23'}
{'_id': ObjectId('5fbc519b90aa569e493eacb8'), 'header': 'Lundqvist joins Capitals teammates for fir