In [3]:
# Dependencies
from bs4 import BeautifulSoup
import requests
import pymongo

In [4]:
# Initialize PyMongo to work with MongoDBs
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

In [5]:
# Define database and collection
db = client.nhl_db
collection = db.articles

In [6]:
# URL of page to be scraped
url = 'https://www.nhl.com/'

# Retrieve page with the requests module
response = requests.get(url)
# Create BeautifulSoup object; parse with 'lxml'
soup = BeautifulSoup(response.text, 'lxml')

In [7]:
# Retrieve the parent divs for all articles
results = soup.find_all('li', class_='mixed-feed__item--article')

# Loop through results to retrieve article title, header, and timestamp of article
for result in results:
    title = result.find('h4', class_='mixed-feed__header').text

    lede = result.find('h5', class_='mixed-feed__subheader').text

    # The time and date of article publication
    date = result.find('time')['datetime']
    # Slice the datetime string for the date
    article_date = date[:10]
    # Slice the datetime string for the time
    time = date[11:16]
    # Determine whether article was published in AM or PM
    if (int(time[:2]) >= 13):
        meridiem = 'pm'
    else:
        meridiem = 'am'

    # Concatenate time string
    time = time + meridiem
    print('-----------------')
    print(title)
    print(lede)
    print(article_date)
    print(time)

    # Dictionary to be inserted into MongoDB
    post = {
        'title': title,
        'lede': lede,
        'date': article_date,
        'time published': time
    }

    # Insert dictionary into MongoDB as a document
    collection.insert_one(post)

-----------------
Thornton expected to return with Sharks, Meier says
40-year-old forward is unrestricted free agent
2019-08-15
13:10pm
-----------------
Aho thankful for contract with Hurricanes after Canadiens' offer sheet
Center says process helped ensure he'd start training camp on time
2019-08-15
13:50pm
-----------------
Marner, Maple Leafs will work out contract, Nylander says
Forward went through same situation last offseason as restricted free agent, says 'it just takes time'
2019-08-15
13:51pm
-----------------
Hedman says 'body is great' ahead of training camp with Lightning
Defenseman, who was finalist for Norris Trophy, missed 14 games last season because of injuries
2019-08-15
12:31am
-----------------
Top prospects for Minnesota Wild
Sturm expected to vie for roster spot; Boldy set to attend Boston College
2019-08-15
00:00am
-----------------
Marleau, Thornton skate at Sharks practice site
Unrestricted free agent forwards each hoping to play 22nd NHL season
2019-08-14
12

In [8]:
# Display the MongoDB records created above
articles = db.articles.find()
for article in articles:
    print(article)

{'_id': ObjectId('5d55f6686fcf8b8106752ad7'), 'title': 'Thornton expected to return with Sharks, Meier says', 'lede': '40-year-old forward is unrestricted free agent', 'date': '2019-08-15', 'time published': '13:10pm'}
{'_id': ObjectId('5d55f6686fcf8b8106752ad8'), 'title': "Aho thankful for contract with Hurricanes after Canadiens' offer sheet", 'lede': "Center says process helped ensure he'd start training camp on time", 'date': '2019-08-15', 'time published': '13:50pm'}
{'_id': ObjectId('5d55f6686fcf8b8106752ad9'), 'title': 'Marner, Maple Leafs will work out contract, Nylander says', 'lede': "Forward went through same situation last offseason as restricted free agent, says 'it just takes time'", 'date': '2019-08-15', 'time published': '13:51pm'}
{'_id': ObjectId('5d55f6686fcf8b8106752ada'), 'title': "Hedman says 'body is great' ahead of training camp with Lightning", 'lede': 'Defenseman, who was finalist for Norris Trophy, missed 14 games last season because of injuries', 'date': '20