In [1]:
# Dependencies
from bs4 import BeautifulSoup
import requests
import pymongo
import datetime

In [2]:
# Open a PyMongo client against the MongoDB server on localhost (default port 27017)
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(host=conn)

In [3]:
# Handles for the database and the collection that will hold the scraped articles.
# Subscript access is equivalent to attribute access on PyMongo clients/databases.
db = client['nhl_db']
collection = db['articles']

In [4]:
# URL of page to be scraped
url = 'https://www.nhl.com/'

# Retrieve page with the requests module.
# A timeout is required: without one, requests will wait indefinitely on a
# stalled connection and the cell never finishes.
response = requests.get(url, timeout=10)
# Fail loudly on 4xx/5xx responses instead of parsing an error page as if it
# were the article feed.
response.raise_for_status()

# Create BeautifulSoup object; parse with 'lxml'
soup = BeautifulSoup(response.text, 'lxml')

In [9]:
# Retrieve the <li> elements that wrap each article in the feed
results = soup.find_all('li', class_='mixed-feed__item--article')

# Number of feed items skipped because an expected element was missing
skipped = 0

# Loop through results to retrieve article title, lede, and timestamp of article
for result in results:
    title_tag = result.find('h4', class_='mixed-feed__header')
    lede_tag = result.find('h5', class_='mixed-feed__subheader')
    time_tag = result.find('time')

    # find() returns None when the element is absent; skip incomplete items
    # rather than letting one malformed entry abort the whole scrape.
    if (title_tag is None or lede_tag is None
            or time_tag is None or not time_tag.get('datetime')):
        skipped += 1
        continue

    title = title_tag.text
    lede = lede_tag.text

    # The time and date of article publication
    dt = datetime.datetime.strptime(time_tag['datetime'], '%Y-%m-%dT%H:%M:%S%z')
    article_date = dt.strftime('%Y-%m-%d')
    time = dt.strftime('%I:%M%p')

    print('-----------------')
    print(title)
    print(lede)
    print(article_date)
    print(time)

    # Dictionary to be stored in MongoDB
    post = {
        'title': title,
        'lede': lede,
        'date': article_date,
        'time_published': time
    }

    # Upsert keyed on title + date so re-running this cell refreshes the
    # existing document instead of inserting a duplicate of every article.
    collection.update_one(
        {'title': title, 'date': article_date},
        {'$set': post},
        upsert=True
    )

# Make skipped items visible instead of dropping them silently
if skipped:
    print('-----------------')
    print('Skipped %d feed item(s) with missing title, lede, or timestamp' % skipped)

-----------------
Karlsson attends Senators golf tournament
Defenseman, subject of trade rumors, does not speak to media
2018-09-12
05:12PM
-----------------
Tortorella gets two-year contract extension with Blue Jackets
Winningest U.S.-born coach in NHL history signed through 2020-21
2018-09-12
03:53PM
-----------------
Fantasy top 250 rankings for 2018-19
McDavid, Ovechkin lead consensus list; impact of Pacioretty trade on Golden Knights, Canadiens
2018-09-11
06:15PM
-----------------
Wennberg eyes bounce-back season for Blue Jackets
Columbus center embracing chance 'to be better than I showed last year'
2018-09-13
12:00AM
-----------------
Gibson's alternate mask honors Pittsburgh, Ducks championship team
Anaheim goalie goalie pays homage to hometown, 2006-07 team with 25th anniversary helmet
2018-09-12
07:55PM
-----------------
Johansen, Predators out to prove they can win Cup
Center says team can learn from falling short after winning Presidents' Trophy last season
2018-09-12
11:08

In [8]:
# Fetch every document in the articles collection (empty filter matches all)
# and print each one on its own line
articles = db['articles'].find({})
for article in articles:
    print(article)

{'_id': ObjectId('5b99e671b84a80dc3fadc8a6'), 'title': 'Karlsson attends Senators golf tournament', 'lede': 'Defenseman, subject of trade rumors, does not speak to media', 'date': '2018-09-12', 'time_published': '05:12PM'}
{'_id': ObjectId('5b99e671b84a80dc3fadc8a7'), 'title': 'Tortorella gets two-year contract extension with Blue Jackets', 'lede': 'Winningest U.S.-born coach in NHL history signed through 2020-21', 'date': '2018-09-12', 'time_published': '03:53PM'}
{'_id': ObjectId('5b99e671b84a80dc3fadc8a8'), 'title': 'Fantasy top 250 rankings for 2018-19', 'lede': 'McDavid, Ovechkin lead consensus list; impact of Pacioretty trade on Golden Knights, Canadiens', 'date': '2018-09-11', 'time_published': '06:15PM'}
{'_id': ObjectId('5b99e671b84a80dc3fadc8a9'), 'title': 'Wennberg eyes bounce-back season for Blue Jackets', 'lede': "Columbus center embracing chance 'to be better than I showed last year'", 'date': '2018-09-13', 'time_published': '12:00AM'}
{'_id': ObjectId('5b99e671b84a80dc3f