In [7]:
# Dependencies
from bs4 import BeautifulSoup as bs
import requests
import pymongo

In [13]:
# Open a PyMongo client against the local MongoDB server on port 27017
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(host=conn)

In [14]:
# Handles for the 'nhl_db' database and its 'articles' collection
# (dictionary-style access is equivalent to attribute access in PyMongo)
db = client['nhl_db']
collection = db['articles']

In [15]:
# Address of the page to be scraped (the NHL.com landing page)
url = "https://www.nhl.com/"

In [18]:
# Retrieve page with the requests module.
# The timeout keeps this cell from hanging indefinitely on a stalled
# connection, and raise_for_status() stops an HTTP error page (4xx/5xx)
# from being parsed downstream as if it were the article feed.
page = requests.get(url, timeout=30)
page.raise_for_status()

# `response` holds the decoded HTML text of the page, not the Response object
response = page.text

In [19]:
# Parse the downloaded HTML into a BeautifulSoup tree using the 'lxml' parser
soup = bs(markup=response, features='lxml')

In [21]:
# Retrieve the parent list items (<li>) for all articles in the feed
results = soup.find_all('li', class_="mixed-feed__item--article")

# Loop through results to retrieve article title, lede, and timestamp of article,
# print them, and store one MongoDB document per article
for result in results:
    title_tag = result.find('h4', class_='mixed-feed__header')
    lede_tag = result.find('h5', class_='mixed-feed__subheader')
    time_tag = result.find('time')

    # find() returns None when a tag is absent; skip incomplete feed items
    # rather than crashing partway through the loop after some inserts
    if title_tag is None or lede_tag is None or time_tag is None:
        continue

    title = title_tag.text
    lede = lede_tag.text

    # The time and date of article publication (value of the datetime attribute)
    date = time_tag['datetime']

    # Slice the datetime string for the date (first 10 characters, e.g. '2018-10-20')
    article_date = date[:10]

    # Slice the datetime string for the time (characters 11-15, e.g. '14:18')
    time = date[11:16]

    # Determine whether article was published in AM or PM.
    # Hours 12 through 23 are PM: 12:00-12:59 is the noon hour, not AM.
    meridiem = 'pm' if int(time[:2]) >= 12 else 'am'

    # Concatenate time string.
    # NOTE: the hour stays on the 24-hour clock, so values look like '14:18pm'.
    time = time + meridiem
    print('-----------------')
    print(title)
    print(lede)
    print(article_date)
    print(time)

    # Dictionary to be inserted into MongoDB
    post = {
        'title': title,
        'lede': lede,
        'date': article_date,
        'time_published': time
    }
    # Insert dictionary into MongoDB as a document
    collection.insert_one(post)

-----------------
Rinne placed on injured reserve by Predators
Goalie left game Friday after collision with teammate
2018-10-20
14:18pm
-----------------
Fantasy hockey news, lineup, injury tracker
Skinner, Pominville moved to Sabres' top line with Eichel; Blue Jackets could get Jones back soon
2018-10-20
13:45pm
-----------------
Tootoo retires after 13 NHL seasons
Forward played for Predators, Red Wings, Devils, Blackhawks
2018-10-19
19:25pm
-----------------
NHL.com player poll: Toughest goalie on breakaway
Price, Rinne, Quick among picks for best in 1-on-1
2018-10-19
00:00am
-----------------
Projected lineups, starting goalies for today
Updates from NHL.com writers, correspondents
2018-10-20
09:55am
-----------------
Fantasy team power rankings for 2018-19
Galchenyuk's return should boost Coyotes power play; Kinkaid, Palmieri sell-high candidates for Devils
2018-10-19
18:01pm
-----------------
Hockey Night in Canada: 5 Storylines
Maple Leafs try to get back on track against Blues;

In [22]:
# Print every document currently stored in the 'articles' collection
# (find() with no filter returns all of them)
articles = db['articles'].find()
for article in articles:
    print(article)

{'_id': ObjectId('5bcb7a2656093b0dbfe0a6b8'), 'title': 'Rinne placed on injured reserve by Predators', 'lede': 'Goalie left game Friday after collision with teammate', 'date': '2018-10-20', 'time_published': '14:18pm'}
{'_id': ObjectId('5bcb7a2656093b0dbfe0a6b9'), 'title': 'Fantasy hockey news, lineup, injury tracker', 'lede': "Skinner, Pominville moved to Sabres' top line with Eichel; Blue Jackets could get Jones back soon", 'date': '2018-10-20', 'time_published': '13:45pm'}
{'_id': ObjectId('5bcb7a2656093b0dbfe0a6ba'), 'title': 'Tootoo retires after 13 NHL seasons', 'lede': 'Forward played for Predators, Red Wings, Devils, Blackhawks', 'date': '2018-10-19', 'time_published': '19:25pm'}
{'_id': ObjectId('5bcb7a2656093b0dbfe0a6bb'), 'title': 'NHL.com player poll: Toughest goalie on breakaway', 'lede': 'Price, Rinne, Quick among picks for best in 1-on-1', 'date': '2018-10-19', 'time_published': '00:00am'}
{'_id': ObjectId('5bcb7a2656093b0dbfe0a6bc'), 'title': 'Projected lineups, startin