In [1]:
# Importing the necessary packages for this project

In [2]:
import numpy as np

In [3]:
import pandas as pd

In [4]:
import matplotlib.pyplot as plt  # "plt" is the conventional alias for the pyplot module, not the top-level package

In [5]:
import seaborn as sns

In [6]:
import requests

In [7]:
import json

In [8]:
# Import the relevant dataset
# For the scope of this project, I will be limiting data to the modern era (1992-present). Data will be accessed via the NHL's API

In [9]:
# The NHL API does not provide a direct listing of player IDs. Instead, the IDs must be derived from each team's roster information, so the teams are fetched first.

teams_url = 'https://statsapi.web.nhl.com/api/v1/teams'  # team-listing endpoint of the NHL stats API
# NOTE(review): confirm this host is still being served before relying on a fresh run.
# requests applies no timeout by default, so set one to keep a stalled connection from hanging the kernel.
teams_resp = requests.get(teams_url, timeout=30)
teams_resp.raise_for_status()  # surface HTTP errors here instead of as a confusing KeyError later
teams_json = teams_resp.json()  # decode the JSON body into Python dicts/lists

In [10]:
print(teams_json)

{'copyright': 'NHL and the NHL Shield are registered trademarks of the National Hockey League. NHL and NHL team marks are the property of the NHL and its teams. © NHL 2022. All Rights Reserved.', 'teams': [{'id': 1, 'name': 'New Jersey Devils', 'link': '/api/v1/teams/1', 'venue': {'name': 'Prudential Center', 'link': '/api/v1/venues/null', 'city': 'Newark', 'timeZone': {'id': 'America/New_York', 'offset': -5, 'tz': 'EST'}}, 'abbreviation': 'NJD', 'teamName': 'Devils', 'locationName': 'New Jersey', 'firstYearOfPlay': '1982', 'division': {'id': 18, 'name': 'Metropolitan', 'nameShort': 'Metro', 'link': '/api/v1/divisions/18', 'abbreviation': 'M'}, 'conference': {'id': 6, 'name': 'Eastern', 'link': '/api/v1/conferences/6'}, 'franchise': {'franchiseId': 23, 'teamName': 'Devils', 'link': '/api/v1/franchises/23'}, 'shortName': 'New Jersey', 'officialSiteUrl': 'http://www.newjerseydevils.com/', 'franchiseId': 23, 'active': True}, {'id': 2, 'name': 'New York Islanders', 'link': '/api/v1/teams/2

In [11]:
type(teams_json['teams']) # check type of teams key

list

In [12]:
teams_json['teams'][0] # viewing attributes associated with "team".

{'id': 1,
 'name': 'New Jersey Devils',
 'link': '/api/v1/teams/1',
 'venue': {'name': 'Prudential Center',
  'link': '/api/v1/venues/null',
  'city': 'Newark',
  'timeZone': {'id': 'America/New_York', 'offset': -5, 'tz': 'EST'}},
 'abbreviation': 'NJD',
 'teamName': 'Devils',
 'locationName': 'New Jersey',
 'firstYearOfPlay': '1982',
 'division': {'id': 18,
  'name': 'Metropolitan',
  'nameShort': 'Metro',
  'link': '/api/v1/divisions/18',
  'abbreviation': 'M'},
 'conference': {'id': 6, 'name': 'Eastern', 'link': '/api/v1/conferences/6'},
 'franchise': {'franchiseId': 23,
  'teamName': 'Devils',
  'link': '/api/v1/franchises/23'},
 'shortName': 'New Jersey',
 'officialSiteUrl': 'http://www.newjerseydevils.com/',
 'franchiseId': 23,
 'active': True}

In [13]:
# Each team dictionary contains further nested dictionaries. The function below flattens that structure for use in pandas.

def flatten_team(nested):
    """Flatten one team record into a single-level dict suitable for a DataFrame row.

    Every top-level entry whose value is not a dict is copied unchanged. Selected
    fields of the nested ``venue``, ``franchise``, ``division`` and ``conference``
    dicts are then added under flat column names.

    Parameters
    ----------
    nested : dict
        One team record, e.g. an element of ``teams_json['teams']``.

    Returns
    -------
    dict
        A new dict holding the non-dict top-level values followed by
        ``venue_name``, ``venue_city``, ``franchise_id``, ``division_id`` and
        ``conference_id``. A derived field is ``None`` when its nested section
        or key is absent, so an incomplete record yields a row with gaps
        instead of raising ``KeyError``.
    """
    # isinstance (rather than an exact type comparison) also filters dict subclasses.
    flat = {key: value for key, value in nested.items()
            if not isinstance(value, dict)}

    # (output column, nested section, key inside that section)
    lookups = (
        ('venue_name', 'venue', 'name'),
        ('venue_city', 'venue', 'city'),
        ('franchise_id', 'franchise', 'franchiseId'),
        ('division_id', 'division', 'id'),
        ('conference_id', 'conference', 'id'),
    )
    for column, section_key, field in lookups:
        section = nested.get(section_key)
        flat[column] = section.get(field) if isinstance(section, dict) else None
    return flat


In [14]:
# Passing the flattened teams to a dataframe

df_teams = pd.DataFrame([flatten_team(x) for x in teams_json['teams']])

In [15]:
df_teams.head()

Unnamed: 0,id,name,link,abbreviation,teamName,locationName,firstYearOfPlay,shortName,officialSiteUrl,franchiseId,active,venue_name,venue_city,franchise_id,division_id,conference_id
0,1,New Jersey Devils,/api/v1/teams/1,NJD,Devils,New Jersey,1982,New Jersey,http://www.newjerseydevils.com/,23,True,Prudential Center,Newark,23,18,6
1,2,New York Islanders,/api/v1/teams/2,NYI,Islanders,New York,1972,NY Islanders,http://www.newyorkislanders.com/,22,True,UBS Arena,Elmont,22,18,6
2,3,New York Rangers,/api/v1/teams/3,NYR,Rangers,New York,1926,NY Rangers,http://www.newyorkrangers.com/,10,True,Madison Square Garden,New York,10,18,6
3,4,Philadelphia Flyers,/api/v1/teams/4,PHI,Flyers,Philadelphia,1967,Philadelphia,http://www.philadelphiaflyers.com/,16,True,Wells Fargo Center,Philadelphia,16,18,6
4,5,Pittsburgh Penguins,/api/v1/teams/5,PIT,Penguins,Pittsburgh,1967,Pittsburgh,http://pittsburghpenguins.com/,17,True,PPG Paints Arena,Pittsburgh,17,18,6


In [16]:
# Index the teams by their API id. Reassigning (instead of inplace=True) and checking for the
# column first keeps the cell safe to re-run: a second in-place set_index('id') raises KeyError.
if 'id' in df_teams.columns:
    df_teams = df_teams.set_index('id')

In [17]:
# Drop the URL-style columns that are not needed for the analysis. errors='ignore' keeps the cell
# re-runnable: once the columns are gone, a plain drop would raise KeyError.
df_teams = df_teams.drop(columns=['officialSiteUrl', 'link'], errors='ignore')
df_teams.head()

Unnamed: 0_level_0,name,abbreviation,teamName,locationName,firstYearOfPlay,shortName,franchiseId,active,venue_name,venue_city,franchise_id,division_id,conference_id
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,New Jersey Devils,NJD,Devils,New Jersey,1982,New Jersey,23,True,Prudential Center,Newark,23,18,6
2,New York Islanders,NYI,Islanders,New York,1972,NY Islanders,22,True,UBS Arena,Elmont,22,18,6
3,New York Rangers,NYR,Rangers,New York,1926,NY Rangers,10,True,Madison Square Garden,New York,10,18,6
4,Philadelphia Flyers,PHI,Flyers,Philadelphia,1967,Philadelphia,16,True,Wells Fargo Center,Philadelphia,16,18,6
5,Pittsburgh Penguins,PIT,Penguins,Pittsburgh,1967,Pittsburgh,17,True,PPG Paints Arena,Pittsburgh,17,18,6


In [18]:
# Now that I have the team information, I will search for roster information

rosters_url = 'https://statsapi.web.nhl.com/api/v1/teams?expand=team.roster'  # expand=team.roster adds each team's active roster to the teams response
# requests applies no timeout by default, so set one to keep a stalled connection from hanging the kernel.
rosters_resp = requests.get(rosters_url, timeout=30)
rosters_resp.raise_for_status()  # surface HTTP errors here instead of as a confusing KeyError later
rosters_json = rosters_resp.json()  # decode the JSON body into Python dicts/lists

In [19]:
rosters_json

{'copyright': 'NHL and the NHL Shield are registered trademarks of the National Hockey League. NHL and NHL team marks are the property of the NHL and its teams. © NHL 2022. All Rights Reserved.',
 'teams': [{'id': 1,
   'name': 'New Jersey Devils',
   'link': '/api/v1/teams/1',
   'venue': {'name': 'Prudential Center',
    'link': '/api/v1/venues/null',
    'city': 'Newark',
    'timeZone': {'id': 'America/New_York', 'offset': -5, 'tz': 'EST'}},
   'abbreviation': 'NJD',
   'teamName': 'Devils',
   'locationName': 'New Jersey',
   'firstYearOfPlay': '1982',
   'division': {'id': 18,
    'name': 'Metropolitan',
    'nameShort': 'Metro',
    'link': '/api/v1/divisions/18',
    'abbreviation': 'M'},
   'conference': {'id': 6, 'name': 'Eastern', 'link': '/api/v1/conferences/6'},
   'franchise': {'franchiseId': 23,
    'teamName': 'Devils',
    'link': '/api/v1/franchises/23'},
   'roster': {'roster': [{'person': {'id': 8473541,
       'fullName': 'Jonathan Bernier',
       'link': '/api/v1

In [20]:
rosters_json['teams'][0] 
# Viewing the attributes of the first team's entry in the roster response.
# From this, I am particularly interested in the "person" dictionary, from which I can take each player's unique ID.

{'id': 1,
 'name': 'New Jersey Devils',
 'link': '/api/v1/teams/1',
 'venue': {'name': 'Prudential Center',
  'link': '/api/v1/venues/null',
  'city': 'Newark',
  'timeZone': {'id': 'America/New_York', 'offset': -5, 'tz': 'EST'}},
 'abbreviation': 'NJD',
 'teamName': 'Devils',
 'locationName': 'New Jersey',
 'firstYearOfPlay': '1982',
 'division': {'id': 18,
  'name': 'Metropolitan',
  'nameShort': 'Metro',
  'link': '/api/v1/divisions/18',
  'abbreviation': 'M'},
 'conference': {'id': 6, 'name': 'Eastern', 'link': '/api/v1/conferences/6'},
 'franchise': {'franchiseId': 23,
  'teamName': 'Devils',
  'link': '/api/v1/franchises/23'},
 'roster': {'roster': [{'person': {'id': 8473541,
     'fullName': 'Jonathan Bernier',
     'link': '/api/v1/people/8473541'},
    'jerseyNumber': '45',
    'position': {'code': 'G',
     'name': 'Goalie',
     'type': 'Goalie',
     'abbreviation': 'G'}},
   {'person': {'id': 8476292,
     'fullName': 'Ondrej Palat',
     'link': '/api/v1/people/8476292'},

In [21]:
# NOTE: despite the name, this is the first team's full record (a dict), not a list;
# the list of players sits under its ['roster']['roster'] entry.
firstrosterlist = rosters_json['teams'][0]

In [22]:
firstroster = firstrosterlist['roster']['roster']

In [23]:
# Again, each roster entry contains further nested dictionaries. The function below flattens that structure for use in pandas.

def flatten_player(nested):
    """Flatten one roster entry into a flat dict of the fields kept for analysis.

    Parameters
    ----------
    nested : dict
        A roster entry with a ``person`` dict (``id``, ``fullName``), a
        ``position`` dict (``code``) and, normally, a ``jerseyNumber``.

    Returns
    -------
    dict
        Keys ``person_id``, ``name``, ``jersey`` and ``position``, in that
        order. ``jersey`` is ``None`` when the entry has no ``jerseyNumber``.

    Raises
    ------
    KeyError
        If the ``person`` or ``position`` information is missing.
    """
    person = nested['person']
    return {
        'person_id': person['id'],
        'name': person['fullName'],
        # Treated as optional so a single entry without a number cannot abort a league-wide build.
        'jersey': nested.get('jerseyNumber'),
        'position': nested['position']['code'],
    }


In [24]:
firstroster_df = pd.DataFrame([flatten_player(x) for x in firstroster])

In [25]:
firstroster_df

Unnamed: 0,person_id,name,jersey,position
0,8473541,Jonathan Bernier,45,G
1,8476292,Ondrej Palat,18,L
2,8478406,Mackenzie Blackwood,29,G
3,8482076,Nico Daws,50,G
4,8474090,Brendan Smith,2,D
5,8475193,Tomas Tatar,90,L
6,8475287,Erik Haula,56,L
7,8476462,Dougie Hamilton,7,D
8,8476923,Damon Severson,28,D
9,8477425,Miles Wood,44,L


In [26]:

def process_roster1(team_dict):
    """Return a DataFrame with one flattened row per player on the team's roster."""
    players = team_dict['roster']['roster']
    rows = list(map(flatten_player, players))
    return pd.DataFrame(rows)


In [27]:
rosters_2 = process_roster1(firstrosterlist)
rosters_2.head()

Unnamed: 0,person_id,name,jersey,position
0,8473541,Jonathan Bernier,45,G
1,8476292,Ondrej Palat,18,L
2,8478406,Mackenzie Blackwood,29,G
3,8482076,Nico Daws,50,G
4,8474090,Brendan Smith,2,D


In [28]:
# TODO: update this. Variant of process_roster1 that also tags each player row with the team's id and name.

def process_roster2(team_dict):
    """Return the team's roster as a DataFrame, tagging every player row with the team's id and name."""
    rows = []
    for entry in team_dict['roster']['roster']:
        rows.append(flatten_player(entry))

    table = pd.DataFrame(rows)
    # Scalars are broadcast down the whole column, so every player row gets the same team info.
    table['team_id'] = team_dict['id']
    table['team_name'] = team_dict['name']
    return table

In [29]:
# TODO: update this. Check process_roster2 on the first team before applying it to the whole league.

df_nj3 = process_roster2(firstrosterlist)
df_nj3.head()

Unnamed: 0,person_id,name,jersey,position,team_id,team_name
0,8473541,Jonathan Bernier,45,G,1,New Jersey Devils
1,8476292,Ondrej Palat,18,L,1,New Jersey Devils
2,8478406,Mackenzie Blackwood,29,G,1,New Jersey Devils
3,8482076,Nico Daws,50,G,1,New Jersey Devils
4,8474090,Brendan Smith,2,D,1,New Jersey Devils


In [30]:
league_rosters = pd.concat([process_roster2(x) for x in rosters_json['teams']], ignore_index=True)

In [31]:
league_rosters.sample(10)

Unnamed: 0,person_id,name,jersey,position,team_id,team_name
737,8479982,Conor Timmins,25,D,53,Arizona Coyotes
497,8477406,Mattias Janmark,26,C,22,Edmonton Oilers
198,8474207,Nick Holden,5,D,9,Ottawa Senators
264,8480829,Jesperi Kotkaniemi,82,C,12,Carolina Hurricanes
648,8476374,Sean Kuraly,7,C,29,Columbus Blue Jackets
301,8477426,Nicholas Paul,20,L,14,Tampa Bay Lightning
526,8478444,Brock Boeser,6,R,23,Vancouver Canucks
469,8473446,Erik Johnson,6,D,21,Colorado Avalanche
305,8478416,Erik Cernak,81,D,14,Tampa Bay Lightning
667,8477451,Ryan Hartman,38,R,30,Minnesota Wild


In [32]:
# The NHL API player stats endpoint does not allow you to request information for all players. Instead it requires you to use a single player's ID.
# I will use Steven Stamkos - star player and captain of Tampa Bay - Player ID 8474564

player_id = 8474564

In [33]:
# I will obtain the detailed player statistics on a season by season basis
stats_url = f'https://statsapi.web.nhl.com/api/v1/people/{player_id}/stats?stats=yearByYear' # this is the endpoint for player statistical data within the NHL API, to which I am passing my player_id

In [34]:
# requests applies no timeout by default, so set one to keep a stalled connection from hanging the kernel.
stamkos_stats_resp = requests.get(stats_url, timeout=30)
stamkos_stats_resp.raise_for_status()  # surface HTTP errors here instead of as a confusing KeyError later
stamkos_stats_json = stamkos_stats_resp.json()  # decode the JSON body into Python dicts/lists

In [35]:
stamkos_stats_json

{'copyright': 'NHL and the NHL Shield are registered trademarks of the National Hockey League. NHL and NHL team marks are the property of the NHL and its teams. © NHL 2022. All Rights Reserved.',
 'stats': [{'type': {'displayName': 'yearByYear', 'gameType': None},
   'splits': [{'season': '20052006',
     'stat': {'timeOnIce': '00:00',
      'assists': 92,
      'goals': 105,
      'pim': 87,
      'games': 66,
      'powerPlayTimeOnIce': '00:00',
      'evenTimeOnIce': '00:00',
      'penaltyMinutes': '87',
      'faceOffPct': 0.0,
      'shortHandedTimeOnIce': '00:00',
      'points': 197,
      'shifts': 0},
     'team': {'name': 'Markham', 'link': '/api/v1/teams/null'},
     'league': {'name': 'Minor-ON', 'link': '/api/v1/league/null'},
     'sequenceNumber': 1},
    {'season': '20062007',
     'stat': {'assists': 50,
      'goals': 42,
      'pim': 56,
      'games': 63,
      'powerPlayGoals': 22,
      'penaltyMinutes': '56',
      'faceOffPct': 0.0,
      'shortHandedGoals': 0,

In [36]:
stamkos_stats = stamkos_stats_json['stats'][0]['splits'][0]
stamkos_stats

{'season': '20052006',
 'stat': {'timeOnIce': '00:00',
  'assists': 92,
  'goals': 105,
  'pim': 87,
  'games': 66,
  'powerPlayTimeOnIce': '00:00',
  'evenTimeOnIce': '00:00',
  'penaltyMinutes': '87',
  'faceOffPct': 0.0,
  'shortHandedTimeOnIce': '00:00',
  'points': 197,
  'shifts': 0},
 'team': {'name': 'Markham', 'link': '/api/v1/teams/null'},
 'league': {'name': 'Minor-ON', 'link': '/api/v1/league/null'},
 'sequenceNumber': 1}

In [37]:
def flatten_player_year_stats(stats_dict):
    """Flatten one season split of a player's year-by-year stats into a single-level dict.

    Parameters
    ----------
    stats_dict : dict
        One split with a ``stat`` dict of statistics, a ``season`` value, and
        ``team`` / ``league`` dicts that each carry a ``name``.

    Returns
    -------
    dict
        A new dict containing every entry of ``stats_dict['stat']`` plus
        ``season``, ``team`` (the team name) and ``league`` (the league name).
        The input is left unmodified.
    """
    # Copy first: writing into stats_dict['stat'] directly would inject the
    # season/team/league keys into the caller's parsed JSON.
    stats_flat = dict(stats_dict['stat'])
    stats_flat['season'] = stats_dict['season']
    stats_flat['team'] = stats_dict['team']['name']
    stats_flat['league'] = stats_dict['league']['name']
    return stats_flat

In [38]:
flatten_player_year_stats(stamkos_stats)

{'timeOnIce': '00:00',
 'assists': 92,
 'goals': 105,
 'pim': 87,
 'games': 66,
 'powerPlayTimeOnIce': '00:00',
 'evenTimeOnIce': '00:00',
 'penaltyMinutes': '87',
 'faceOffPct': 0.0,
 'shortHandedTimeOnIce': '00:00',
 'points': 197,
 'shifts': 0,
 'season': '20052006',
 'team': 'Markham',
 'league': 'Minor-ON'}

In [39]:
stamkos_stats_df = pd.DataFrame([flatten_player_year_stats(x) for x in stamkos_stats_json['stats'][0]['splits']])

In [40]:
stamkos_stats_df

Unnamed: 0,timeOnIce,assists,goals,pim,games,powerPlayTimeOnIce,evenTimeOnIce,penaltyMinutes,faceOffPct,shortHandedTimeOnIce,...,shortHandedGoals,plusMinus,gameWinningGoals,shots,hits,powerPlayPoints,shotPct,overTimeGoals,shortHandedPoints,blocked
0,00:00,92,105,87,66,00:00,00:00,87,0.0,00:00,...,,,,,,,,,,
1,,50,42,56,63,,,56,0.0,,...,0.0,13.0,,,,,,,,
2,00:00,8,2,8,6,00:00,00:00,8,0.0,00:00,...,,,,,,,,,,
3,,47,58,88,61,,,88,,,...,5.0,18.0,,,,,,,,
4,,5,1,4,7,,,4,,,...,0.0,1.0,0.0,,,,,,,
5,1179:51,23,23,39,79,223:57,946:37,39,45.42,09:17,...,0.0,-13.0,1.0,181.0,75.0,17.0,12.71,0.0,0.0,17.0
6,,4,7,6,9,,,6,0.0,,...,,,,,,,,,,
7,1685:14,44,51,38,82,381:18,1193:22,38,47.91,110:34,...,1.0,-2.0,5.0,297.0,68.0,41.0,17.17,1.0,1.0,38.0
8,,1,2,10,5,,,10,0.0,,...,,,,,,,,,,
9,1655:56,46,45,74,82,372:53,1256:29,74,46.49,26:34,...,0.0,3.0,8.0,272.0,84.0,36.0,16.54,1.0,0.0,37.0


In [47]:
# Some values are NaN as not all leagues that Stamkos has played in record the same stats
stamkos_stats_df.isnull().sum()

timeOnIce               7
assists                 0
goals                   0
pim                     0
games                   0
powerPlayTimeOnIce      7
evenTimeOnIce           7
penaltyMinutes          0
faceOffPct              4
shortHandedTimeOnIce    7
points                  0
shifts                  7
season                  0
team                    0
league                  0
powerPlayGoals          5
shortHandedGoals        5
plusMinus               4
gameWinningGoals        7
shots                   9
hits                    9
powerPlayPoints         9
shotPct                 9
overTimeGoals           9
shortHandedPoints       9
blocked                 9
dtype: int64

In [48]:
# fillna returns a new frame rather than modifying stamkos_stats_df, so keep the result under its
# own name; the original frame (where NaN means the stat was not recorded) stays intact.
stamkos_stats_filled = stamkos_stats_df.fillna(0)
stamkos_stats_filled

Unnamed: 0,timeOnIce,assists,goals,pim,games,powerPlayTimeOnIce,evenTimeOnIce,penaltyMinutes,faceOffPct,shortHandedTimeOnIce,...,shortHandedGoals,plusMinus,gameWinningGoals,shots,hits,powerPlayPoints,shotPct,overTimeGoals,shortHandedPoints,blocked
0,00:00,92,105,87,66,00:00,00:00,87,0.0,00:00,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0,50,42,56,63,0,0,56,0.0,0,...,0.0,13.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,00:00,8,2,8,6,00:00,00:00,8,0.0,00:00,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0,47,58,88,61,0,0,88,0.0,0,...,5.0,18.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0,5,1,4,7,0,0,4,0.0,0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,1179:51,23,23,39,79,223:57,946:37,39,45.42,09:17,...,0.0,-13.0,1.0,181.0,75.0,17.0,12.71,0.0,0.0,17.0
6,0,4,7,6,9,0,0,6,0.0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,1685:14,44,51,38,82,381:18,1193:22,38,47.91,110:34,...,1.0,-2.0,5.0,297.0,68.0,41.0,17.17,1.0,1.0,38.0
8,0,1,2,10,5,0,0,10,0.0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,1655:56,46,45,74,82,372:53,1256:29,74,46.49,26:34,...,0.0,3.0,8.0,272.0,84.0,36.0,16.54,1.0,0.0,37.0


In [50]:
# Viewing all the columns available
(stamkos_stats_df.columns.tolist())

['timeOnIce',
 'assists',
 'goals',
 'pim',
 'games',
 'powerPlayTimeOnIce',
 'evenTimeOnIce',
 'penaltyMinutes',
 'faceOffPct',
 'shortHandedTimeOnIce',
 'points',
 'shifts',
 'season',
 'team',
 'league',
 'powerPlayGoals',
 'shortHandedGoals',
 'plusMinus',
 'gameWinningGoals',
 'shots',
 'hits',
 'powerPlayPoints',
 'shotPct',
 'overTimeGoals',
 'shortHandedPoints',
 'blocked']

In [42]:
stamkos_stats_df[['season', 'team','league','assists']]

Unnamed: 0,season,team,league,assists
0,20052006,Markham,Minor-ON,92
1,20062007,Sarnia,OHL,50
2,20062007,Canada,WJ18-A,8
3,20072008,Sarnia,OHL,47
4,20072008,Canada,WJC-A,5
5,20082009,Tampa Bay Lightning,National Hockey League,23
6,20082009,Canada,WC-A,4
7,20092010,Tampa Bay Lightning,National Hockey League,44
8,20092010,Canada,WC-A,1
9,20102011,Tampa Bay Lightning,National Hockey League,46
