In [1]:
# Import the dependencies.
import pandas as pd
from copy import deepcopy
import requests
import matplotlib.pyplot as plt

In [2]:
# Seasons to pull, kept as strings because they are concatenated into the stats URLs.
# season1 is the most recent season; season2 is the one immediately before it, so a
# player's point haul can be compared across two consecutive seasons.
season1 = str(20192020)

# Subtracting 10001 moves both the start and end year back by one (20192020 -> 20182019)
season2 = str(int(season1) - 10001)


In [3]:
# Create endpoint URL for team info (also reused as the base for the roster URLs)
team_url = 'https://statsapi.web.nhl.com/api/v1/teams'

# Run API request for info on all teams.
# The timeout (seconds) keeps the cell from hanging forever on a stalled connection,
# and raise_for_status() fails loudly on an HTTP error instead of parsing an error body.
team_response = requests.get(team_url, timeout=10)
team_response.raise_for_status()
team_info = team_response.json()

# Parse JSON to retrieve the ID (as a string, ready for URL building) and
# abbreviation of every team returned by the endpoint
team_data = [
    {"ID": str(team["id"]), "Team": team["abbreviation"]}
    for team in team_info["teams"]
]
team_data

[{'ID': '1', 'Team': 'NJD'},
 {'ID': '2', 'Team': 'NYI'},
 {'ID': '3', 'Team': 'NYR'},
 {'ID': '4', 'Team': 'PHI'},
 {'ID': '5', 'Team': 'PIT'},
 {'ID': '6', 'Team': 'BOS'},
 {'ID': '7', 'Team': 'BUF'},
 {'ID': '8', 'Team': 'MTL'},
 {'ID': '9', 'Team': 'OTT'},
 {'ID': '10', 'Team': 'TOR'},
 {'ID': '12', 'Team': 'CAR'},
 {'ID': '13', 'Team': 'FLA'},
 {'ID': '14', 'Team': 'TBL'},
 {'ID': '15', 'Team': 'WSH'},
 {'ID': '16', 'Team': 'CHI'},
 {'ID': '17', 'Team': 'DET'},
 {'ID': '18', 'Team': 'NSH'},
 {'ID': '19', 'Team': 'STL'},
 {'ID': '20', 'Team': 'CGY'},
 {'ID': '21', 'Team': 'COL'},
 {'ID': '22', 'Team': 'EDM'},
 {'ID': '23', 'Team': 'VAN'},
 {'ID': '24', 'Team': 'ANA'},
 {'ID': '25', 'Team': 'DAL'},
 {'ID': '26', 'Team': 'LAK'},
 {'ID': '28', 'Team': 'SJS'},
 {'ID': '29', 'Team': 'CBJ'},
 {'ID': '30', 'Team': 'MIN'},
 {'ID': '52', 'Team': 'WPG'},
 {'ID': '53', 'Team': 'ARI'},
 {'ID': '54', 'Team': 'VGK'},
 {'ID': '55', 'Team': 'SEA'}]

In [4]:
# Creating list for player data (one dict per rostered player, across all teams)
player_data = []

# Run API call for each team to get their roster information, then parse roster JSON to retrieve player data
for team in team_data:

    # Creating URL for team roster
    roster_url = team_url + "/" + team['ID'] + "/roster"

    # API call for team roster in JSON format.
    # The timeout (seconds) stops one stalled request from hanging the whole loop, and
    # raise_for_status() surfaces an HTTP error here rather than as a KeyError on 'roster'.
    roster_response = requests.get(roster_url, timeout=10)
    roster_response.raise_for_status()
    roster_info = roster_response.json()

    # Retrieving data for each player on roster; the ID is stored as a string
    # because it is later concatenated into the per-player stats URL
    for player in roster_info['roster']:
        player_data.append({'player_id': str(player['person']['id']),
                            'Player Name': player['person']['fullName'],
                            'Team': team['Team'],
                            'Position': player['position']['abbreviation']})

In [5]:
# Partition the roster entries by position: 'G' marks a goalie, everything else is a skater.
# Both lists hold references to the same dicts as player_data, in the original order.
goalie_data = [entry for entry in player_data if entry['Position'] == 'G']
skater_data = [entry for entry in player_data if entry['Position'] != 'G']

In [6]:
# Looping through every goalie and adding stats from the season in season1.
# Each goalie dict in goalie_data is updated in place.
for player in goalie_data:

    # Creating URL for API call for a specific player's stats in a specific season
    player_url = "https://statsapi.web.nhl.com/api/v1/people/" + player['player_id'] + "/stats?stats=statsSingleSeason&season=" + season1

    try:
        # API call for player stats in JSON format; the timeout (seconds) keeps a
        # stalled connection from blocking the loop indefinitely
        player_info = requests.get(player_url, timeout=10).json()
        player_stats = player_info['stats'][0]['splits'][0]['stat']

        # Collect the selected stats first so a missing field cannot leave the
        # player with a half-filled row
        goalie_row = {
            "Games": player_stats['games'],
            "Games Started": player_stats['gamesStarted'],
            "W": player_stats['wins'],
            "GAA": player_stats['goalAgainstAverage'],
            "SV%": player_stats['savePercentage'],
        }

    except (KeyError, IndexError, TypeError, ValueError, requests.RequestException):
        # Add stats as 0 if the request failed or the response holds no stats for
        # that season (missing key / empty 'splits' list). Only these expected
        # failures are caught: a bare except would also swallow KeyboardInterrupt,
        # making the loop impossible to stop from the notebook.
        goalie_row = {
            "Games": 0,
            "Games Started": 0,
            "W": 0,
            "GAA": 0,
            "SV%": 0,
        }

    player.update(goalie_row)

In [7]:
# Building function to get skater stats for a specific season
def get_skater_stats(data, season, timeout=10):
    """Add single-season stats to every skater dict in ``data``.

    Args:
        data: List of player dicts; each must contain a ``'player_id'`` string.
            The dicts are modified in place.
        season: Season identifier as a string (e.g. ``"20192020"``); it is
            appended to the stats URL as the ``season`` query parameter.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        The same ``data`` list, with the keys ``Games``, ``G``, ``A``,
        ``Points``, ``+/-``, ``PIM``, ``SOG`` and ``PPP`` set on every dict.
        All eight are set to 0 when the request fails or the response does
        not contain a complete stat line for that season.
    """
    # Output column -> field name in the API's 'stat' object
    stat_fields = {
        "Games": 'games',
        "G": 'goals',
        "A": 'assists',
        "Points": 'points',
        "+/-": 'plusMinus',
        "PIM": 'penaltyMinutes',
        "SOG": 'shots',
        "PPP": 'powerPlayPoints',
    }

    # Looping through every skater and adding stats from season
    for player in data:

        # Creating URL for API call for a specific player's stats in a specific season
        player_url = "https://statsapi.web.nhl.com/api/v1/people/" + player['player_id'] + "/stats?stats=statsSingleSeason&season=" + season

        try:
            # API call for player stats in JSON format
            player_info = requests.get(player_url, timeout=timeout).json()
            player_stats = player_info['stats'][0]['splits'][0]['stat']

            # Build the whole row before touching the player so a missing field
            # cannot leave a half-filled record
            row = {column: player_stats[field] for column, field in stat_fields.items()}
            row["PIM"] = int(row["PIM"])

        except (KeyError, IndexError, TypeError, ValueError, requests.RequestException):
            # Add stats as 0 if the request failed or no stat line exists for that
            # season (missing key / empty 'splits' list). Deliberately not a bare
            # except: KeyboardInterrupt must still be able to stop the loop.
            row = dict.fromkeys(stat_fields, 0)

        player.update(row)

    return data

In [8]:
# Fetch the most recent season's skater stats; a deep copy is passed in because
# get_skater_stats writes the stats into the dicts it receives
skater_data_season1 = get_skater_stats(data=deepcopy(skater_data), season=season1)

In [9]:
# Fetch the second (previous) season's skater stats; a deep copy is passed in
# because get_skater_stats writes the stats into the dicts it receives
skater_data_season2 = get_skater_stats(data=deepcopy(skater_data), season=season2)

In [16]:
# Turn each list of dictionaries into its own DataFrame (one row per player)
goalie_df, skater_season1_df, skater_season2_df = (
    pd.DataFrame(records)
    for records in (goalie_data, skater_data_season1, skater_data_season2)
)


In [17]:
# Cleaning goalie data: rank by wins, keep only goalies who played, drop the ID column.
# - The filter is a callable passed to .loc so the mask is computed on the already-sorted
#   frame (no "Boolean Series key will be reindexed" warning).
# - drop() names the axis via columns=; passing it positionally is rejected by pandas 2.x.
# - errors='ignore' lets the cell be re-run after player_id has already been removed.
goalie_df = (
    goalie_df
    .sort_values(['W'], ascending=False)
    .loc[lambda frame: frame['Games'] != 0]
    .drop(columns='player_id', errors='ignore')
)
print(goalie_df.to_string(index=False))


        Player Name Team Position  Games  Games Started  W    GAA   SV%
 Andrei Vasilevskiy  TBL        G     52             52 35 2.5561 0.917
  Connor Hellebuyck  WPG        G     58             56 31 2.5699 0.922
  Jordan Binnington  STL        G     50             50 30 2.5647 0.912
  Frederik Andersen  CAR        G     52             52 29 2.8537 0.909
  Marc-Andre Fleury  MIN        G     49             48 27 2.7707 0.905
      David Rittich  WPG        G     48             48 24 2.9657 0.907
        Carter Hart  PHI        G     43             40 24 2.4195 0.914
    Jacob Markstrom  CGY        G     43             43 23 2.7509 0.918
   Sergei Bobrovsky  FLA        G     50             49 23 3.2288 0.900
Mackenzie Blackwood  NJD        G     47             43 22 2.7720 0.915
        Petr Mrazek  CHI        G     40             38 21 2.6885 0.905
     Pavel Francouz  COL        G     34             31 21 2.4132 0.923
        John Gibson  ANA        G     51             51 20 2.998

  goalie_df = goalie_df.sort_values(['W'], ascending = False)[goalie_df['Games'] != 0]
  goalie_df = goalie_df.drop('player_id', 1)


In [18]:
# Cleaning skater data and print the data: rank by points, keep only skaters who
# played that season, drop the ID column.
# - The filter is a callable passed to .loc so the mask is computed on the already-sorted
#   frame (no "Boolean Series key will be reindexed" warning).
# - drop() names the axis via columns=; passing it positionally is rejected by pandas 2.x.
# - errors='ignore' lets the cell be re-run after player_id has already been removed.
skater_season2_df = (
    skater_season2_df
    .sort_values(['Points'], ascending=False)
    .loc[lambda frame: frame['Games'] != 0]
    .drop(columns='player_id', errors='ignore')
)
print(skater_season2_df.to_string(index=False))



             Player Name Team Position  Games  G  A  Points  +/-  PIM  SOG  PPP
         Nikita Kucherov  TBL       RW     82 41 87     128   24   62  246   48
          Connor McDavid  EDM        C     78 41 75     116    3   20  240   33
            Patrick Kane  CHI       RW     81 44 66     110    2   22  341   30
          Leon Draisaitl  EDM        C     82 50 55     105    2   52  231   29
           Sidney Crosby  PIT        C     79 35 65     100   18   36  220   29
           Brad Marchand  BOS       LW     79 36 64     100   15   96  231   34
        Nathan MacKinnon  COL        C     82 41 58      99   20   34  365   37
         Johnny Gaudreau  CBJ       LW     82 36 63      99   18   24  245   27
          Steven Stamkos  TBL        C     82 45 53      98    4   37  234   40
       Aleksander Barkov  FLA        C     82 35 61      96   -3    8  206   31
         Mitchell Marner  TOR       RW     82 26 68      94   22   22  233   21
           Brayden Point  TBL        C  

  skater_season2_df = skater_season2_df.sort_values(['Points'], ascending = False)[skater_season2_df['Games'] != 0]
  skater_season2_df = skater_season2_df.drop('player_id', 1)


In [19]:
# Cleaning most-recent-season skater data and print it: rank by points, keep only
# skaters who played that season, drop the ID column.
# - The filter is a callable passed to .loc so the mask is computed on the already-sorted
#   frame (no "Boolean Series key will be reindexed" warning).
# - drop() names the axis via columns=; passing it positionally is rejected by pandas 2.x.
# - errors='ignore' lets the cell be re-run after player_id has already been removed.
skater_season1_df = (
    skater_season1_df
    .sort_values(['Points'], ascending=False)
    .loc[lambda frame: frame['Games'] != 0]
    .drop(columns='player_id', errors='ignore')
)
print(skater_season1_df.to_string(index=False))

             Player Name Team Position  Games  G  A  Points  +/-  PIM  SOG  PPP
          Leon Draisaitl  EDM        C     71 43 67     110   -7   18  218   44
          Connor McDavid  EDM        C     64 34 63      97   -6   28  212   43
          Artemi Panarin  NYR       LW     69 32 63      95   36   20  209   24
          David Pastrnak  BOS       RW     70 48 47      95   21   40  279   38
        Nathan MacKinnon  COL        C     69 35 58      93   13   12  318   31
           Brad Marchand  BOS       LW     70 28 59      87   25   82  185   28
         Nikita Kucherov  TBL       RW     68 33 52      85   26   38  210   25
            Patrick Kane  CHI       RW     70 33 51      84    8   40  275   23
         Auston Matthews  TOR        C     70 47 33      80   19    8  290   25
             Jack Eichel  VGK        C     68 36 42      78    5   34  227   27
      Jonathan Huberdeau  CGY        C     69 23 55      78    5   30  152   29
          Mika Zibanejad  NYR        C  

  skater_season1_df = skater_season1_df.sort_values(['Points'], ascending = False)[skater_season1_df['Games'] != 0]
  skater_season1_df = skater_season1_df.drop('player_id', 1)


In [14]:
# Stack the two seasons' skater tables on top of each other (season1 rows first)
merged_df = pd.concat([skater_season1_df, skater_season2_df])

# Write each cleaned DF to its own csv file, leaving the index out
csv_outputs = (
    ("goalie_stats.csv", goalie_df),
    ("skater_stats_season1.csv", skater_season1_df),
    ("skater_stats_season2.csv", skater_season2_df),
    ("MergerdData.csv", merged_df),
)
for csv_name, frame in csv_outputs:
    frame.to_csv(csv_name, index=False)