In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import requests
import json
from bs4 import BeautifulSoup
import pandas as pd
from pandas import DataFrame, Series


In [2]:
# Pull regular-season (gameTypeId=2) skater summaries from the NHL stats API,
# one request per season, then collapse them into a single row of multi-season
# totals per player.
REQUEST_TIMEOUT_SECONDS = 30  # without a timeout, requests.get can block indefinitely

all_seasons = []

for season in range(2021, 2024):
    season_label = f"{season}-{season + 1}"
    summary_url = f"https://api.nhle.com/stats/rest/en/skater/summary?limit=-1&cayenneExp=seasonId={season}{season+1}%20and%20gameTypeId=2"

    try:
        summary_resp = requests.get(summary_url, timeout=REQUEST_TIMEOUT_SECONDS)
        summary_resp.raise_for_status()
        summary_json = summary_resp.json()
    # ValueError covers a non-JSON body on requests versions whose JSON error
    # is not a RequestException subclass.
    except (requests.exceptions.RequestException, ValueError) as e:
        print(f"Error fetching data for season {season_label}: {e}")
        continue

    # .get() so a payload without a 'data' key is reported as "no data"
    # instead of raising KeyError.
    season_rows = summary_json.get('data')
    if season_rows:
        df_summary = DataFrame(season_rows)
        df_summary['season'] = season_label
        all_seasons.append(df_summary)
        print(f"Successfully fetched data for season {season_label}")
    else:
        print(f"No data returned for season {season_label}")

# Fail with an explicit message rather than the NameError that printing an
# unassigned nhl_api_df would raise when every request failed.
if not all_seasons:
    raise RuntimeError(
        "No skater summary data could be fetched for any season; "
        "nhl_api_df cannot be built"
    )

# One row per playerId: name/position come from the player's first row,
# counting stats are summed across the fetched seasons.
nhl_api_df = (
    pd.concat(all_seasons, ignore_index=True)
    .groupby('playerId', as_index=False)
    .agg({
        'skaterFullName': 'first',
        'positionCode': 'first',
        'gamesPlayed': 'sum',
        'otGoals': 'sum',
        'gameWinningGoals': 'sum',
    })
)

print(nhl_api_df)

Successfully fetched data for season 2021-2022
Successfully fetched data for season 2022-2023
Successfully fetched data for season 2023-2024
      playerId     skaterFullName positionCode  gamesPlayed  otGoals  \
0      8465009        Zdeno Chara            D           72        0   
1      8466138       Joe Thornton            C           34        0   
2      8469455       Jason Spezza            C           71        0   
3      8470281       Duncan Keith            D           64        0   
4      8470595         Eric Staal            C           72        0   
...        ...                ...          ...          ...      ...   
1250   8484314       Jiri Smejkal            L           20        0   
1251   8484321  Nikolas Matinpalo            D            4        0   
1252   8484325    Waltteri Merela            C           19        0   
1253   8484326        Patrik Koch            D            1        0   
1254   8484911        Collin Graf            R            7        

In [3]:
# Keep skaters whose positionCode is not 'D' and who have at least 60 games
# played, renumber the rows, replace missing values with 0, and switch to the
# column names used by the later merge on 'Player'.
nhl_api_df = (
    nhl_api_df
    .loc[lambda skaters: (skaters['positionCode'] != 'D') & (skaters['gamesPlayed'] >= 60)]
    .reset_index(drop=True)
    .fillna(0)
    .rename(columns={
        'gameWinningGoals': 'game_winning_goals',
        'otGoals': 'ot_goals',
        'skaterFullName': 'Player',
    })
)

In [4]:
def _nst_url(score):
    """Return the Natural Stat Trick playerteams.php URL for one `score` value.

    The four queries used below are identical except for the `score` query
    parameter, so only that value is substituted.
    """
    return (
        "https://www.naturalstattrick.com/playerteams.php"
        "?fromseason=20212022&thruseason=20232024&stype=2&sit=all"
        f"&score={score}"
        "&stdoi=std&rate=n&team=ALL&pos=F&loc=B&toi=0&gpfilt=none"
        "&fd=&td=&tgp=410&lines=single&draftteam=ALL"
    )


goals_up_one_url = _nst_url("u1")
goals_down_one_url = _nst_url("d1")
tied_url = _nst_url("tied")
total_url = _nst_url("all")

In [5]:
# Download each page and keep the first HTML table found on it. The first row
# is the header, the first column becomes the index, and "-" cells are read
# as missing values.
goals_up_one_df, goals_down_one_df, goals_tied_df, total_df = (
    pd.read_html(page_url, header=0, index_col=0, na_values=["-"])[0]
    for page_url in (goals_up_one_url, goals_down_one_url, tied_url, total_url)
)

In [7]:
# Give each table's 'Goals' column a distinct name so the tables can be merged
# side by side without colliding.
for score_state_df, goals_label in (
    (goals_up_one_df, 'goals_up_by_one'),
    (goals_down_one_df, 'goals_down_by_one'),
    (goals_tied_df, 'goals_when_tied'),
    (total_df, 'total_goals'),
):
    score_state_df.rename(columns={'Goals': goals_label}, inplace=True)

In [9]:
# Join the up-by-one, down-by-one and tied goal counts on the player's name
# (default inner join), then keep only rows with at least 60 games played.
# GP is taken from the up-by-one table.
merged_natural_stat = (
    goals_up_one_df[['Player', 'GP', 'goals_up_by_one']]
    .merge(goals_down_one_df[['Player', 'goals_down_by_one']], on='Player')
    .merge(goals_tied_df[['Player', 'goals_when_tied']], on='Player')
    .loc[lambda joined: joined['GP'] >= 60]
)

In [10]:
# Show the combined Natural Stat Trick goal counts (rows with GP >= 60).
merged_natural_stat

Unnamed: 0,Player,GP,goals_up_by_one,goals_down_by_one,goals_when_tied
1,Jason Spezza,71,2,4,1
2,Eric Staal,72,3,2,3
3,Jeff Carter,227,12,10,9
4,Dustin Brown,64,1,2,2
5,Zach Parise,194,3,6,12
...,...,...,...,...,...
759,Juraj Slafkovsky,121,2,5,6
764,Ben Meyers,67,0,1,2
769,Andrei Kuzmenko,153,8,13,18
772,Connor Bedard,68,2,6,9


In [11]:
# Show the filtered, renamed NHL API totals for comparison with the
# Natural Stat Trick frame before the two are merged on 'Player'.
nhl_api_df

Unnamed: 0,playerId,Player,positionCode,gamesPlayed,ot_goals,game_winning_goals
0,8469455,Jason Spezza,C,71,0,1
1,8470595,Eric Staal,C,72,0,1
2,8470604,Jeff Carter,C,227,2,8
3,8470606,Dustin Brown,R,64,0,0
4,8470610,Zach Parise,L,194,1,7
...,...,...,...,...,...,...
508,8483515,Juraj Slafkovsky,L,121,0,2
509,8483570,Ben Meyers,C,67,0,0
510,8483808,Andrei Kuzmenko,L,153,3,8
511,8484144,Connor Bedard,C,68,1,2


In [13]:
# Left join keeps every NHL API player. The join key is the name string, so a
# player whose name is spelled differently in the two sources (e.g.
# 'Mitch Marner' vs 'Mitchell Marner') gets NaN in the Natural Stat Trick
# columns instead of being matched.
merged_clutch_goals = pd.merge(
    nhl_api_df,
    merged_natural_stat,
    how='left',
    on='Player',
)

In [14]:
# Show the NHL API totals joined with the Natural Stat Trick goal counts.
merged_clutch_goals

Unnamed: 0,playerId,Player,positionCode,gamesPlayed,ot_goals,game_winning_goals,GP,goals_up_by_one,goals_down_by_one,goals_when_tied
0,8469455,Jason Spezza,C,71,0,1,71.0,2.0,4.0,1.0
1,8470595,Eric Staal,C,72,0,1,72.0,3.0,2.0,3.0
2,8470604,Jeff Carter,C,227,2,8,227.0,12.0,10.0,9.0
3,8470606,Dustin Brown,R,64,0,0,64.0,1.0,2.0,2.0
4,8470610,Zach Parise,L,194,1,7,194.0,3.0,6.0,12.0
...,...,...,...,...,...,...,...,...,...,...
508,8483515,Juraj Slafkovsky,L,121,0,2,121.0,2.0,5.0,6.0
509,8483570,Ben Meyers,C,67,0,0,67.0,0.0,1.0,2.0
510,8483808,Andrei Kuzmenko,L,153,3,8,153.0,8.0,13.0,18.0
511,8484144,Connor Bedard,C,68,1,2,68.0,2.0,6.0,9.0


In [41]:
# Boolean Series: True where goals_up_by_one is missing, e.g. for rows the
# left merge on 'Player' could not match.
merged_clutch_goals['goals_up_by_one'].isnull()

0      False
1      False
2      False
3      False
4      False
       ...  
508    False
509    False
510    False
511    False
512    False
Name: goals_up_by_one, Length: 513, dtype: bool

In [45]:
# Look up the Natural Stat Trick spelling of the player's name. The lookup
# uses merged_natural_stat because no cell above defines a frame called
# `merged`, so the previous form raised NameError on a fresh kernel.
merged_natural_stat.loc[merged_natural_stat['Player'] == 'Mitchell Marner']

Unnamed: 0,Player,GP,goals_up_by_one,goals_down_by_one,goals_when_tied
68,Mitchell Marner,221,14,19,29


In [42]:
# Collect every row of the merged frame that has at least one missing value
# and print those rows under a heading.
has_missing_value = merged_clutch_goals.isna().any(axis=1)
null_rows = merged_clutch_goals.loc[has_missing_value]
print("Rows with null values:")
print(null_rows)

Rows with null values:
     playerId               Player positionCode  gamesPlayed  ot_goals  \
38    8474034       Patrick Maroon            L          212         0   
169   8477021    Alexander Kerfoot            C          246         0   
188   8477426            Nick Paul            L          242         0   
200   8477482      Zachary Sanford            L          125         0   
209   8477505   Alexander Wennberg            C          241         0   
290   8478483         Mitch Marner            R          221         3   
361   8479944  Zachary Aston-Reese            C          149         0   
388   8480031       Maxime Comtois            L          117         0   
402   8480281   Alexey Toropchenko            R          179         0   

     game_winning_goals  GP  goals_up_by_one  goals_down_by_one  \
38                    0 NaN              NaN                NaN   
169                   5 NaN              NaN                NaN   
188                   8 NaN        