# All Data is from Moneypuck.com

In [1]:
import pandas as pd
import numpy as np
import os
import ipynb

In [16]:
# Look up a team's final place in the 2023/2024 regular-season standings
def define_team_rank(row):
    """Return the 1-based standings position of the team named in ``row["team"]``.

    ``row`` only needs to support ``row["team"]`` (a DataFrame row or a plain
    dict both work). An abbreviation that is not in the table below raises
    ``ValueError``.
    """
    # Ordered from first place to last place; position in the list + 1 is
    # the place the team finished.
    standings = [
        "NYR", "DAL", "CAR", "WPG", "FLA", "VAN", "BOS", "COL",
        "EDM", "TOR", "NSH", "LAK", "TBL", "VGK", "NYI", "STL",
        "WSH", "DET", "PIT", "MIN", "PHI", "BUF", 'NJD', "CGY",
        "SEA", "OTT", "ARI", "MTL", "CBJ", "ANA", "CHI", "SJS",
    ]
    team = row["team"]
    return 1 + standings.index(team)

In [22]:
# Extract the player data by position

def extract_skater_data():
    """Load the skater and goalie stats, attach contract data and team rank.

    Reads ``skaters.csv``, ``goalies.csv`` and ``NHL_Contract_data.csv`` from
    ``./moneyPuckData``, merges the contract table into both stat tables on
    the ``name`` column and adds a ``team_rank`` column computed by
    ``define_team_rank``.

    Returns:
        A tuple ``(defencemen, centers, left_wing, right_wing, forwards,
        goalies_table)``. The first four are the skater rows whose
        ``position`` is "D", "C", "L" and "R" respectively; ``forwards`` is
        every skater row whose position is not "D". Each skater frame is an
        independent copy, so callers can modify it freely.
    """
    # Context managers release the file handles even if parsing raises.
    with open('./moneyPuckData/skaters.csv', 'r') as skaters_file:
        skater_table = pd.read_csv(skaters_file)
    with open('./moneyPuckData/goalies.csv', 'r') as goalies_file:
        goalies_table = pd.read_csv(goalies_file)
    with open('./moneyPuckData/NHL_Contract_data.csv', 'r') as cap_hit_file:
        cap_table = pd.read_csv(cap_hit_file)

    # Remove unnecessary values like "shifts" because we will transform all
    # data to per 60
    skater_table = skater_table.drop(columns=["games_played", "shifts"])
    goalies_table = goalies_table.drop(columns=["games_played"])

    # Merge the cap hits with the statistical data. merge() defaults to an
    # inner join, so rows whose name has no match in the contract file are
    # left out of the result.
    skater_table = skater_table.merge(cap_table, on=["name"])
    goalies_table = goalies_table.merge(cap_table, on=["name"])

    skater_table["team_rank"] = skater_table.apply(define_team_rank, axis=1)
    goalies_table["team_rank"] = goalies_table.apply(define_team_rank, axis=1)

    # Explicit copies detach each group from skater_table, so later edits by
    # the caller do not trigger pandas' chained-assignment warning.
    position = skater_table["position"]
    defencemen = skater_table.loc[position == "D"].copy()
    centers = skater_table.loc[position == "C"].copy()
    left_wing = skater_table.loc[position == "L"].copy()
    right_wing = skater_table.loc[position == "R"].copy()
    forwards = skater_table.loc[position != "D"].copy()

    return defencemen, centers, left_wing, right_wing, forwards, goalies_table




In [9]:
# Drop the rows of players with too little ice time to be meaningful
def remove_barely_played(df):
    """Return the rows of ``df`` whose "icetime" is strictly greater than 6000.

    With ice time recorded in seconds, 6000 corresponds to 100 minutes. Rows
    at exactly 6000 are excluded. The original index labels are kept.
    """
    played_enough = df["icetime"].gt(6000)
    return df.loc[played_enough]

In [10]:
# Return a dataframe making all relevant stats standardized at a per 60 minute rate
def transform_data_per_60(df):
    """Return a copy of ``df`` with its stats expressed per 60 minutes of ice time.

    ``df`` must contain "icetime" (in seconds), "team_rank" and "cap_hit"
    columns. The column at position 1 and every column from position 9
    onward are divided by the hours of ice time; "icetime" itself is
    converted to minutes. "team_rank" and "cap_hit" are returned with their
    original values.

    Derived columns:
        * "finishing" (``I_F_goals - I_F_xGoals``) when "I_F_goals" exists;
        * otherwise, when "goals" exists: "GSAE", "Low_Danger",
          "Medium_Danger" and "High_Danger" (expected minus actual goals).

    The input frame is left untouched; callers should use the return value.
    """
    # Work on a private copy: the argument is usually a slice of a bigger
    # frame and must not be modified behind the caller's back.
    out = df.copy()

    # Copy the values that must survive the scaling below, so they cannot
    # alias the columns that are about to be rewritten.
    raw_team_rank = out["team_rank"].copy()
    raw_cap_hit = out["cap_hit"].copy()

    # ICETIME IS IN SECONDS: store it as minutes, and derive hours from that
    # because dividing by hours played gives a per-60-minute rate.
    out["icetime"] = out["icetime"].div(60)
    hours_played = out["icetime"].div(60)

    # Replace whole columns by label rather than writing through iloc, so
    # integer columns can become float without an in-place dtype conflict.
    per_60_columns = [out.columns[1], *out.columns[9:]]
    out[per_60_columns] = out[per_60_columns].div(hours_played, axis=0)

    # These are not rate stats; put the untouched values back.
    out["team_rank"] = raw_team_rank
    out["cap_hit"] = raw_cap_hit

    # Add a finishing variable (skaters) or goals-saved variables (goalies)
    if "I_F_goals" in out.columns:
        out["finishing"] = out["I_F_goals"] - out["I_F_xGoals"]
    elif "goals" in out.columns:
        # NOTE(review): "goals" and the danger-split columns are only per-60
        # here if they sit at position 1 or 9+ of the incoming frame. Confirm
        # the goalie column order; otherwise these differences mix rates
        # with raw totals.
        out["GSAE"] = out["xGoals"] - out["goals"]
        out["Low_Danger"] = out["lowDangerxGoals"] - out["lowDangerGoals"]
        out["Medium_Danger"] = out["mediumDangerxGoals"] - out["mediumDangerGoals"]
        out["High_Danger"] = out["highDangerxGoals"] - out["highDangerGoals"]

    return out

In [5]:
# Extract the situation data from a given dataframe
def extract_skater_situational_data(df):
    """Split ``df`` into one frame per value of its "situation" column.

    Returns:
        A tuple ``(evenStrength, powerPlay, penaltyKill, allSituations,
        other)`` holding the rows whose "situation" equals "5on5", "5on4",
        "4on5", "all" and "other" respectively. A frame is empty when no row
        carries that label.
    """
    situation = df["situation"]

    evenStrength = df.loc[situation == "5on5"]
    powerPlay = df.loc[situation == "5on4"]
    penaltyKill = df.loc[situation == "4on5"]
    allSituations = df.loc[situation == "all"]
    # The data labels this bucket "other"; the previous "ohter" spelling
    # never matched, so this frame always came back empty.
    other = df.loc[situation == "other"]

    return evenStrength, powerPlay, penaltyKill, allSituations, other

In [11]:
# Separate the leading identifying columns from the remaining columns
def split_data_and_names(df):
    """Split ``df`` by column position into ``(names, data)``.

    ``names`` is the first five columns and ``data`` is everything from the
    seventh column onward. The column at position 5 appears in neither
    result.
    """
    leading_columns = slice(None, 5)
    trailing_columns = slice(6, None)
    return df.iloc[:, leading_columns], df.iloc[:, trailing_columns]

In [12]:
# Load every position group (plus the goalies) with contract data attached.
defencemen, centers, left_wing, right_wing, forwards, goalies = extract_skater_data()


# Face-off columns are dropped for defencemen. Rebind the result rather than
# dropping in place, so this never modifies a frame that may still be tied to
# the table it was selected from (pandas warns about that pattern).
defencemen = defencemen.drop(columns=["faceoffsWon", "faceoffsLost", "I_F_faceOffsWon"])
print(defencemen)
# Write the merged goalie table out for inspection / later use.
goalies.to_csv("./goalies.csv", index=False)

      playerId  season             name team position situation  icetime  \
0      8480950    2023  Ilya Lyubushkin  TOR        D     other   2881.0   
1      8480950    2023  Ilya Lyubushkin  TOR        D       all  76034.0   
2      8480950    2023  Ilya Lyubushkin  TOR        D      5on5  61758.0   
3      8480950    2023  Ilya Lyubushkin  TOR        D      4on5  11271.0   
4      8480950    2023  Ilya Lyubushkin  TOR        D      5on4    124.0   
...        ...     ...              ...  ...      ...       ...      ...   
3045   8477488    2023      Brett Pesce  CAR        D     other   2731.0   
3046   8477488    2023      Brett Pesce  CAR        D       all  85212.0   
3047   8477488    2023      Brett Pesce  CAR        D      5on5  72590.0   
3048   8477488    2023      Brett Pesce  CAR        D      4on5   9349.0   
3049   8477488    2023      Brett Pesce  CAR        D      5on4    542.0   

      gameScore  onIce_xGoalsPercentage  offIce_xGoalsPercentage  ...  \
0          5.0

In [13]:
# Defencemen pipeline: split by game situation, persist, filter, split off
# the identifying columns, convert to per-60 rates and persist again.
evenStrengthD, powerPlayD, penaltyKillD, allSituationsD, otherD = extract_skater_situational_data(defencemen)
# These three frames are stored BEFORE the ice-time filter below, so the
# stored copies still include the barely-played rows.
%store evenStrengthD
%store penaltyKillD
%store powerPlayD
# Keep only rows with more than 6000 of ice time in each situation.
evenStrengthD, powerPlayD, penaltyKillD = remove_barely_played(evenStrengthD), remove_barely_played(powerPlayD), remove_barely_played(penaltyKillD)
# Separate the leading identifying columns from the stat columns.
evenStrengthD_names, evenStrengthD_data = split_data_and_names(evenStrengthD)
powerPlayD_names, powerPlayD_data = split_data_and_names(powerPlayD)
penaltyKillD_names, penaltyKillD_data = split_data_and_names(penaltyKillD)

# Standardize the stat columns to per-60-minute rates.
transformed_defence_data = transform_data_per_60(evenStrengthD_data)
transformed_ppD_data = transform_data_per_60(powerPlayD_data)
transformed_pkD_data = transform_data_per_60(penaltyKillD_data)

# Persist each transformed frame together with its matching names frame.
%store transformed_defence_data
%store evenStrengthD_names

%store transformed_ppD_data
%store powerPlayD_names
%store transformed_pkD_data
%store penaltyKillD_names

# Re-attach the names to the even-strength rates column-wise and export them.
test_df = pd.concat([evenStrengthD_names, transformed_defence_data], axis=1)
test_df.to_csv("./evenStrengthDPer60.csv", index=False)

Stored 'evenStrengthD' (DataFrame)
Stored 'penaltyKillD' (DataFrame)
Stored 'powerPlayD' (DataFrame)
Stored 'transformed_defence_data' (DataFrame)
Stored 'evenStrengthD_names' (DataFrame)
Stored 'transformed_ppD_data' (DataFrame)
Stored 'powerPlayD_names' (DataFrame)
Stored 'transformed_pkD_data' (DataFrame)
Stored 'penaltyKillD_names' (DataFrame)


In [23]:
# Forwards pipeline: export the raw table, then split by game situation,
# persist, filter, split off the identifying columns and convert to per-60.
forwards.to_csv("./forwards.csv", index=False)
evenStrengthF, powerPlayF, penaltyKillF, allSituationsF, otherF = extract_skater_situational_data(forwards)
# Stored BEFORE the ice-time filter below, so the stored copies still
# include the barely-played rows.
%store evenStrengthF
%store powerPlayF
%store penaltyKillF
# Keep only rows with more than 6000 of ice time in each situation.
evenStrengthF, powerPlayF, penaltyKillF = remove_barely_played(evenStrengthF), remove_barely_played(powerPlayF), remove_barely_played(penaltyKillF)
# Separate the leading identifying columns from the stat columns.
evenStrengthF_names, evenStrengthF_data = split_data_and_names(evenStrengthF)
powerPlayF_names, powerPlayF_data = split_data_and_names(powerPlayF)
penaltyKillF_names, penaltyKillF_data = split_data_and_names(penaltyKillF)

# Standardize the stat columns to per-60-minute rates.
transformed_forward_data = transform_data_per_60(evenStrengthF_data)
transformed_ppF_data = transform_data_per_60(powerPlayF_data)
transformed_pkF_data = transform_data_per_60(penaltyKillF_data)

# Persist each transformed frame together with its matching names frame.
%store transformed_forward_data
%store evenStrengthF_names

%store transformed_ppF_data
%store powerPlayF_names
%store transformed_pkF_data
%store penaltyKillF_names

Stored 'evenStrengthF' (DataFrame)
Stored 'powerPlayF' (DataFrame)
Stored 'penaltyKillF' (DataFrame)
Stored 'transformed_forward_data' (DataFrame)
Stored 'evenStrengthF_names' (DataFrame)
Stored 'transformed_ppF_data' (DataFrame)
Stored 'powerPlayF_names' (DataFrame)
Stored 'transformed_pkF_data' (DataFrame)
Stored 'penaltyKillF_names' (DataFrame)


In [15]:
# Goalies pipeline: the same steps as for skaters, applied to the goalie table.
evenStrengthG, powerPlayG, penaltyKillG, allSituationsG, otherG = extract_skater_situational_data(goalies)
# Stored BEFORE the ice-time filter below, so the stored copies still
# include the barely-played rows.
%store evenStrengthG
%store powerPlayG
%store penaltyKillG

# Quick visual check of the merged cap hits for the even-strength rows.
print(evenStrengthG["cap_hit"])


# Keep only rows with more than 6000 of ice time in each situation.
evenStrengthG, powerPlayG, penaltyKillG = remove_barely_played(evenStrengthG), remove_barely_played(powerPlayG), remove_barely_played(penaltyKillG)
# Separate the leading identifying columns from the stat columns.
evenStrengthG_names, evenStrengthG_data = split_data_and_names(evenStrengthG)
powerPlayG_names, powerPlayG_data = split_data_and_names(powerPlayG)
penaltyKillG_names, penaltyKillG_data = split_data_and_names(penaltyKillG)

# Standardize the stat columns to per-60-minute rates.
transformed_goalie_data = transform_data_per_60(evenStrengthG_data)
transformed_ppG_data = transform_data_per_60(powerPlayG_data)
transformed_pkG_data = transform_data_per_60(penaltyKillG_data)

# Persist each transformed frame together with its matching names frame.
%store transformed_goalie_data
%store evenStrengthG_names

%store transformed_ppG_data
%store powerPlayG_names
%store transformed_pkG_data
%store penaltyKillG_names

Stored 'evenStrengthG' (DataFrame)
Stored 'powerPlayG' (DataFrame)
Stored 'penaltyKillG' (DataFrame)
2      5666667
7      4000000
12     1500000
17     5000000
22     5000000
        ...   
322    4750000
327     766666
332    1100000
337    5250000
342     962500
Name: cap_hit, Length: 69, dtype: int64
HI
HI
HI
Stored 'transformed_goalie_data' (DataFrame)
Stored 'evenStrengthG_names' (DataFrame)
Stored 'transformed_ppG_data' (DataFrame)
Stored 'powerPlayG_names' (DataFrame)
Stored 'transformed_pkG_data' (DataFrame)
Stored 'penaltyKillG_names' (DataFrame)
