# Getting the data ready for the Front-End

In [2]:
import pandas as pd
import os

# Location of the merged dataset CSV, relative to this notebook.
filepath = '../../Data/Warehouse/MergedData/merged_data_clean.csv'

# Read the CSV into the frame that the later cells operate on.
data = pd.read_csv(filepath_or_buffer=filepath)

# Preview the frame: a negative n returns every row except the last five.
data.head(n=-5)

Unnamed: 0,TEAM,PLAYER,POSITION,GP,TOI/GP,TOI,IPP,GOALS,TOTAL ASSISTS,FIRST ASSISTS,...,CAP HIT,AAV,SALARY,BASE SALARY,S.BONUS,P.BONUS,SEASON,SALARY CAP,SALARY CAP PERCENTAGE,DECEASED
0,chi,adam burish,r,81,11.748354,951.616667,40.00,4,4,3,...,575000,575000,575000,500000,75000,0,2007-08,50300000,0.011431,0
1,pit,adam hall,r,46,11.872826,546.150000,75.00,2,4,1,...,525000,525000,525000,525000,0,0,2007-08,50300000,0.010437,0
2,edm,ales hemsky,r,74,18.574099,1374.483333,81.61,20,51,36,...,4100000,4100000,3600000,3600000,0,0,2007-08,50300000,0.081511,0
3,buf,ales kotalik,r,79,15.342405,1212.050000,67.19,23,20,7,...,2333333,2333333,2500000,2500000,0,0,2007-08,50300000,0.046388,0
4,pit,alex goligoski,d,3,13.938889,41.816667,100.00,0,2,1,...,845300,984200,795300,500000,295300,188900,2007-08,50300000,0.016805,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12139,det,zach aston-reese,c,1,6.350000,6.350000,,0,0,0,...,775000,775000,775000,775000,0,0,2023-24,83500000,0.009281,0
12140,buf,zach benson,l,60,14.606389,876.383333,63.89,9,14,8,...,950000,1600000,950000,855000,95000,650000,2023-24,83500000,0.011377,0
12141,"min, t.b",zach bogosian,d,55,17.527879,964.033333,29.27,3,9,4,...,850000,850000,1050000,1050000,0,0,2023-24,83500000,0.010180,0
12142,stl,zach dean,c,1,8.683333,8.683333,,0,0,0,...,852500,863333,892500,800000,92500,32500,2023-24,83500000,0.010210,0


## Dropping Columns
The model generally found the /GP rates more useful than the /60 rates, so for simplicity we will drop the /60 columns.

In [3]:
# Drop every column whose name ends in '/60'.
# `DataFrame.filter(regex=...)` matches the pattern anywhere in the label, so the
# pattern is anchored with `$` to select only names with a trailing '/60'.
per60_columns = data.filter(regex=r'/60$').columns
data = data.drop(columns=per60_columns)

In [4]:
# Show the remaining column names as a plain Python list.
list(data.columns)

['TEAM',
 'PLAYER',
 'POSITION',
 'GP',
 'TOI/GP',
 'TOI',
 'IPP',
 'GOALS',
 'TOTAL ASSISTS',
 'FIRST ASSISTS',
 'SECOND ASSISTS',
 'TOTAL POINTS',
 'SHOTS',
 'IXG',
 'ICF',
 'IFF',
 'ISCF',
 'IHDCF',
 'RUSH ATTEMPTS',
 'REBOUNDS CREATED',
 'PIM',
 'TOTAL PENALTIES',
 'MINOR',
 'MAJOR',
 'MISCONDUCT',
 'PENALTIES DRAWN',
 'GIVEAWAYS',
 'TAKEAWAYS',
 'HITS',
 'HITS TAKEN',
 'SHOTS BLOCKED',
 'FACEOFFS WON',
 'FACEOFFS LOST',
 'GOALS/GP',
 'TOTAL ASSISTS/GP',
 'FIRST ASSISTS/GP',
 'SECOND ASSISTS/GP',
 'TOTAL POINTS/GP',
 'SHOTS/GP',
 'IXG/GP',
 'ICF/GP',
 'IFF/GP',
 'ISCF/GP',
 'IHDCF/GP',
 'RUSH ATTEMPTS/GP',
 'REBOUNDS CREATED/GP',
 'PIM/GP',
 'TOTAL PENALTIES/GP',
 'MINOR/GP',
 'MAJOR/GP',
 'MISCONDUCT/GP',
 'PENALTIES DRAWN/GP',
 'GIVEAWAYS/GP',
 'TAKEAWAYS/GP',
 'HITS/GP',
 'HITS TAKEN/GP',
 'SHOTS BLOCKED/GP',
 'FACEOFFS WON/GP',
 'FACEOFFS LOST/GP',
 'SH%',
 'FACEOFFS %',
 'PDO',
 'CF',
 'CA',
 'FF',
 'FA',
 'SF',
 'SA',
 'GF',
 'GA',
 'XGF',
 'XGA',
 'SCF',
 'SCA',
 'HDCF',
 'H

In [6]:
# Load in the predictions.
# Defence and forwards predictions are stored in separate CSV files.
defence_filepath = '../../Data/Warehouse/Predictions/defence_predictions.csv'
forwards_filepath = '../../Data/Warehouse/Predictions/forwards_predictions.csv'

defence_predictions = pd.read_csv(defence_filepath)
forwards_predictions = pd.read_csv(forwards_filepath)

# Preview the first rows of the forwards predictions.
forwards_predictions.head()

Unnamed: 0,PLAYER,SEASON,TOI/GP,PRED_SALARY_PERCENTAGE
0,adam burish,2007-08,11.748354,0.01401
1,adam hall,2007-08,11.872826,0.015781
2,ales hemsky,2007-08,18.574099,0.073371
3,ales kotalik,2007-08,15.342405,0.04505
4,alex ovechkin,2007-08,23.101423,0.027567


In [7]:
# Columns used to line up each stats row with its prediction row.
# NOTE(review): 'TOI/GP' is a float read back from CSV; the join relies on it
# comparing exactly equal on both sides — confirm this is intended.
join_keys = ['PLAYER', 'SEASON', 'TOI/GP']

# Inner joins: only rows present in both the stats and the predictions are kept.
forwards_merged = pd.merge(data, forwards_predictions, how='inner', on=join_keys)
defence_merged = pd.merge(data, defence_predictions, how='inner', on=join_keys)

# Stack the forwards rows on top of the defence rows.
final_data = pd.concat([forwards_merged, defence_merged])



In [11]:
# Order rows by season, then by player, and renumber the index from zero.
season_player_sorted = final_data.sort_values(by=['SEASON', 'PLAYER'])
final_data = season_player_sorted.reset_index(drop=True)

# Preview the identifying columns and the prediction (all rows except the last ten).
preview_columns = ['PLAYER', 'SEASON', 'TEAM', 'PRED_SALARY_PERCENTAGE']
final_data[preview_columns].head(-10)

Unnamed: 0,PLAYER,SEASON,TEAM,PRED_SALARY_PERCENTAGE
0,adam burish,2007-08,chi,0.014010
1,adam hall,2007-08,pit,0.015781
2,ales hemsky,2007-08,edm,0.073371
3,ales kotalik,2007-08,buf,0.045050
4,alex goligoski,2007-08,pit,0.034137
...,...,...,...,...
12134,wyatt kaiser,2023-24,chi,0.027145
12135,yakov trenin,2023-24,"col, nsh",0.026159
12136,yanni gourde,2023-24,sea,0.048688
12137,yegor chinakhov,2023-24,cbj,0.011167


In [12]:
# Save the data
outpath_dir = '../../Data/Warehouse/FrontEnd/'
outpath_file = 'front_end_data.csv'

# exist_ok=True creates the directory (and any missing parents) only when needed,
# without a separate existence check that could race with another process.
os.makedirs(outpath_dir, exist_ok=True)

# os.path.join builds the path correctly whether or not outpath_dir ends in a slash.
final_data.to_csv(os.path.join(outpath_dir, outpath_file), index=False)