# Converting Rates to Totals

Most of the features in our data are expressed as per-60-minute rates (`feature/60`), and it may be useful to work with the totals instead. Here I convert every column in the rate format `feature/60` back to a plain `feature` total, using `total = rate × TOI / 60`.

In [2]:
import pandas as pd
import numpy as np
import os

In [3]:
# Read the cleaned, merged CSV into a DataFrame.
filepath = '../../Data/entitiesResolved/merged_data_clean.csv'
data = pd.read_csv(filepath_or_buffer=filepath)

Something convenient I noticed is that TOI is already in the correct format: rather than minutes:seconds, it has already been converted to total minutes.

In [4]:
# Preview TOI: every row except the last five (what a negative head() count means).
data['TOI'].iloc[:-5]

0         951.616667
1        1754.250000
2         546.150000
3        1374.483333
4        1212.050000
            ...     
12947       6.350000
12948     876.383333
12949     964.033333
12950       8.683333
12951    1292.750000
Name: TOI, Length: 12952, dtype: float64

The second step is to find all columns that are in a rate format.

In [5]:
# Keep only the columns whose name contains the per-60 marker.
rate_column_names = [name for name in data.columns if '/60' in name]
filtered_columns = data[rate_column_names]

The third step is to convert each of these columns back to their totals.

In [6]:
# Turn every per-60 rate back into a total: total = rate * (TOI / 60),
# where TOI is already expressed in minutes.
# NOTE(review): every total is rounded to a whole number. That includes columns
# that look fractional by nature (e.g. IXG, XGF -- presumably expected goals);
# confirm integer totals are really wanted for those.
RATE_SUFFIX = '/60'
sixty_minute_blocks = data['TOI'] / 60  # loop-invariant, so compute it once

totals = {}
for column in filtered_columns.columns:
    # The columns were selected by substring match, so only strip the marker
    # when it really is the suffix; slicing otherwise would mangle the name.
    if not column.endswith(RATE_SUFFIX):
        continue
    new_col = column[:-len(RATE_SUFFIX)]
    totals[new_col] = np.round(data[column] * sixty_minute_blocks).astype('int')

# Attach all totals with a single concat rather than inserting them one at a
# time, which fragments the frame and triggers pandas' PerformanceWarning.
# Dropping any same-named columns first keeps the cell safe to re-run.
data = pd.concat(
    [
        data.drop(columns=list(totals), errors='ignore'),
        pd.DataFrame(totals, index=data.index),
    ],
    axis=1,
)

# Converting standard features to /GP
I would also like to take some of the more standard features, like Goals, Assists, etc., and convert them to a different rate: per game played (`/GP`).

In [8]:
# Show every column name as a plain Python list.
list(data.columns)

['POSITION',
 'PLAYER',
 'TEAM',
 'TOI',
 'GP',
 'TOI/GP',
 'GOALS/60',
 'TOTAL ASSISTS/60',
 'FIRST ASSISTS/60',
 'SECOND ASSISTS/60',
 'TOTAL POINTS/60',
 'IPP',
 'SHOTS/60',
 'SH%',
 'IXG/60',
 'ICF/60',
 'IFF/60',
 'ISCF/60',
 'IHDCF/60',
 'RUSH ATTEMPTS/60',
 'REBOUNDS CREATED/60',
 'PIM/60',
 'TOTAL PENALTIES/60',
 'MINOR/60',
 'MAJOR/60',
 'MISCONDUCT/60',
 'PENALTIES DRAWN/60',
 'GIVEAWAYS/60',
 'TAKEAWAYS/60',
 'HITS/60',
 'HITS TAKEN/60',
 'SHOTS BLOCKED/60',
 'FACEOFFS WON/60',
 'FACEOFFS LOST/60',
 'FACEOFFS %',
 'CF/60',
 'CA/60',
 'CF%',
 'FF/60',
 'FA/60',
 'FF%',
 'SF/60',
 'SA/60',
 'SF%',
 'GF/60',
 'GA/60',
 'GF%',
 'XGF/60',
 'XGA/60',
 'XGF%',
 'SCF/60',
 'SCA/60',
 'SCF%',
 'HDCF/60',
 'HDCA/60',
 'HDCF%',
 'HDGF/60',
 'HDGA/60',
 'HDGF%',
 'MDCF/60',
 'MDCA/60',
 'MDCF%',
 'MDGF/60',
 'MDGA/60',
 'MDGF%',
 'LDCF/60',
 'LDCA/60',
 'LDCF%',
 'LDGF/60',
 'LDGA/60',
 'LDGF%',
 'ON-ICE SH%',
 'ON-ICE SV%',
 'PDO',
 'OFF.\xa0ZONE STARTS/60',
 'NEU.\xa0ZONE STARTS/60',


In [9]:
# Names of the total columns that will each get a per-game ('/GP') counterpart.
# Order is preserved from the original listing.
convert_to_per_gp = [
    # Scoring and shot attempts
    'GOALS', 'TOTAL ASSISTS', 'FIRST ASSISTS', 'SECOND ASSISTS', 'TOTAL POINTS',
    'SHOTS', 'IXG', 'ICF', 'IFF', 'ISCF', 'IHDCF',
    'RUSH ATTEMPTS', 'REBOUNDS CREATED',
    # Penalties
    'PIM', 'TOTAL PENALTIES', 'MINOR', 'MAJOR', 'MISCONDUCT', 'PENALTIES DRAWN',
    # Giveaways, takeaways, hits, blocks, faceoffs
    'GIVEAWAYS', 'TAKEAWAYS', 'HITS', 'HITS TAKEN', 'SHOTS BLOCKED',
    'FACEOFFS WON', 'FACEOFFS LOST',
    # For/against pairs
    'CF', 'CA', 'FF', 'FA', 'SF', 'SA', 'GF', 'GA', 'XGF', 'XGA',
    'SCF', 'SCA',
    'HDCF', 'HDCA', 'HDGF', 'HDGA',
    'MDCF', 'MDCA', 'MDGF', 'MDGA',
    'LDCF', 'LDCA', 'LDGF', 'LDGA',
    # Zone starts and faceoffs (these names contain non-breaking spaces, \xa0)
    'OFF.\xa0ZONE STARTS', 'NEU.\xa0ZONE STARTS', 'DEF.\xa0ZONE STARTS',
    'ON\xa0THE\xa0FLY STARTS',
    'OFF.\xa0ZONE FACEOFFS', 'NEU.\xa0ZONE FACEOFFS', 'DEF.\xa0ZONE FACEOFFS',
]

In [14]:
# For each column in the list above, divide by the number of games played and
# store the result (rounded to 4 decimals) under '<column>/GP'.
games_played = data['GP']  # loop-invariant, so look it up once
per_gp = {
    column + '/GP': np.round(data[column] / games_played, 4)
    for column in convert_to_per_gp
}

# Attach all per-game columns with a single concat rather than inserting them
# one at a time, which fragments the frame and triggers pandas'
# PerformanceWarning. Dropping any same-named columns first keeps the cell
# safe to re-run.
data = pd.concat(
    [
        data.drop(columns=list(per_gp), errors='ignore'),
        pd.DataFrame(per_gp, index=data.index),
    ],
    axis=1,
)

In [15]:
# Display the augmented frame (pandas abbreviates the middle rows and columns).
data

Unnamed: 0,POSITION,PLAYER,TEAM,TOI,GP,TOI/GP,GOALS/60,TOTAL ASSISTS/60,FIRST ASSISTS/60,SECOND ASSISTS/60,...,LDCA/GP,LDGF/GP,LDGA/GP,OFF. ZONE STARTS/GP,NEU. ZONE STARTS/GP,DEF. ZONE STARTS/GP,ON THE FLY STARTS/GP,OFF. ZONE FACEOFFS/GP,NEU. ZONE FACEOFFS/GP,DEF. ZONE FACEOFFS/GP
0,r,adam burish,chi,951.616667,81,11.748354,0.25,0.25,0.19,0.06,...,7.9136,0.0741,0.1728,1.6420,3.4691,2.5926,10.1975,2.4444,4.5309,3.8025
1,d,adam foote,"cbj, col",1754.250000,75,23.390000,0.03,0.51,0.10,0.41,...,15.6933,0.1200,0.3867,1.9733,5.8400,4.6000,16.6800,4.7067,9.4267,9.7600
2,r,adam hall,pit,546.150000,46,11.872826,0.22,0.44,0.11,0.33,...,9.4783,0.0217,0.0217,1.5000,3.6739,3.8696,8.4783,2.1739,4.6087,5.8261
3,r,ales hemsky,edm,1374.483333,74,18.574099,0.87,2.23,1.57,0.65,...,9.3649,0.3514,0.0811,4.0676,5.3784,2.3919,10.6351,7.0135,7.5000,4.2027
4,r,ales kotalik,buf,1212.050000,79,15.342405,1.14,0.99,0.35,0.64,...,7.2658,0.4177,0.1266,4.0000,3.8608,1.8354,9.2405,6.3038,5.4304,3.1013
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12952,l,zach parise,col,234.533333,17,13.796078,1.02,0.77,0.51,0.26,...,15.2353,0.1765,0.2353,1.9412,2.8235,2.7059,12.3529,3.1765,4.0588,5.2353
12953,d,zach whitecloud,vgk,886.950000,50,17.739000,0.14,0.41,0.20,0.20,...,17.0200,0.1200,0.1800,1.7800,3.0800,3.1800,13.6800,4.1200,4.7200,5.8200
12954,r,zack macewen,ott,145.466667,24,6.061111,0.82,0.41,0.41,0.00,...,4.8750,0.1250,0.1667,1.1250,1.3750,0.5833,6.5833,1.7500,1.7500,1.3333
12955,c,zack ostapchuk,ott,62.450000,6,10.408333,0.00,0.00,0.00,0.00,...,10.3333,0.0000,0.0000,1.6667,2.1667,1.0000,10.6667,3.3333,2.5000,2.0000


In [17]:
# Spot-check one player: GOALS/GP should equal GOALS divided by GP on each row.
mcdavid = data.loc[data['PLAYER'].eq('connor mcdavid')]
mcdavid.loc[:, ['GP', 'GOALS', 'GOALS/GP']]

Unnamed: 0,GP,GOALS,GOALS/GP
5587,45,16,0.3556
6437,82,30,0.3659
7261,82,41,0.5
8093,78,41,0.5256
8927,64,34,0.5312
9730,56,33,0.5893
10590,80,44,0.55
11482,82,64,0.7805
12328,65,26,0.4


In [18]:
# Save the merged data to a csv file
output_dir = '../../Data/entitiesResolved'
# exist_ok=True creates the directory only when missing, without the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(output_dir, exist_ok=True)

output_file = os.path.join(output_dir, 'merged_data_final.csv')
# index=False: do not write the DataFrame index as an extra column.
data.to_csv(output_file, index=False)