In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error

pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

In [2]:
columns_read = ['Profile', 'Player', 'Goals',
                'Assists', 'Plus_minus', 'Penalties',
                'Shots', 'Hits', 'Shots_blocked',
                'Penalties_against', 'Icetime_seconds'
                ]
defenses = pd.read_csv('players/after_2014/defenses_match_after_2014.csv', usecols=columns_read)
defenses = pd.read_csv('players/after_2014/defenses_match_after_2014.csv')
goaltenders = pd.read_csv('players/after_2014/goaltenders_match_after_2014.csv')

In [4]:
columns_read = ['Profile', 'Player', 'Season', 'Year', 'Team', 'Winner',
                'Goals', 'Assists', 'Plus', 'Minus', 'Penalties',
                'Shots', 'Hits', 'Shots_blocked',
                'Penalties_against', 'Icetime_seconds', 'Rating'
                ]
defenses = pd.read_csv('defenses_match_with_rating.csv', usecols=columns_read)

In [5]:
defenses = defenses[defenses['Season'] == 'Regular season']
defenses = defenses[defenses['Points'] >= 10]
defenses = defenses[defenses['Games'] >= 20]
defenses = defenses[defenses['Icetime_seconds'] >= 480]
defenses = defenses[defenses.groupby(['Profile', 'Year']).Profile.transform('count') >= 20]
defenses.groupby(['Profile', 'Year'])['Player'].count()

Profile                           Year     
https://en.khl.ru/players/10162/  2014/2015    57
                                  2015/2016    56
                                  2016/2017    59
                                  2017/2018    46
                                  2019/2020    26
https://en.khl.ru/players/10176/  2014/2015    56
                                  2015/2016    60
                                  2016/2017    74
                                  2017/2018    31
                                  2018/2019    58
                                  2019/2020    44
                                  2020/2021    44
                                  2021/2022    28
https://en.khl.ru/players/10546/  2014/2015    73
                                  2015/2016    55
                                  2016/2017    63
                                  2017/2018    44
                                  2018/2019    62
                                  2019/2020    63
      

In [6]:
defenses = defenses[defenses.groupby(['Profile']).Year.transform('nunique') > 2]
defenses.groupby(['Profile', 'Year']).count()

defenses = defenses.groupby(['Profile', 'Year']).sum().reset_index()

In [7]:
defenses = defenses.assign(R = defenses['Rating'] / defenses['Icetime_seconds'] * 600)
defenses.drop(['Rating', 'Icetime_seconds'], axis=1, inplace=True)
defenses.head()


Unnamed: 0,Profile,Year,Goals,Assists,Plus,Minus,Penalties,Shots,Hits,Shots_blocked,Penalties_against,R
0,https://en.khl.ru/players/10162/,2014/2015,1,6,29,39,56,71,22.0,37.0,4.0,0.140694
1,https://en.khl.ru/players/10162/,2015/2016,8,17,40,34,62,161,62.0,64.0,5.0,1.037649
2,https://en.khl.ru/players/10162/,2016/2017,2,8,36,42,40,124,45.0,55.0,4.0,0.469554
3,https://en.khl.ru/players/10162/,2017/2018,0,6,20,30,46,62,18.0,52.0,2.0,0.139347
4,https://en.khl.ru/players/10162/,2019/2020,1,7,8,17,18,37,5.0,20.0,1.0,0.414211


In [81]:
defenses.groupby(['Profile', 'Year']).sum().reset_index().head()

Unnamed: 0,Profile,Year,Goals,Assists,Plus,Minus,Penalties,Shots,Hits,Shots_blocked,Penalties_against,R
0,https://en.khl.ru/players/10769/,2014/2015,13,23,30,42,47,151,136.0,16.0,26.0,2.955278
1,https://en.khl.ru/players/10769/,2015/2016,20,20,32,28,40,155,111.0,14.0,9.0,3.616655
2,https://en.khl.ru/players/10769/,2016/2017,16,18,26,17,49,123,88.0,20.0,10.0,3.083902
3,https://en.khl.ru/players/10769/,2017/2018,3,10,13,16,16,43,40.0,12.0,1.0,2.206179
4,https://en.khl.ru/players/10769/,2018/2019,14,14,28,26,28,104,73.0,15.0,15.0,2.628674


In [55]:
defenses.to_csv('defenses_seasons_with_rating_match.csv', encoding='utf8', index=False)

In [8]:
defenses[['T1_Profile', 'T1_Year',	'T1_Goals', 'T1_Assists',	'T1_Plus',	'T1_Minus', 'T1_Penalties',
          'T1_Shots', 'T1_Hits', 'T1_Shots_blocked', 'T1_Penalties_against', 'T1_R']] \
    = defenses[['Profile',  'Year',	'Goals', 'Assists',	'Plus',	'Minus', 'Penalties',
          'Shots', 'Hits', 'Shots_blocked', 'Penalties_against', 'R']].shift(-1)
defenses[['T2_Profile', 'T2_Year',	'T2_Goals', 'T2_Assists',	'T2_Plus',	'T2_Minus', 'T2_Penalties',
          'T2_Shots', 'T2_Hits', 'T2_Shots_blocked', 'T2_Penalties_against', 'T2_R']] \
    = defenses[['Profile',  'Year',	'Goals', 'Assists',	'Plus',	'Minus', 'Penalties',
                'Shots', 'Hits', 'Shots_blocked', 'Penalties_against', 'R']].shift(-2)
defenses.head()

Unnamed: 0,Profile,Year,Goals,Assists,Plus,Minus,Penalties,Shots,Hits,Shots_blocked,Penalties_against,R,T1_Profile,T1_Year,T1_Goals,T1_Assists,T1_Plus,T1_Minus,T1_Penalties,T1_Shots,T1_Hits,T1_Shots_blocked,T1_Penalties_against,T1_R,T2_Profile,T2_Year,T2_Goals,T2_Assists,T2_Plus,T2_Minus,T2_Penalties,T2_Shots,T2_Hits,T2_Shots_blocked,T2_Penalties_against,T2_R
0,https://en.khl.ru/players/10162/,2014/2015,1,6,29,39,56,71,22.0,37.0,4.0,0.140694,https://en.khl.ru/players/10162/,2015/2016,8.0,17.0,40.0,34.0,62.0,161.0,62.0,64.0,5.0,1.037649,https://en.khl.ru/players/10162/,2016/2017,2.0,8.0,36.0,42.0,40.0,124.0,45.0,55.0,4.0,0.469554
1,https://en.khl.ru/players/10162/,2015/2016,8,17,40,34,62,161,62.0,64.0,5.0,1.037649,https://en.khl.ru/players/10162/,2016/2017,2.0,8.0,36.0,42.0,40.0,124.0,45.0,55.0,4.0,0.469554,https://en.khl.ru/players/10162/,2017/2018,0.0,6.0,20.0,30.0,46.0,62.0,18.0,52.0,2.0,0.139347
2,https://en.khl.ru/players/10162/,2016/2017,2,8,36,42,40,124,45.0,55.0,4.0,0.469554,https://en.khl.ru/players/10162/,2017/2018,0.0,6.0,20.0,30.0,46.0,62.0,18.0,52.0,2.0,0.139347,https://en.khl.ru/players/10162/,2019/2020,1.0,7.0,8.0,17.0,18.0,37.0,5.0,20.0,1.0,0.414211
3,https://en.khl.ru/players/10162/,2017/2018,0,6,20,30,46,62,18.0,52.0,2.0,0.139347,https://en.khl.ru/players/10162/,2019/2020,1.0,7.0,8.0,17.0,18.0,37.0,5.0,20.0,1.0,0.414211,https://en.khl.ru/players/10176/,2014/2015,4.0,11.0,50.0,16.0,25.0,69.0,75.0,86.0,10.0,1.898193
4,https://en.khl.ru/players/10162/,2019/2020,1,7,8,17,18,37,5.0,20.0,1.0,0.414211,https://en.khl.ru/players/10176/,2014/2015,4.0,11.0,50.0,16.0,25.0,69.0,75.0,86.0,10.0,1.898193,https://en.khl.ru/players/10176/,2015/2016,5.0,9.0,29.0,31.0,14.0,94.0,48.0,96.0,9.0,1.070168


In [9]:
defenses = defenses[(defenses['Profile'] == defenses['T1_Profile']) & (defenses['Profile'] == defenses['T2_Profile'])]
defenses.reset_index(drop=True, inplace=True)
defenses.head()

Unnamed: 0,Profile,Year,Goals,Assists,Plus,Minus,Penalties,Shots,Hits,Shots_blocked,Penalties_against,R,T1_Profile,T1_Year,T1_Goals,T1_Assists,T1_Plus,T1_Minus,T1_Penalties,T1_Shots,T1_Hits,T1_Shots_blocked,T1_Penalties_against,T1_R,T2_Profile,T2_Year,T2_Goals,T2_Assists,T2_Plus,T2_Minus,T2_Penalties,T2_Shots,T2_Hits,T2_Shots_blocked,T2_Penalties_against,T2_R
0,https://en.khl.ru/players/10162/,2014/2015,1,6,29,39,56,71,22.0,37.0,4.0,0.140694,https://en.khl.ru/players/10162/,2015/2016,8.0,17.0,40.0,34.0,62.0,161.0,62.0,64.0,5.0,1.037649,https://en.khl.ru/players/10162/,2016/2017,2.0,8.0,36.0,42.0,40.0,124.0,45.0,55.0,4.0,0.469554
1,https://en.khl.ru/players/10162/,2015/2016,8,17,40,34,62,161,62.0,64.0,5.0,1.037649,https://en.khl.ru/players/10162/,2016/2017,2.0,8.0,36.0,42.0,40.0,124.0,45.0,55.0,4.0,0.469554,https://en.khl.ru/players/10162/,2017/2018,0.0,6.0,20.0,30.0,46.0,62.0,18.0,52.0,2.0,0.139347
2,https://en.khl.ru/players/10162/,2016/2017,2,8,36,42,40,124,45.0,55.0,4.0,0.469554,https://en.khl.ru/players/10162/,2017/2018,0.0,6.0,20.0,30.0,46.0,62.0,18.0,52.0,2.0,0.139347,https://en.khl.ru/players/10162/,2019/2020,1.0,7.0,8.0,17.0,18.0,37.0,5.0,20.0,1.0,0.414211
3,https://en.khl.ru/players/10176/,2014/2015,4,11,50,16,25,69,75.0,86.0,10.0,1.898193,https://en.khl.ru/players/10176/,2015/2016,5.0,9.0,29.0,31.0,14.0,94.0,48.0,96.0,9.0,1.070168,https://en.khl.ru/players/10176/,2016/2017,12.0,29.0,60.0,22.0,22.0,206.0,64.0,103.0,9.0,2.3363
4,https://en.khl.ru/players/10176/,2015/2016,5,9,29,31,14,94,48.0,96.0,9.0,1.070168,https://en.khl.ru/players/10176/,2016/2017,12.0,29.0,60.0,22.0,22.0,206.0,64.0,103.0,9.0,2.3363,https://en.khl.ru/players/10176/,2017/2018,2.0,8.0,19.0,9.0,29.0,86.0,22.0,25.0,4.0,1.58046


In [10]:
defenses.drop(['T1_Profile', 'T2_Profile'], axis=1, inplace=True)
defenses.drop(['T1_Year', 'T2_Year'], axis=1, inplace=True)
defenses.drop(['Profile'], axis=1, inplace=True)

In [11]:
dummies = defenses.copy()
dummies = pd.get_dummies(dummies, drop_first=True)
dummies.head()

Unnamed: 0,Goals,Assists,Plus,Minus,Penalties,Shots,Hits,Shots_blocked,Penalties_against,R,T1_Goals,T1_Assists,T1_Plus,T1_Minus,T1_Penalties,T1_Shots,T1_Hits,T1_Shots_blocked,T1_Penalties_against,T1_R,T2_Goals,T2_Assists,T2_Plus,T2_Minus,T2_Penalties,T2_Shots,T2_Hits,T2_Shots_blocked,T2_Penalties_against,T2_R,Year_2015/2016,Year_2016/2017,Year_2017/2018,Year_2018/2019,Year_2019/2020
0,1,6,29,39,56,71,22.0,37.0,4.0,0.140694,8.0,17.0,40.0,34.0,62.0,161.0,62.0,64.0,5.0,1.037649,2.0,8.0,36.0,42.0,40.0,124.0,45.0,55.0,4.0,0.469554,0,0,0,0,0
1,8,17,40,34,62,161,62.0,64.0,5.0,1.037649,2.0,8.0,36.0,42.0,40.0,124.0,45.0,55.0,4.0,0.469554,0.0,6.0,20.0,30.0,46.0,62.0,18.0,52.0,2.0,0.139347,1,0,0,0,0
2,2,8,36,42,40,124,45.0,55.0,4.0,0.469554,0.0,6.0,20.0,30.0,46.0,62.0,18.0,52.0,2.0,0.139347,1.0,7.0,8.0,17.0,18.0,37.0,5.0,20.0,1.0,0.414211,0,1,0,0,0
3,4,11,50,16,25,69,75.0,86.0,10.0,1.898193,5.0,9.0,29.0,31.0,14.0,94.0,48.0,96.0,9.0,1.070168,12.0,29.0,60.0,22.0,22.0,206.0,64.0,103.0,9.0,2.3363,0,0,0,0,0
4,5,9,29,31,14,94,48.0,96.0,9.0,1.070168,12.0,29.0,60.0,22.0,22.0,206.0,64.0,103.0,9.0,2.3363,2.0,8.0,19.0,9.0,29.0,86.0,22.0,25.0,4.0,1.58046,1,0,0,0,0


In [12]:
y = dummies['T2_R'].copy()
X = dummies.drop(
    ['T2_R', 'T2_Goals', 'T2_Assists',	'T2_Plus',	'T2_Minus', 'T2_Penalties',
     'T2_Shots', 'T2_Hits', 'T2_Shots_blocked', 'T2_Penalties_against'], axis=1).copy()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [61]:
print(X_test.head())

      Goals  Assists  Plus  Minus  Penalties  Shots  Hits  Shots_blocked  \
413      10        5    15     15         14     64  16.0           13.0   
316      10        8    28     12          6    108  17.0            5.0   
1034      3       10    23     17         20     71  27.0           22.0   
65       12       18    30     29         30     90  55.0           13.0   
1024      4        0     6      2          4     27  22.0            5.0   

      Penalties_against         R  T1_Goals  T1_Assists  T1_Plus  T1_Minus  \
413                13.0  2.329095       4.0         2.0     10.0      21.0   
316                 8.0  3.682755      25.0        18.0     29.0      21.0   
1034                9.0  1.972319       5.0         6.0     27.0      22.0   
65                 15.0  2.515040       2.0         9.0     10.0      13.0   
1024                6.0  2.119270       1.0         6.0      5.0      13.0   

      T1_Penalties  T1_Shots  T1_Hits  T1_Shots_blocked  T1_Penalties_agai

In [111]:
y_pred = X_test['T1_R']

In [13]:
print('MSE:', mean_squared_error(y_pred, y_test, squared=True))
print('RMSE:', mean_squared_error(y_pred, y_test, squared=False))
print('MAE:', mean_absolute_error(y_pred, y_test))
print('R_squared:', r2_score(y_pred, y_test))
print('MAPE:', mean_absolute_percentage_error(y_pred, y_test))

NameError: name 'y_pred' is not defined

In [64]:
y_pred = (X_test['R'] + X_test['T1_R']) / 2

In [65]:
print('MSE:', mean_squared_error(y_pred, y_test, squared=True))
print('RMSE:', mean_squared_error(y_pred, y_test, squared=False))
print('MAE:', mean_absolute_error(y_pred, y_test))
print('R_squared:', r2_score(y_pred, y_test))
print('MAPE:', mean_absolute_percentage_error(y_pred, y_test))

MSE: 0.4305789446536373
RMSE: 0.6561851451028415
MAE: 0.5276192468690054
R_squared: 0.18084247391882013


In [14]:
from sklearn.linear_model import LinearRegression

linear = LinearRegression(n_jobs=-1)
linear.fit(X_train, y_train)
y_pred = linear.predict(X_test)

In [15]:
print('MSE:', mean_squared_error(y_pred, y_test, squared=True))
print('RMSE:', mean_squared_error(y_pred, y_test, squared=False))
print('MAE:', mean_absolute_error(y_pred, y_test))
print('R_squared:', r2_score(y_pred, y_test))
print('MAPE:', mean_absolute_percentage_error(y_pred, y_test))

# Getting coefficients in a more readable form.
linear_coef = pd.DataFrame(zip(X.columns, linear.coef_))
linear_coef.columns = ['Feature', 'Coefficient']
print(linear_coef)


MSE: 0.14348475828711504
RMSE: 0.3787938202863334
MAE: 0.29499001561683164
R_squared: -0.9883868209129187
MAPE: 0.36275718522483946
                 Feature  Coefficient
0                  Goals     0.000034
1                Assists     0.001434
2                   Plus    -0.003213
3                  Minus    -0.002292
4              Penalties    -0.001138
5                  Shots     0.001944
6                   Hits     0.000194
7          Shots_blocked    -0.000047
8      Penalties_against     0.006024
9                      R     0.165823
10              T1_Goals    -0.005039
11            T1_Assists    -0.002249
12               T1_Plus     0.008757
13              T1_Minus    -0.010183
14          T1_Penalties    -0.001403
15              T1_Shots     0.002368
16               T1_Hits     0.001111
17      T1_Shots_blocked     0.000470
18  T1_Penalties_against     0.003478
19                  T1_R     0.129391
20        Year_2015/2016    -0.090617
21        Year_2016/2017    -0.1

In [16]:
from sklearn.ensemble import RandomForestRegressor

forest = RandomForestRegressor(n_jobs=-1)
forest.fit(X_train, y_train)
y_pred = forest.predict(X_test)


In [17]:
print('MSE:', mean_squared_error(y_pred, y_test, squared=True))
print('RMSE:', mean_squared_error(y_pred, y_test, squared=False))
print('MAE:', mean_absolute_error(y_pred, y_test))
print('R_squared:', r2_score(y_pred, y_test))
print('MAPE:', mean_absolute_percentage_error(y_pred, y_test))

MSE: 0.14282171469652377
RMSE: 0.3779176030519401
MAE: 0.29287717142850805
R_squared: -1.016867371622499
MAPE: 0.3582798820832331


In [18]:
from sklearn.ensemble import GradientBoostingRegressor

gradient = GradientBoostingRegressor()
gradient.fit(X_train, y_train)
y_pred = gradient.predict(X_test)

In [19]:
print('MSE:', mean_squared_error(y_pred, y_test, squared=True))
print('RMSE:', mean_squared_error(y_pred, y_test, squared=False))
print('MAE:', mean_absolute_error(y_pred, y_test))
print('R_squared:', r2_score(y_pred, y_test))
print('MAPE:', mean_absolute_percentage_error(y_pred, y_test))


MSE: 0.15109913831827024
RMSE: 0.3887147261402252
MAE: 0.3012607228661394
R_squared: -0.967450259270711
MAPE: 0.39512220210136145


In [20]:
from sklearn.linear_model import LassoCV

lasso = LassoCV()
lasso.fit(X_train, y_train)
y_pred = lasso.predict(X_test)

In [21]:
print('MSE:', mean_squared_error(y_pred, y_test, squared=True))
print('RMSE:', mean_squared_error(y_pred, y_test, squared=False))
print('MAE:', mean_absolute_error(y_pred, y_test))
print('R_squared:', r2_score(y_pred, y_test))
print('MAPE:', mean_absolute_percentage_error(y_pred, y_test))

MSE: 0.14078866211085456
RMSE: 0.37521815269367575
MAE: 0.28953997778115026
R_squared: -1.1224488694488945
MAPE: 0.35803927281889536


In [22]:
from sklearn.linear_model import Ridge

ridge = Ridge()
ridge.fit(X_train, y_train)
y_pred = ridge.predict(X_test)

In [23]:
print('MSE:', mean_squared_error(y_pred, y_test, squared=True))
print('RMSE:', mean_squared_error(y_pred, y_test, squared=False))
print('MAE:', mean_absolute_error(y_pred, y_test))
print('R_squared:', r2_score(y_pred, y_test))
print('MAPE:', mean_absolute_percentage_error(y_pred, y_test))

MSE: 0.14327585129902481
RMSE: 0.3785179669434792
MAE: 0.29459581579819943
R_squared: -0.9916470112785083
MAPE: 0.36216254249234775
