In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error

pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

In [2]:
columns_read = ['Profile', 'Player', 'Goals',
                'Assists', 'Plus_minus', 'Penalties',
                'Shots', 'Hits', 'Shots_blocked',
                'Penalties_against', 'Icetime_seconds'
                ]
forwards = pd.read_csv('players/after_2014/forwards_match_after_2014.csv', usecols=columns_read)
defenses = pd.read_csv('players/after_2014/defenses_match_after_2014.csv')
goaltenders = pd.read_csv('players/after_2014/goaltenders_match_after_2014.csv')

In [2]:
columns_read = ['Profile', 'Player', 'Season', 'Year', 'Team', 'Winner',
                'Goals', 'Assists', 'Plus', 'Minus', 'Penalties',
                'Shots', 'Hits', 'Shots_blocked',
                'Penalties_against', 'Icetime_seconds', 'Rating'
                ]
forwards = pd.read_csv('forwards_match_with_rating.csv', usecols=columns_read)

In [105]:
forwards = forwards[forwards['Season'] == 'Regular season']
forwards = forwards[forwards['Games'] >= 20]
forwards = forwards[forwards['Icetime_seconds'] >= 480]
forwards = forwards[forwards.groupby(['Profile', 'Year']).Profile.transform('count') >= 20]
forwards.groupby(['Profile', 'Year'])['Player'].count()

Profile                           Year     
https://en.khl.ru/players/10769/  2014/2015    54
                                  2015/2016    57
                                  2016/2017    51
                                  2017/2018    22
                                  2018/2019    49
                                  2019/2020    59
                                  2020/2021    23
https://en.khl.ru/players/10892/  2014/2015    39
                                  2015/2016    55
https://en.khl.ru/players/10900/  2014/2015    59
                                  2015/2016    66
                                  2016/2017    49
https://en.khl.ru/players/10942/  2014/2015    21
https://en.khl.ru/players/10992/  2014/2015    20
https://en.khl.ru/players/119/    2014/2015    49
                                  2015/2016    60
                                  2016/2017    54
https://en.khl.ru/players/120/    2014/2015    63
                                  2015/2016    62
      

In [3]:
forwards = forwards[forwards.groupby(['Profile']).Year.transform('nunique') > 2]
forwards.groupby(['Profile', 'Year']).count()

forwards = forwards.groupby(['Profile', 'Year']).sum().reset_index()

In [67]:
forwards.head()

Unnamed: 0,Year,Goals,Assists,Plus,Minus,Penalties,Shots,Icetime_seconds,Hits,Shots_blocked,Penalties_against,Rating,T1_Goals,T1_Assists,T1_Plus,T1_Minus,T1_Penalties,T1_Shots,T1_Icetime_seconds,T1_Hits,T1_Shots_blocked,T1_Penalties_against,T1_Rating,T2_Goals,T2_Assists,T2_Plus,T2_Minus,T2_Penalties,T2_Shots,T2_Icetime_seconds,T2_Hits,T2_Shots_blocked,T2_Penalties_against,T2_Rating
0,2014/2015,13,23,30,42,47,151,51317,136.0,16.0,26.0,239,20.0,20.0,32.0,28.0,40.0,155.0,50002.0,111.0,14.0,9.0,284.0,16.0,18.0,26.0,17.0,49.0,123.0,50416.0,88.0,20.0,10.0,241.0
1,2015/2016,20,20,32,28,40,155,50002,111.0,14.0,9.0,284,16.0,18.0,26.0,17.0,49.0,123.0,50416.0,88.0,20.0,10.0,241.0,3.0,10.0,13.0,16.0,16.0,43.0,22301.0,40.0,12.0,1.0,76.0
2,2016/2017,16,18,26,17,49,123,50416,88.0,20.0,10.0,241,3.0,10.0,13.0,16.0,16.0,43.0,22301.0,40.0,12.0,1.0,76.0,14.0,14.0,28.0,26.0,28.0,104.0,49396.0,73.0,15.0,15.0,202.0
3,2017/2018,3,10,13,16,16,43,22301,40.0,12.0,1.0,76,14.0,14.0,28.0,26.0,28.0,104.0,49396.0,73.0,15.0,15.0,202.0,17.0,17.0,33.0,50.0,34.0,122.0,61315.0,84.0,27.0,6.0,200.0
4,2018/2019,14,14,28,26,28,104,49396,73.0,15.0,15.0,202,17.0,17.0,33.0,50.0,34.0,122.0,61315.0,84.0,27.0,6.0,200.0,2.0,8.0,9.0,18.0,6.0,35.0,21689.0,17.0,16.0,2.0,47.0


In [4]:
forwards.groupby(['Profile', 'Year']).sum().reset_index().head()

Unnamed: 0,Profile,Year,Goals,Assists,Plus,Minus,Penalties,Shots,Icetime_seconds,Hits,Shots_blocked,Penalties_against,Rating
0,https://en.khl.ru/players/10769/,2014/2015,13,23,30,42,47,151,51317,136.0,16.0,26.0,239
1,https://en.khl.ru/players/10769/,2015/2016,20,21,33,28,65,156,50225,112.0,14.0,9.0,276
2,https://en.khl.ru/players/10769/,2016/2017,16,18,26,17,49,123,50416,88.0,20.0,10.0,241
3,https://en.khl.ru/players/10769/,2017/2018,3,10,13,16,16,43,22301,40.0,12.0,1.0,76
4,https://en.khl.ru/players/10769/,2018/2019,14,14,28,27,28,105,49657,73.0,15.0,15.0,201


In [29]:
forwards.to_csv('forwards_seasons.csv', encoding='utf8', index=False)

In [5]:
forwards[['T1_Profile', 'T1_Year',	'T1_Goals', 'T1_Assists',	'T1_Plus',	'T1_Minus', 'T1_Penalties',
          'T1_Shots', 'T1_Icetime_seconds', 'T1_Hits', 'T1_Shots_blocked', 'T1_Penalties_against', 'T1_Rating']] \
    = forwards[['Profile',  'Year',	'Goals', 'Assists',	'Plus',	'Minus', 'Penalties',
          'Shots', 'Icetime_seconds', 'Hits', 'Shots_blocked', 'Penalties_against', 'Rating']].shift(-1)
forwards[['T2_Profile', 'T2_Year',	'T2_Goals', 'T2_Assists',	'T2_Plus',	'T2_Minus', 'T2_Penalties',
          'T2_Shots', 'T2_Icetime_seconds', 'T2_Hits', 'T2_Shots_blocked', 'T2_Penalties_against', 'T2_Rating']] \
    = forwards[['Profile',  'Year',	'Goals', 'Assists',	'Plus',	'Minus', 'Penalties',
                'Shots', 'Icetime_seconds', 'Hits', 'Shots_blocked', 'Penalties_against', 'Rating']].shift(-2)
forwards.head()

Unnamed: 0,Profile,Year,Goals,Assists,Plus,Minus,Penalties,Shots,Icetime_seconds,Hits,Shots_blocked,Penalties_against,Rating,T1_Profile,T1_Year,T1_Goals,T1_Assists,T1_Plus,T1_Minus,T1_Penalties,T1_Shots,T1_Icetime_seconds,T1_Hits,T1_Shots_blocked,T1_Penalties_against,T1_Rating,T2_Profile,T2_Year,T2_Goals,T2_Assists,T2_Plus,T2_Minus,T2_Penalties,T2_Shots,T2_Icetime_seconds,T2_Hits,T2_Shots_blocked,T2_Penalties_against,T2_Rating
0,https://en.khl.ru/players/10769/,2014/2015,13,23,30,42,47,151,51317,136.0,16.0,26.0,239,https://en.khl.ru/players/10769/,2015/2016,20.0,21.0,33.0,28.0,65.0,156.0,50225.0,112.0,14.0,9.0,276.0,https://en.khl.ru/players/10769/,2016/2017,16.0,18.0,26.0,17.0,49.0,123.0,50416.0,88.0,20.0,10.0,241.0
1,https://en.khl.ru/players/10769/,2015/2016,20,21,33,28,65,156,50225,112.0,14.0,9.0,276,https://en.khl.ru/players/10769/,2016/2017,16.0,18.0,26.0,17.0,49.0,123.0,50416.0,88.0,20.0,10.0,241.0,https://en.khl.ru/players/10769/,2017/2018,3.0,10.0,13.0,16.0,16.0,43.0,22301.0,40.0,12.0,1.0,76.0
2,https://en.khl.ru/players/10769/,2016/2017,16,18,26,17,49,123,50416,88.0,20.0,10.0,241,https://en.khl.ru/players/10769/,2017/2018,3.0,10.0,13.0,16.0,16.0,43.0,22301.0,40.0,12.0,1.0,76.0,https://en.khl.ru/players/10769/,2018/2019,14.0,14.0,28.0,27.0,28.0,105.0,49657.0,73.0,15.0,15.0,201.0
3,https://en.khl.ru/players/10769/,2017/2018,3,10,13,16,16,43,22301,40.0,12.0,1.0,76,https://en.khl.ru/players/10769/,2018/2019,14.0,14.0,28.0,27.0,28.0,105.0,49657.0,73.0,15.0,15.0,201.0,https://en.khl.ru/players/10769/,2019/2020,17.0,17.0,33.0,50.0,36.0,123.0,61759.0,84.0,27.0,6.0,200.0
4,https://en.khl.ru/players/10769/,2018/2019,14,14,28,27,28,105,49657,73.0,15.0,15.0,201,https://en.khl.ru/players/10769/,2019/2020,17.0,17.0,33.0,50.0,36.0,123.0,61759.0,84.0,27.0,6.0,200.0,https://en.khl.ru/players/10769/,2020/2021,2.0,8.0,9.0,18.0,31.0,36.0,22012.0,18.0,16.0,2.0,32.0


In [6]:
forwards = forwards[(forwards['Profile'] == forwards['T1_Profile']) & (forwards['Profile'] == forwards['T2_Profile'])]
forwards.reset_index(drop=True, inplace=True)
forwards.head()

Unnamed: 0,Profile,Year,Goals,Assists,Plus,Minus,Penalties,Shots,Icetime_seconds,Hits,Shots_blocked,Penalties_against,Rating,T1_Profile,T1_Year,T1_Goals,T1_Assists,T1_Plus,T1_Minus,T1_Penalties,T1_Shots,T1_Icetime_seconds,T1_Hits,T1_Shots_blocked,T1_Penalties_against,T1_Rating,T2_Profile,T2_Year,T2_Goals,T2_Assists,T2_Plus,T2_Minus,T2_Penalties,T2_Shots,T2_Icetime_seconds,T2_Hits,T2_Shots_blocked,T2_Penalties_against,T2_Rating
0,https://en.khl.ru/players/10769/,2014/2015,13,23,30,42,47,151,51317,136.0,16.0,26.0,239,https://en.khl.ru/players/10769/,2015/2016,20.0,21.0,33.0,28.0,65.0,156.0,50225.0,112.0,14.0,9.0,276.0,https://en.khl.ru/players/10769/,2016/2017,16.0,18.0,26.0,17.0,49.0,123.0,50416.0,88.0,20.0,10.0,241.0
1,https://en.khl.ru/players/10769/,2015/2016,20,21,33,28,65,156,50225,112.0,14.0,9.0,276,https://en.khl.ru/players/10769/,2016/2017,16.0,18.0,26.0,17.0,49.0,123.0,50416.0,88.0,20.0,10.0,241.0,https://en.khl.ru/players/10769/,2017/2018,3.0,10.0,13.0,16.0,16.0,43.0,22301.0,40.0,12.0,1.0,76.0
2,https://en.khl.ru/players/10769/,2016/2017,16,18,26,17,49,123,50416,88.0,20.0,10.0,241,https://en.khl.ru/players/10769/,2017/2018,3.0,10.0,13.0,16.0,16.0,43.0,22301.0,40.0,12.0,1.0,76.0,https://en.khl.ru/players/10769/,2018/2019,14.0,14.0,28.0,27.0,28.0,105.0,49657.0,73.0,15.0,15.0,201.0
3,https://en.khl.ru/players/10769/,2017/2018,3,10,13,16,16,43,22301,40.0,12.0,1.0,76,https://en.khl.ru/players/10769/,2018/2019,14.0,14.0,28.0,27.0,28.0,105.0,49657.0,73.0,15.0,15.0,201.0,https://en.khl.ru/players/10769/,2019/2020,17.0,17.0,33.0,50.0,36.0,123.0,61759.0,84.0,27.0,6.0,200.0
4,https://en.khl.ru/players/10769/,2018/2019,14,14,28,27,28,105,49657,73.0,15.0,15.0,201,https://en.khl.ru/players/10769/,2019/2020,17.0,17.0,33.0,50.0,36.0,123.0,61759.0,84.0,27.0,6.0,200.0,https://en.khl.ru/players/10769/,2020/2021,2.0,8.0,9.0,18.0,31.0,36.0,22012.0,18.0,16.0,2.0,32.0


In [7]:
forwards.drop(['T1_Profile', 'T2_Profile'], axis=1, inplace=True)
forwards.drop(['T1_Year', 'T2_Year'], axis=1, inplace=True)
forwards.drop(['Profile'], axis=1, inplace=True)

In [8]:
dummies = forwards.copy()
dummies = pd.get_dummies(dummies, drop_first=True)
dummies.head()

Unnamed: 0,Goals,Assists,Plus,Minus,Penalties,Shots,Icetime_seconds,Hits,Shots_blocked,Penalties_against,Rating,T1_Goals,T1_Assists,T1_Plus,T1_Minus,T1_Penalties,T1_Shots,T1_Icetime_seconds,T1_Hits,T1_Shots_blocked,T1_Penalties_against,T1_Rating,T2_Goals,T2_Assists,T2_Plus,T2_Minus,T2_Penalties,T2_Shots,T2_Icetime_seconds,T2_Hits,T2_Shots_blocked,T2_Penalties_against,T2_Rating,Year_2015/2016,Year_2016/2017,Year_2017/2018,Year_2018/2019,Year_2019/2020
0,13,23,30,42,47,151,51317,136.0,16.0,26.0,239,20.0,21.0,33.0,28.0,65.0,156.0,50225.0,112.0,14.0,9.0,276.0,16.0,18.0,26.0,17.0,49.0,123.0,50416.0,88.0,20.0,10.0,241.0,0,0,0,0,0
1,20,21,33,28,65,156,50225,112.0,14.0,9.0,276,16.0,18.0,26.0,17.0,49.0,123.0,50416.0,88.0,20.0,10.0,241.0,3.0,10.0,13.0,16.0,16.0,43.0,22301.0,40.0,12.0,1.0,76.0,1,0,0,0,0
2,16,18,26,17,49,123,50416,88.0,20.0,10.0,241,3.0,10.0,13.0,16.0,16.0,43.0,22301.0,40.0,12.0,1.0,76.0,14.0,14.0,28.0,27.0,28.0,105.0,49657.0,73.0,15.0,15.0,201.0,0,1,0,0,0
3,3,10,13,16,16,43,22301,40.0,12.0,1.0,76,14.0,14.0,28.0,27.0,28.0,105.0,49657.0,73.0,15.0,15.0,201.0,17.0,17.0,33.0,50.0,36.0,123.0,61759.0,84.0,27.0,6.0,200.0,0,0,1,0,0
4,14,14,28,27,28,105,49657,73.0,15.0,15.0,201,17.0,17.0,33.0,50.0,36.0,123.0,61759.0,84.0,27.0,6.0,200.0,2.0,8.0,9.0,18.0,31.0,36.0,22012.0,18.0,16.0,2.0,32.0,0,0,0,1,0


In [9]:
y = dummies['T2_Rating'].copy()
X = dummies.drop(
    ['T2_Rating', 'T2_Goals', 'T2_Assists',	'T2_Plus',	'T2_Minus', 'T2_Penalties',
     'T2_Shots', 'T2_Icetime_seconds', 'T2_Hits', 'T2_Shots_blocked', 'T2_Penalties_against'], axis=1).copy()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [70]:
print(X_test.head())

      Goals  Assists  Plus  Minus  Penalties  Shots  Icetime_seconds  Hits  \
413      10        5    15     15         14     64            31377  16.0   
316      10        8    28     12          6    108            31162  17.0   
1034      3       10    23     17         20     71            37643  27.0   
65       12       18    30     29         30     90            49235  55.0   
1024      4        0     6      2          4     27            15880  22.0   

      Shots_blocked  Penalties_against  Rating  T1_Goals  T1_Assists  T1_Plus  \
413            13.0               13.0     112       4.0         2.0     10.0   
316             5.0                8.0     179      25.0        18.0     29.0   
1034           22.0                9.0     106       5.0         6.0     27.0   
65             13.0               15.0     196       2.0         9.0     10.0   
1024            5.0                6.0      51       1.0         6.0      5.0   

      T1_Minus  T1_Penalties  T1_Shots  T1_I

In [10]:
y_pred = X_test['T1_Rating']

In [93]:
print('MSE:', mean_squared_error(y_pred, y_test, squared=True))
print('RMSE:', mean_squared_error(y_pred, y_test, squared=False))
print('MAE:', mean_absolute_error(y_pred, y_test))
print('R_squared:', r2_score(y_pred, y_test))
print('MAPE:', mean_absolute_percentage_error(y_pred, y_test))


MSE: 9197.745361662819
RMSE: 95.9048766312893
MAE: 72.64535796766744
R_squared: 0.2824965043629657
MAPE: 0.6590206400374361


In [11]:
y_pred = (X_test['Rating'] + X_test['T1_Rating']) / 2

In [74]:
print('MSE:', mean_squared_error(y_pred, y_test, squared=True))
print('RMSE:', mean_squared_error(y_pred, y_test, squared=False))
print('MAE:', mean_absolute_error(y_pred, y_test))
print('R_squared:', r2_score(y_pred, y_test))
print('MAPE:', mean_absolute_percentage_error(y_pred, y_test))


MSE: 6574.959584295612
RMSE: 81.08612448684184
MAE: 62.66743648960739
R_squared: 0.28121753856378684


In [12]:
from sklearn.linear_model import LinearRegression

linear = LinearRegression(n_jobs=-1)
linear.fit(X_train, y_train)
y_pred = linear.predict(X_test)

In [124]:
print('MSE:', mean_squared_error(y_pred, y_test, squared=True))
print('RMSE:', mean_squared_error(y_pred, y_test, squared=False))
print('MAE:', mean_absolute_error(y_pred, y_test))
print('R_squared:', r2_score(y_pred, y_test))
print('MAPE:', mean_absolute_percentage_error(y_pred, y_test))

# Getting coefficients in a more readable form.
linear_coef = pd.DataFrame(zip(X.columns, linear.coef_))
linear_coef.columns = ['Feature', 'Coefficient']
print(linear_coef)

MSE: 25943059.98905302
RMSE: 5093.433025872925
MAE: 898.5711676768377
R_squared: -0.0005034233367244845
MAPE: 17.389083180479687
                                          Feature   Coefficient
0                                           Goals -3.578504e+10
1                                         Assists -2.764620e+10
2                                            Plus -1.595470e+10
3                                           Minus  1.530876e+10
4                                       Penalties  4.198606e+09
5                                           Shots -3.035915e+09
6                                 Icetime_seconds -1.959903e-05
7                                            Hits -1.744036e+09
8                                   Shots_blocked -3.746448e+09
9                               Penalties_against -5.296703e+09
10                                         Rating  6.459393e+09
11                                       T1_Goals -1.555581e+10
12                                     

In [13]:
linear.score()

TypeError: score() missing 2 required positional arguments: 'X' and 'y'

In [114]:
from sklearn.ensemble import RandomForestRegressor

forest = RandomForestRegressor(n_jobs=-1)
forest.fit(X_train, y_train)
y_pred = forest.predict(X_test)


In [115]:
print('MSE:', mean_squared_error(y_pred, y_test, squared=True))
print('RMSE:', mean_squared_error(y_pred, y_test, squared=False))
print('MAE:', mean_absolute_error(y_pred, y_test))
print('R_squared:', r2_score(y_pred, y_test))
print('MAPE:', mean_absolute_percentage_error(y_pred, y_test))


MSE: 6281.310614179424
RMSE: 79.25471982272995
MAE: 61.91260300230947
R_squared: -0.00257484248021167
MAPE: 0.431015642181195


In [116]:
from sklearn.ensemble import GradientBoostingRegressor

gradient = GradientBoostingRegressor()
gradient.fit(X_train, y_train)
y_pred = gradient.predict(X_test)

In [None]:
print('MSE:', mean_squared_error(y_pred, y_test, squared=True))
print('RMSE:', mean_squared_error(y_pred, y_test, squared=False))
print('MAE:', mean_absolute_error(y_pred, y_test))
print('R_squared:', r2_score(y_pred, y_test))
print('MAPE:', mean_absolute_percentage_error(y_pred, y_test))


In [119]:
from sklearn.linear_model import LassoCV

lasso = LassoCV()
lasso.fit(X_train, y_train)
y_pred = lasso.predict(X_test)

In [120]:
print('MSE:', mean_squared_error(y_pred, y_test, squared=True))
print('RMSE:', mean_squared_error(y_pred, y_test, squared=False))
print('MAE:', mean_absolute_error(y_pred, y_test))
print('R_squared:', r2_score(y_pred, y_test))
print('MAPE:', mean_absolute_percentage_error(y_pred, y_test))

MSE: 6210.0475316866
RMSE: 78.80385480220241
MAE: 61.89201478381197
R_squared: -0.2697016601845803
MAPE: 0.4135151560017307


In [121]:
from sklearn.linear_model import Ridge

ridge = Ridge()
ridge.fit(X_train, y_train)
y_pred = ridge.predict(X_test)

In [122]:
print('MSE:', mean_squared_error(y_pred, y_test, squared=True))
print('RMSE:', mean_squared_error(y_pred, y_test, squared=False))
print('MAE:', mean_absolute_error(y_pred, y_test))
print('R_squared:', r2_score(y_pred, y_test))
print('MAPE:', mean_absolute_percentage_error(y_pred, y_test))




MSE: 7374.822263559906
RMSE: 85.87678535879127
MAE: 67.27801374634588
R_squared: 0.1824268827320532
MAPE: 0.49690804465106714
