In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error

pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

In [2]:
columns_read = ['Profile', 'Player', 'Goals',
                'Assists', 'Plus_minus', 'Penalties',
                'Shots', 'Hits', 'Shots_blocked',
                'Penalties_against', 'Icetime_seconds'
                ]
defenses = pd.read_csv('players/after_2014/defenses_match_after_2014.csv', usecols=columns_read)
defenses = pd.read_csv('players/after_2014/defenses_match_after_2014.csv')
goaltenders = pd.read_csv('players/after_2014/goaltenders_match_after_2014.csv')

In [3]:
columns_read = ['Profile', 'Player', 'Season', 'Year', 'Team', 'Winner',
                'Goals', 'Assists', 'Plus', 'Minus', 'Penalties',
                'Shots', 'Hits', 'Shots_blocked',
                'Penalties_against', 'Icetime_seconds', 'Rating'
                ]
defenses = pd.read_csv('defenses_match_with_rating.csv', usecols=columns_read)

In [4]:
defenses = defenses[defenses['Season'] == 'Regular season']
defenses = defenses[defenses['Points'] >= 10]
defenses = defenses[defenses['Games'] >= 20]
defenses = defenses[defenses['Icetime_seconds'] >= 480]
defenses = defenses[defenses.groupby(['Profile', 'Year']).Profile.transform('count') >= 20]
defenses.groupby(['Profile', 'Year'])['Player'].count()

Profile                           Year     
https://en.khl.ru/players/10162/  2014/2015    57
                                  2015/2016    56
                                  2016/2017    59
                                  2017/2018    46
                                  2019/2020    26
https://en.khl.ru/players/10176/  2014/2015    56
                                  2015/2016    60
                                  2016/2017    74
                                  2017/2018    31
                                  2018/2019    58
                                  2019/2020    44
                                  2020/2021    44
                                  2021/2022    28
https://en.khl.ru/players/10546/  2014/2015    73
                                  2015/2016    55
                                  2016/2017    63
                                  2017/2018    44
                                  2018/2019    62
                                  2019/2020    63
      

In [5]:
defenses = defenses[defenses.groupby(['Profile']).Year.transform('nunique') > 2]
defenses.groupby(['Profile', 'Year']).count()

defenses = defenses.groupby(['Profile', 'Year']).sum().reset_index()

In [6]:
defenses.head()

Unnamed: 0,Profile,Year,Goals,Assists,Plus,Minus,Penalties,Shots,Icetime_seconds,Hits,Shots_blocked,Penalties_against,Rating
0,https://en.khl.ru/players/10162/,2014/2015,1,6,29,39,56,71,64523,22.0,37.0,4.0,15.13
1,https://en.khl.ru/players/10162/,2015/2016,8,17,40,34,62,161,86908,62.0,64.0,5.0,150.3
2,https://en.khl.ru/players/10162/,2016/2017,2,8,36,42,40,124,85217,45.0,55.0,4.0,66.69
3,https://en.khl.ru/players/10162/,2017/2018,0,6,20,30,46,62,60798,18.0,52.0,2.0,14.12
4,https://en.khl.ru/players/10162/,2019/2020,1,7,8,17,18,37,28232,5.0,20.0,1.0,19.49


In [7]:
defenses.groupby(['Profile', 'Year']).sum().reset_index().head()

Unnamed: 0,Profile,Year,Goals,Assists,Plus,Minus,Penalties,Shots,Icetime_seconds,Hits,Shots_blocked,Penalties_against,Rating
0,https://en.khl.ru/players/10162/,2014/2015,1,6,29,39,56,71,64523,22.0,37.0,4.0,15.13
1,https://en.khl.ru/players/10162/,2015/2016,8,17,40,34,62,161,86908,62.0,64.0,5.0,150.3
2,https://en.khl.ru/players/10162/,2016/2017,2,8,36,42,40,124,85217,45.0,55.0,4.0,66.69
3,https://en.khl.ru/players/10162/,2017/2018,0,6,20,30,46,62,60798,18.0,52.0,2.0,14.12
4,https://en.khl.ru/players/10162/,2019/2020,1,7,8,17,18,37,28232,5.0,20.0,1.0,19.49


In [8]:
defenses.to_csv('defenses_seasons.csv', encoding='utf8', index=False)

In [9]:
defenses[['T1_Profile', 'T1_Year',	'T1_Goals', 'T1_Assists',	'T1_Plus',	'T1_Minus', 'T1_Penalties',
          'T1_Shots', 'T1_Icetime_seconds', 'T1_Hits', 'T1_Shots_blocked', 'T1_Penalties_against', 'T1_Rating']] \
    = defenses[['Profile',  'Year',	'Goals', 'Assists',	'Plus',	'Minus', 'Penalties',
          'Shots', 'Icetime_seconds', 'Hits', 'Shots_blocked', 'Penalties_against', 'Rating']].shift(-1)
defenses[['T2_Profile', 'T2_Year',	'T2_Goals', 'T2_Assists',	'T2_Plus',	'T2_Minus', 'T2_Penalties',
          'T2_Shots', 'T2_Icetime_seconds', 'T2_Hits', 'T2_Shots_blocked', 'T2_Penalties_against', 'T2_Rating']] \
    = defenses[['Profile',  'Year',	'Goals', 'Assists',	'Plus',	'Minus', 'Penalties',
                'Shots', 'Icetime_seconds', 'Hits', 'Shots_blocked', 'Penalties_against', 'Rating']].shift(-2)
defenses.head()

Unnamed: 0,Profile,Year,Goals,Assists,Plus,Minus,Penalties,Shots,Icetime_seconds,Hits,Shots_blocked,Penalties_against,Rating,T1_Profile,T1_Year,T1_Goals,T1_Assists,T1_Plus,T1_Minus,T1_Penalties,T1_Shots,T1_Icetime_seconds,T1_Hits,T1_Shots_blocked,T1_Penalties_against,T1_Rating,T2_Profile,T2_Year,T2_Goals,T2_Assists,T2_Plus,T2_Minus,T2_Penalties,T2_Shots,T2_Icetime_seconds,T2_Hits,T2_Shots_blocked,T2_Penalties_against,T2_Rating
0,https://en.khl.ru/players/10162/,2014/2015,1,6,29,39,56,71,64523,22.0,37.0,4.0,15.13,https://en.khl.ru/players/10162/,2015/2016,8.0,17.0,40.0,34.0,62.0,161.0,86908.0,62.0,64.0,5.0,150.3,https://en.khl.ru/players/10162/,2016/2017,2.0,8.0,36.0,42.0,40.0,124.0,85217.0,45.0,55.0,4.0,66.69
1,https://en.khl.ru/players/10162/,2015/2016,8,17,40,34,62,161,86908,62.0,64.0,5.0,150.3,https://en.khl.ru/players/10162/,2016/2017,2.0,8.0,36.0,42.0,40.0,124.0,85217.0,45.0,55.0,4.0,66.69,https://en.khl.ru/players/10162/,2017/2018,0.0,6.0,20.0,30.0,46.0,62.0,60798.0,18.0,52.0,2.0,14.12
2,https://en.khl.ru/players/10162/,2016/2017,2,8,36,42,40,124,85217,45.0,55.0,4.0,66.69,https://en.khl.ru/players/10162/,2017/2018,0.0,6.0,20.0,30.0,46.0,62.0,60798.0,18.0,52.0,2.0,14.12,https://en.khl.ru/players/10162/,2019/2020,1.0,7.0,8.0,17.0,18.0,37.0,28232.0,5.0,20.0,1.0,19.49
3,https://en.khl.ru/players/10162/,2017/2018,0,6,20,30,46,62,60798,18.0,52.0,2.0,14.12,https://en.khl.ru/players/10162/,2019/2020,1.0,7.0,8.0,17.0,18.0,37.0,28232.0,5.0,20.0,1.0,19.49,https://en.khl.ru/players/10176/,2014/2015,4.0,11.0,50.0,16.0,25.0,69.0,60585.0,75.0,86.0,10.0,191.67
4,https://en.khl.ru/players/10162/,2019/2020,1,7,8,17,18,37,28232,5.0,20.0,1.0,19.49,https://en.khl.ru/players/10176/,2014/2015,4.0,11.0,50.0,16.0,25.0,69.0,60585.0,75.0,86.0,10.0,191.67,https://en.khl.ru/players/10176/,2015/2016,5.0,9.0,29.0,31.0,14.0,94.0,64616.0,48.0,96.0,9.0,115.25


In [10]:
defenses = defenses[(defenses['Profile'] == defenses['T1_Profile']) & (defenses['Profile'] == defenses['T2_Profile'])]
defenses.reset_index(drop=True, inplace=True)
defenses.head()

Unnamed: 0,Profile,Year,Goals,Assists,Plus,Minus,Penalties,Shots,Icetime_seconds,Hits,Shots_blocked,Penalties_against,Rating,T1_Profile,T1_Year,T1_Goals,T1_Assists,T1_Plus,T1_Minus,T1_Penalties,T1_Shots,T1_Icetime_seconds,T1_Hits,T1_Shots_blocked,T1_Penalties_against,T1_Rating,T2_Profile,T2_Year,T2_Goals,T2_Assists,T2_Plus,T2_Minus,T2_Penalties,T2_Shots,T2_Icetime_seconds,T2_Hits,T2_Shots_blocked,T2_Penalties_against,T2_Rating
0,https://en.khl.ru/players/10162/,2014/2015,1,6,29,39,56,71,64523,22.0,37.0,4.0,15.13,https://en.khl.ru/players/10162/,2015/2016,8.0,17.0,40.0,34.0,62.0,161.0,86908.0,62.0,64.0,5.0,150.3,https://en.khl.ru/players/10162/,2016/2017,2.0,8.0,36.0,42.0,40.0,124.0,85217.0,45.0,55.0,4.0,66.69
1,https://en.khl.ru/players/10162/,2015/2016,8,17,40,34,62,161,86908,62.0,64.0,5.0,150.3,https://en.khl.ru/players/10162/,2016/2017,2.0,8.0,36.0,42.0,40.0,124.0,85217.0,45.0,55.0,4.0,66.69,https://en.khl.ru/players/10162/,2017/2018,0.0,6.0,20.0,30.0,46.0,62.0,60798.0,18.0,52.0,2.0,14.12
2,https://en.khl.ru/players/10162/,2016/2017,2,8,36,42,40,124,85217,45.0,55.0,4.0,66.69,https://en.khl.ru/players/10162/,2017/2018,0.0,6.0,20.0,30.0,46.0,62.0,60798.0,18.0,52.0,2.0,14.12,https://en.khl.ru/players/10162/,2019/2020,1.0,7.0,8.0,17.0,18.0,37.0,28232.0,5.0,20.0,1.0,19.49
3,https://en.khl.ru/players/10176/,2014/2015,4,11,50,16,25,69,60585,75.0,86.0,10.0,191.67,https://en.khl.ru/players/10176/,2015/2016,5.0,9.0,29.0,31.0,14.0,94.0,64616.0,48.0,96.0,9.0,115.25,https://en.khl.ru/players/10176/,2016/2017,12.0,29.0,60.0,22.0,22.0,206.0,84282.0,64.0,103.0,9.0,328.18
4,https://en.khl.ru/players/10176/,2015/2016,5,9,29,31,14,94,64616,48.0,96.0,9.0,115.25,https://en.khl.ru/players/10176/,2016/2017,12.0,29.0,60.0,22.0,22.0,206.0,84282.0,64.0,103.0,9.0,328.18,https://en.khl.ru/players/10176/,2017/2018,2.0,8.0,19.0,9.0,29.0,86.0,31320.0,22.0,25.0,4.0,82.5


In [11]:
defenses.drop(['T1_Profile', 'T2_Profile'], axis=1, inplace=True)
defenses.drop(['T1_Year', 'T2_Year'], axis=1, inplace=True)
defenses.drop(['Profile'], axis=1, inplace=True)

In [12]:
dummies = defenses.copy()
dummies = pd.get_dummies(dummies, drop_first=True)
dummies.head()

Unnamed: 0,Goals,Assists,Plus,Minus,Penalties,Shots,Icetime_seconds,Hits,Shots_blocked,Penalties_against,Rating,T1_Goals,T1_Assists,T1_Plus,T1_Minus,T1_Penalties,T1_Shots,T1_Icetime_seconds,T1_Hits,T1_Shots_blocked,T1_Penalties_against,T1_Rating,T2_Goals,T2_Assists,T2_Plus,T2_Minus,T2_Penalties,T2_Shots,T2_Icetime_seconds,T2_Hits,T2_Shots_blocked,T2_Penalties_against,T2_Rating,Year_2015/2016,Year_2016/2017,Year_2017/2018,Year_2018/2019,Year_2019/2020
0,1,6,29,39,56,71,64523,22.0,37.0,4.0,15.13,8.0,17.0,40.0,34.0,62.0,161.0,86908.0,62.0,64.0,5.0,150.3,2.0,8.0,36.0,42.0,40.0,124.0,85217.0,45.0,55.0,4.0,66.69,0,0,0,0,0
1,8,17,40,34,62,161,86908,62.0,64.0,5.0,150.3,2.0,8.0,36.0,42.0,40.0,124.0,85217.0,45.0,55.0,4.0,66.69,0.0,6.0,20.0,30.0,46.0,62.0,60798.0,18.0,52.0,2.0,14.12,1,0,0,0,0
2,2,8,36,42,40,124,85217,45.0,55.0,4.0,66.69,0.0,6.0,20.0,30.0,46.0,62.0,60798.0,18.0,52.0,2.0,14.12,1.0,7.0,8.0,17.0,18.0,37.0,28232.0,5.0,20.0,1.0,19.49,0,1,0,0,0
3,4,11,50,16,25,69,60585,75.0,86.0,10.0,191.67,5.0,9.0,29.0,31.0,14.0,94.0,64616.0,48.0,96.0,9.0,115.25,12.0,29.0,60.0,22.0,22.0,206.0,84282.0,64.0,103.0,9.0,328.18,0,0,0,0,0
4,5,9,29,31,14,94,64616,48.0,96.0,9.0,115.25,12.0,29.0,60.0,22.0,22.0,206.0,84282.0,64.0,103.0,9.0,328.18,2.0,8.0,19.0,9.0,29.0,86.0,31320.0,22.0,25.0,4.0,82.5,1,0,0,0,0


In [13]:
y = dummies['T2_Rating'].copy()
X = dummies.drop(
    ['T2_Rating', 'T2_Goals', 'T2_Assists',	'T2_Plus',	'T2_Minus', 'T2_Penalties',
     'T2_Shots', 'T2_Icetime_seconds', 'T2_Hits', 'T2_Shots_blocked', 'T2_Penalties_against'], axis=1).copy()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [14]:
print(X_test.head())

     Goals  Assists  Plus  Minus  Penalties  Shots  Icetime_seconds  Hits  \
664      1        3    21     27         11     29            35796  19.0   
363      0        7    28     10         44     62            35273  29.0   
110      0        5    20     12         12     32            39391  25.0   
678      6       19    50     47         24    137           102007  16.0   
39       1        3    18      6          7     37            32105  30.0   

     Shots_blocked  Penalties_against  Rating  T1_Goals  T1_Assists  T1_Plus  \
664           30.0                1.0   18.91       1.0         4.0     21.0   
363           20.0                2.0   73.10       0.0         5.0     31.0   
110           36.0                2.0   58.08       0.0        12.0     26.0   
678           68.0                6.0  153.07       3.0        21.0     49.0   
39            30.0                0.0   61.18       0.0         2.0     12.0   

     T1_Minus  T1_Penalties  T1_Shots  T1_Icetime_second

In [15]:
y_pred = X_test['T1_Rating']

In [16]:
print('MSE:', mean_squared_error(y_pred, y_test, squared=True))
print('RMSE:', mean_squared_error(y_pred, y_test, squared=False))
print('MAE:', mean_absolute_error(y_pred, y_test))
print('R_squared:', r2_score(y_pred, y_test))
print('MAPE:', mean_absolute_percentage_error(y_pred, y_test))


MSE: 2647.195481176471
RMSE: 51.45090359922234
MAE: 40.70647058823529
R_squared: 0.11888848058127233
MAPE: 0.7552124084865535


In [17]:
y_pred = (X_test['Rating'] + X_test['T1_Rating']) / 2

In [18]:
print('MSE:', mean_squared_error(y_pred, y_test, squared=True))
print('RMSE:', mean_squared_error(y_pred, y_test, squared=False))
print('MAE:', mean_absolute_error(y_pred, y_test))
print('R_squared:', r2_score(y_pred, y_test))
print('MAPE:', mean_absolute_percentage_error(y_pred, y_test))


MSE: 2092.9065995098035
RMSE: 45.74829613777767
MAE: 36.203274509803926
R_squared: 0.139063834464208
MAPE: 0.6081945803471426


In [19]:
from sklearn.linear_model import LinearRegression

linear = LinearRegression(n_jobs=-1)
linear.fit(X_train, y_train)
y_pred = linear.predict(X_test)

In [20]:
print('MSE:', mean_squared_error(y_pred, y_test, squared=True))
print('RMSE:', mean_squared_error(y_pred, y_test, squared=False))
print('MAE:', mean_absolute_error(y_pred, y_test))
print('R_squared:', r2_score(y_pred, y_test))
print('MAPE:', mean_absolute_percentage_error(y_pred, y_test))

linear_coef = pd.DataFrame(zip(X.columns, linear.coef_))
linear_coef.columns = ['Feature', 'Coefficient']
print(linear_coef)

MSE: 1874.1599730315093
RMSE: 43.291569306638785
MAE: 33.66739333437671
R_squared: -0.4317699131701609
MAPE: 0.45104689913592244
                 Feature   Coefficient
0                  Goals -7.240289e+10
1                Assists -1.028222e+11
2                   Plus -5.354797e+10
3                  Minus  5.178818e+10
4              Penalties  1.432974e+10
5                  Shots -5.530776e+09
6        Icetime_seconds -1.877523e-04
7                   Hits -7.290569e+09
8          Shots_blocked -6.033574e+09
9      Penalties_against -4.600600e+10
10                Rating  2.513989e+10
11              T1_Goals -8.096331e-01
12            T1_Assists -7.375029e-01
13               T1_Plus  9.384155e-01
14              T1_Minus -8.677467e-01
15          T1_Penalties -1.945963e-01
16              T1_Shots  2.042658e-01
17    T1_Icetime_seconds  3.051758e-04
18               T1_Hits  2.620950e-03
19      T1_Shots_blocked -8.938261e-02
20  T1_Penalties_against  9.064765e-01
21           

In [21]:
from sklearn.ensemble import RandomForestRegressor

forest = RandomForestRegressor(n_jobs=-1)
forest.fit(X_train, y_train)
y_pred = forest.predict(X_test)




In [22]:
print('MSE:', mean_squared_error(y_pred, y_test, squared=True))
print('RMSE:', mean_squared_error(y_pred, y_test, squared=False))
print('MAE:', mean_absolute_error(y_pred, y_test))
print('R_squared:', r2_score(y_pred, y_test))
print('MAPE:', mean_absolute_percentage_error(y_pred, y_test))


MSE: 2059.332819169804
RMSE: 45.37987240142709
MAE: 36.466789019607845
R_squared: -0.5925129427599582
MAPE: 0.47375175964607913


In [41]:
X_pred = forest.predict(X)

X_new = X.copy()

X_new["Predict_rating"] = X_pred

print(X_new)

     Goals  Assists  Plus  Minus  Penalties  Shots  Icetime_seconds   Hits  \
0        1        6    29     39         56     71            64523   22.0   
1        8       17    40     34         62    161            86908   62.0   
2        2        8    36     42         40    124            85217   45.0   
3        4       11    50     16         25     69            60585   75.0   
4        5        9    29     31         14     94            64616   48.0   
5       12       29    60     22         22    206            84282   64.0   
6        2        8    19      9         29     86            31320   22.0   
7        6       11    33     19         12    111            66262   57.0   
8        5        9    33     21          8     65            46945   38.0   
9        0       16    37     22         32     79            75028   60.0   
10       3       16    25     19         42     81            66681   52.0   
11       8        7    32     25         57     99            69

In [35]:
from sklearn.ensemble import GradientBoostingRegressor

gradient = GradientBoostingRegressor()
gradient.fit(X_train, y_train)
y_pred = gradient.predict(X_test)

In [25]:
print('MSE:', mean_squared_error(y_pred, y_test, squared=True))
print('RMSE:', mean_squared_error(y_pred, y_test, squared=False))
print('MAE:', mean_absolute_error(y_pred, y_test))
print('R_squared:', r2_score(y_pred, y_test))
print('MAPE:', mean_absolute_percentage_error(y_pred, y_test))


MSE: 2061.5959315158907
RMSE: 45.404800754060034
MAE: 36.27868664608771
R_squared: -0.6407905920385126
MAPE: 0.4761443086214442


In [26]:
from sklearn.linear_model import LassoCV

lasso = LassoCV()
lasso.fit(X_train, y_train)
y_pred = lasso.predict(X_test)

In [27]:
print('MSE:', mean_squared_error(y_pred, y_test, squared=True))
print('RMSE:', mean_squared_error(y_pred, y_test, squared=False))
print('MAE:', mean_absolute_error(y_pred, y_test))
print('R_squared:', r2_score(y_pred, y_test))
print('MAPE:', mean_absolute_percentage_error(y_pred, y_test))

MSE: 2088.895235590126
RMSE: 45.70443343473504
MAE: 36.77953309948898
R_squared: -1.7746616000689146
MAPE: 0.46723438089513375


In [28]:
from sklearn.linear_model import Ridge

ridge = Ridge()
ridge.fit(X_train, y_train)
y_pred = ridge.predict(X_test)

In [29]:
print('MSE:', mean_squared_error(y_pred, y_test, squared=True))
print('RMSE:', mean_squared_error(y_pred, y_test, squared=False))
print('MAE:', mean_absolute_error(y_pred, y_test))
print('R_squared:', r2_score(y_pred, y_test))
print('MAPE:', mean_absolute_percentage_error(y_pred, y_test))




MSE: 1871.3601615880773
RMSE: 43.25922053837861
MAE: 33.64149670645726
R_squared: -0.44449536687856805
MAPE: 0.4497241423891753
