# Pycaret modeling 

In [2]:
import pandas as pd
from pycaret.regression import *

In [14]:
players_stats = pd.read_csv('data/players_stats.csv' ,sep=';')

In [15]:
players_stats.head(5)

Unnamed: 0,id,player,player_team,s1,opponent,s2,rating2.0_player,map,date,event_type,event_tier,team_rank,opponent_rank,rank_difference,favorite,round_difference,win,rating2.0_team
0,11893,zywoo,Vitality,10,AVANGAR,16,1.04,d2,2019-05-09,major,S,11.0,14.0,-3.0,True,-6,False,0.766
1,7322,apex,Vitality,10,AVANGAR,16,0.93,d2,2019-05-09,major,S,11.0,14.0,-3.0,True,-6,False,0.766
2,8184,alex,Vitality,10,AVANGAR,16,1.0,d2,2019-05-09,major,S,11.0,14.0,-3.0,True,-6,False,0.766
3,7168,nbk,Vitality,10,AVANGAR,16,0.34,d2,2019-05-09,major,S,11.0,14.0,-3.0,True,-6,False,0.766
4,7169,rpk,Vitality,10,AVANGAR,16,0.52,d2,2019-05-09,major,S,11.0,14.0,-3.0,True,-6,False,0.766


In [16]:
players_stats.drop(['opponent','player','player_team'],axis = 1,inplace = True)

In [17]:
players_stats = players_stats.astype({'id':str})
players_stats['date'] = pd.to_datetime(players_stats['date'])

In [18]:
players_stats.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 56472 entries, 0 to 56471
Data columns (total 15 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   id                56472 non-null  object        
 1   s1                56472 non-null  int64         
 2   s2                56472 non-null  int64         
 3   rating2.0_player  56472 non-null  float64       
 4   map               56472 non-null  object        
 5   date              56472 non-null  datetime64[ns]
 6   event_type        56472 non-null  object        
 7   event_tier        56472 non-null  object        
 8   team_rank         56472 non-null  float64       
 9   opponent_rank     56472 non-null  float64       
 10  rank_difference   56472 non-null  float64       
 11  favorite          56472 non-null  bool          
 12  round_difference  56472 non-null  int64         
 13  win               56472 non-null  bool          
 14  rating2.0_team    5647

# General model

In [43]:
model = setup(data = players_stats,target='rating2.0_player',session_id=42,normalize=True)

Unnamed: 0,Description,Value
0,session_id,42
1,Target,rating2.0_player
2,Original Data,"(56472, 15)"
3,Missing Values,False
4,Numeric Features,8
5,Categorical Features,5
6,Ordinal Features,False
7,High Cardinality Features,False
8,High Cardinality Method,
9,Transformed Train Set,"(39530, 45)"


In [44]:
best = compare_models()

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.1674,0.0471,0.2169,0.5709,0.1026,0.1704,0.052
gbr,Gradient Boosting Regressor,0.1702,0.0487,0.2206,0.5562,0.1044,0.1735,0.537
omp,Orthogonal Matching Pursuit,0.1725,0.05,0.2236,0.5442,0.1058,0.1756,0.017
br,Bayesian Ridge,0.1727,0.0501,0.2237,0.5437,0.1058,0.1758,0.054
ridge,Ridge Regression,0.1727,0.0501,0.2237,0.5436,0.1059,0.1758,0.017
huber,Huber Regressor,0.1725,0.0501,0.2239,0.543,0.1057,0.1742,0.522
lr,Linear Regression,0.1732,0.0503,0.2243,0.541,0.1061,0.1763,0.021
rf,Random Forest Regressor,0.1733,0.0534,0.2311,0.5131,0.1088,0.1752,1.332
ada,AdaBoost Regressor,0.1829,0.054,0.2323,0.5078,0.1115,0.1966,0.32
knn,K Neighbors Regressor,0.2047,0.0684,0.2615,0.3764,0.1237,0.2084,0.295


In [23]:
tune_players = tune_model(best)

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.164,0.0449,0.212,0.5721,0.1016,0.1704
1,0.1711,0.0498,0.2232,0.5508,0.1055,0.1738
2,0.1704,0.0483,0.2197,0.5672,0.103,0.1699
3,0.1653,0.0465,0.2156,0.572,0.1018,0.1679
4,0.1728,0.0497,0.2229,0.5534,0.1047,0.1726
5,0.1729,0.0491,0.2216,0.5537,0.105,0.1763
6,0.1699,0.0483,0.2199,0.5591,0.1039,0.1729
7,0.1667,0.0462,0.2149,0.5775,0.1019,0.1711
8,0.1661,0.0459,0.2142,0.5755,0.1019,0.1711
9,0.1699,0.0484,0.22,0.569,0.1041,0.1725


In [24]:
evaluate_model(best)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…

In [25]:
players_stats_predicted = predict_model(tune_players, data = players_stats)
players_stats_predicted.head(150)

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Light Gradient Boosting Machine,0.1669,0.0466,0.2158,0.573,0.102,0.1693


Unnamed: 0,id,s1,s2,rating2.0_player,map,date,event_type,event_tier,team_rank,opponent_rank,rank_difference,favorite,round_difference,win,rating2.0_team,Label
0,11893,10,16,1.04,d2,2019-05-09,major,S,11.0,14.0,-3.0,True,-6,False,0.766,0.934012
1,7322,10,16,0.93,d2,2019-05-09,major,S,11.0,14.0,-3.0,True,-6,False,0.766,0.719354
2,8184,10,16,1.00,d2,2019-05-09,major,S,11.0,14.0,-3.0,True,-6,False,0.766,0.716131
3,7168,10,16,0.34,d2,2019-05-09,major,S,11.0,14.0,-3.0,True,-6,False,0.766,0.719354
4,7169,10,16,0.52,d2,2019-05-09,major,S,11.0,14.0,-3.0,True,-6,False,0.766,0.719354
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,11893,7,16,0.91,mrg,2019-02-24,lan,A,12.0,6.0,6.0,False,-9,False,0.924,1.098866
146,7322,7,16,0.98,mrg,2019-02-24,lan,A,12.0,6.0,6.0,False,-9,False,0.924,0.884208
147,8184,7,16,0.96,mrg,2019-02-24,lan,A,12.0,6.0,6.0,False,-9,False,0.924,0.880985
148,7168,7,16,1.07,mrg,2019-02-24,lan,A,12.0,6.0,6.0,False,-9,False,0.924,0.884208


# Model zywoo

In [27]:
zywoo = players_stats.query('id == "11893"')
zywoo

Unnamed: 0,id,s1,s2,rating2.0_player,map,date,event_type,event_tier,team_rank,opponent_rank,rank_difference,favorite,round_difference,win,rating2.0_team
0,11893,10,16,1.04,d2,2019-05-09,major,S,11.0,14.0,-3.0,True,-6,False,0.766
5,11893,10,16,1.04,d2,2019-05-09,big_events,S,11.0,14.0,-3.0,True,-6,False,0.766
10,11893,10,16,1.04,d2,2019-05-09,lan,A,11.0,14.0,-3.0,True,-6,False,0.766
15,11893,16,11,1.63,inf,2019-05-09,major,S,11.0,14.0,-3.0,True,5,True,1.176
20,11893,16,11,1.63,inf,2019-05-09,big_events,S,11.0,14.0,-3.0,True,5,True,1.176
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33297,11893,16,11,1.95,cch,2019-01-29,online,B,19.0,35.0,-16.0,True,5,True,1.206
33302,11893,16,11,1.44,mrg,2019-08-01,online,B,2.0,17.0,-15.0,True,5,True,1.130
33307,11893,22,20,1.24,inf,2019-08-01,online,B,2.0,17.0,-15.0,True,2,True,1.030
33312,11893,16,7,1.39,mrg,2019-08-01,online,B,2.0,35.0,-33.0,True,9,True,1.266


In [29]:
model_zywoo = setup(data = zywoo,target='rating2.0_player',session_id=42,normalize=True)

Unnamed: 0,Description,Value
0,session_id,42
1,Target,rating2.0_player
2,Original Data,"(407, 15)"
3,Missing Values,False
4,Numeric Features,6
5,Categorical Features,7
6,Ordinal Features,False
7,High Cardinality Features,False
8,High Cardinality Method,
9,Transformed Train Set,"(284, 61)"


In [30]:
best_zywoo = compare_models()

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
rf,Random Forest Regressor,0.1416,0.0372,0.1902,0.6429,0.0796,0.1104,0.024
et,Extra Trees Regressor,0.1249,0.0365,0.1889,0.6419,0.0786,0.096,0.022
gbr,Gradient Boosting Regressor,0.1505,0.0417,0.2013,0.6047,0.0842,0.1175,0.009
lightgbm,Light Gradient Boosting Machine,0.1543,0.0423,0.2036,0.5873,0.0861,0.1213,0.005
ada,AdaBoost Regressor,0.1693,0.0442,0.2079,0.5689,0.0887,0.1354,0.011
br,Bayesian Ridge,0.169,0.0456,0.2108,0.5617,0.0901,0.1343,0.004
omp,Orthogonal Matching Pursuit,0.1724,0.0481,0.2168,0.5337,0.0934,0.139,0.003
ridge,Ridge Regression,0.1786,0.0495,0.2202,0.5148,0.0946,0.1425,0.003
lr,Linear Regression,0.1853,0.0526,0.2271,0.483,0.098,0.1483,0.003
knn,K Neighbors Regressor,0.1848,0.0558,0.233,0.4658,0.0998,0.1483,0.004


In [31]:
evaluate_model(best_zywoo)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…

In [32]:
tune = tune_model(best_zywoo)

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.167,0.0433,0.2081,0.6363,0.0907,0.1353
1,0.162,0.0394,0.1986,0.6646,0.0847,0.1333
2,0.1314,0.0232,0.1525,0.6163,0.0632,0.0973
3,0.1301,0.0275,0.1659,0.5051,0.0719,0.1014
4,0.1622,0.0371,0.1927,0.515,0.0863,0.1441
5,0.1784,0.0476,0.2183,0.5436,0.0922,0.141
6,0.2309,0.0743,0.2725,0.5736,0.1191,0.1982
7,0.1875,0.0562,0.237,0.5579,0.1009,0.1426
8,0.1688,0.0438,0.2093,0.5913,0.0912,0.1379
9,0.1961,0.0628,0.2507,0.4701,0.1031,0.1493


In [33]:
zywoo_predicted = predict_model(tune, data = zywoo)
zywoo_predicted.head(150)

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Random Forest Regressor,0.1474,0.0362,0.1902,0.6799,0.0797,0.115


Unnamed: 0,id,s1,s2,rating2.0_player,map,date,event_type,event_tier,team_rank,opponent_rank,rank_difference,favorite,round_difference,win,rating2.0_team,Label
0,11893,10,16,1.04,d2,2019-05-09,major,S,11.0,14.0,-3.0,True,-6,False,0.766,1.064326
5,11893,10,16,1.04,d2,2019-05-09,big_events,S,11.0,14.0,-3.0,True,-6,False,0.766,1.068819
10,11893,10,16,1.04,d2,2019-05-09,lan,A,11.0,14.0,-3.0,True,-6,False,0.766,1.068819
15,11893,16,11,1.63,inf,2019-05-09,major,S,11.0,14.0,-3.0,True,5,True,1.176,1.420073
20,11893,16,11,1.63,inf,2019-05-09,big_events,S,11.0,14.0,-3.0,True,5,True,1.176,1.423096
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4425,11893,11,16,1.21,d2,2019-07-21,lan,A,2.0,1.0,1.0,False,-5,False,0.954,1.095193
4430,11893,16,9,1.31,ovp,2019-07-20,big_events,S,2.0,14.0,-12.0,True,7,True,1.206,1.389918
4435,11893,16,9,1.31,ovp,2019-07-20,lan,A,2.0,14.0,-12.0,True,7,True,1.206,1.391810
4440,11893,12,16,1.57,d2,2019-07-20,big_events,S,2.0,14.0,-12.0,True,-4,False,0.950,1.243977


## Alex

In [35]:
alex = players_stats.query('id == "11893"')
alex

Unnamed: 0,id,s1,s2,rating2.0_player,map,date,event_type,event_tier,team_rank,opponent_rank,rank_difference,favorite,round_difference,win,rating2.0_team
0,11893,10,16,1.04,d2,2019-05-09,major,S,11.0,14.0,-3.0,True,-6,False,0.766
5,11893,10,16,1.04,d2,2019-05-09,big_events,S,11.0,14.0,-3.0,True,-6,False,0.766
10,11893,10,16,1.04,d2,2019-05-09,lan,A,11.0,14.0,-3.0,True,-6,False,0.766
15,11893,16,11,1.63,inf,2019-05-09,major,S,11.0,14.0,-3.0,True,5,True,1.176
20,11893,16,11,1.63,inf,2019-05-09,big_events,S,11.0,14.0,-3.0,True,5,True,1.176
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33297,11893,16,11,1.95,cch,2019-01-29,online,B,19.0,35.0,-16.0,True,5,True,1.206
33302,11893,16,11,1.44,mrg,2019-08-01,online,B,2.0,17.0,-15.0,True,5,True,1.130
33307,11893,22,20,1.24,inf,2019-08-01,online,B,2.0,17.0,-15.0,True,2,True,1.030
33312,11893,16,7,1.39,mrg,2019-08-01,online,B,2.0,35.0,-33.0,True,9,True,1.266


In [41]:
model_alex  = setup(data = alex,target='rating2.0_player',session_id=42,normalize=True)

Unnamed: 0,Description,Value
0,session_id,42
1,Target,rating2.0_player
2,Original Data,"(407, 15)"
3,Missing Values,False
4,Numeric Features,6
5,Categorical Features,7
6,Ordinal Features,False
7,High Cardinality Features,False
8,High Cardinality Method,
9,Transformed Train Set,"(284, 61)"


In [39]:
predict_model = compare_models()

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
rf,Random Forest Regressor,0.1416,0.0372,0.1902,0.6429,0.0796,0.1104,0.024
et,Extra Trees Regressor,0.1249,0.0365,0.1889,0.6419,0.0786,0.096,0.024
gbr,Gradient Boosting Regressor,0.1505,0.0417,0.2013,0.6047,0.0842,0.1175,0.009
lightgbm,Light Gradient Boosting Machine,0.1543,0.0423,0.2036,0.5873,0.0861,0.1213,0.005
ada,AdaBoost Regressor,0.1693,0.0442,0.2079,0.5689,0.0887,0.1354,0.01
br,Bayesian Ridge,0.169,0.0456,0.2108,0.5617,0.0901,0.1343,0.003
omp,Orthogonal Matching Pursuit,0.1724,0.0481,0.2168,0.5337,0.0934,0.139,0.003
ridge,Ridge Regression,0.1786,0.0495,0.2202,0.5148,0.0946,0.1425,0.003
lr,Linear Regression,0.1853,0.0526,0.2271,0.483,0.098,0.1483,0.003
knn,K Neighbors Regressor,0.1848,0.0558,0.233,0.4658,0.0998,0.1483,0.004


In [40]:
evaluate_model(predict_model)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…