## **Supervised ML regression algorithm to predict next round team value (CT & T)**
## **Algorithm**

Input data: the DataFrames obtained from the previous prediction step:
- ct_predicted_value
- t_predicted_value

Preprocess the data to create a new target column: the next-round team value (**nxt_rnd_ct_val** & **nxt_rnd_t_val**).

In [1]:
import pandas as pd

from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import mean_squared_error, r2_score

from sklearn.linear_model import Lasso

from lightgbm import LGBMRegressor
from joblib import dump, load

In [2]:
# Cap how many columns and rows pandas renders for a displayed frame.
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, 30)

### Load Data

In [3]:
# Locations of the per-side base tables (relative to the notebook).
CT_CSV = '../data/processed/3_base_predict_next_rnd_ct_val.csv'
T_CSV = '../data/processed/3_base_predict_next_rnd_t_val.csv'

# Read the CT table first, then the T table, with pandas' default CSV settings.
ct_df, t_df = (pd.read_csv(csv_path) for csv_path in (CT_CSV, T_CSV))

In [4]:
# Preview the first rows of the CT table, then of the T table.
for frame in (ct_df, t_df):
    display(frame.head())

Unnamed: 0.1,Unnamed: 0,file,round,wp_ct_val,nade_ct_val,ct_alive,t_alive,ct_winner,bomb_planted,ct_cons_wins,t_cons_wins,ct_val_real,ct_val_pred,nxt_rnd_ct_val
0,0,0,1,1000.0,550,5,5,0.5,0.5,0,0,4550,4078.134589,17819.702711
1,1,0,2,10100.0,1100,4,0,1.0,0.0,1,0,18450,17819.702711,7038.468589
2,2,0,3,4125.0,900,0,1,0.0,0.0,0,1,9550,7038.468589,1452.468928
3,3,0,4,1000.0,0,0,3,0.0,1.0,0,2,1600,1452.468928,22676.205763
4,4,0,5,15500.0,1400,0,4,0.0,1.0,0,3,23350,22676.205763,26585.694068


Unnamed: 0.1,Unnamed: 0,file,round,wp_t_val,nade_t_val,ct_alive,t_alive,ct_winner,bomb_planted,ct_cons_wins,t_cons_wins,t_val_real,t_val_pred,nxt_rnd_t_val
0,0,0,1,1166.666667,1200,5,5,0.5,0.5,0,0,3850,3943.272665,6290.616771
1,1,0,2,3687.5,50,4,0,1.0,0.0,1,0,5300,6290.616771,19600.790638
2,2,0,3,11700.0,2450,0,1,0.0,0.0,0,1,22900,19600.790638,22568.098741
3,3,0,4,11700.0,1600,0,3,0.0,1.0,0,2,19650,22568.098741,24459.855175
4,4,0,5,12750.0,1700,0,4,0.0,1.0,0,3,21750,24459.855175,11755.134124


### Define Features and Target

In [5]:
# Columns that appear, in this order, in both the CT and the T feature lists.
SHARED_FEATS = ['ct_alive', 't_alive', 'ct_winner', 'bomb_planted', 'ct_cons_wins', 't_cons_wins']

# The two lists differ only in the side-specific wp_*, nade_* and *_val_real columns.
# An earlier variant additionally ended with 'ct_val_pred' / 't_val_pred';
# those columns are not part of the active feature lists.
CT_FEATS = ['file', 'round', 'wp_ct_val', 'nade_ct_val', *SHARED_FEATS, 'ct_val_real']
T_FEATS = ['file', 'round', 'wp_t_val', 'nade_t_val', *SHARED_FEATS, 't_val_real']

# Regression targets: the next-round team value column of each side.
CT_TARGET, T_TARGET = 'nxt_rnd_ct_val', 'nxt_rnd_t_val'

### Preprocessor

In [6]:
# Preprocessor: every selected feature column goes through a StandardScaler.
# One scaling-step definition is reused; each side gets its own
# ColumnTransformer restricted to that side's feature list.
numeric_transformer = Pipeline([('scaler', StandardScaler())])

preprocessor_ct, preprocessor_t = (
    ColumnTransformer([('num', numeric_transformer, side_feats)])
    for side_feats in (CT_FEATS, T_FEATS)
)

### **SPLITS**

In [7]:
# Fixed seed so the train/test partition, and every metric computed from it,
# is identical after Restart & Run All.
SPLIT_SEED = 42

# test_size=0.25 spells out the fraction train_test_split uses by default.
# NOTE(review): rows are shuffled individually, so rounds sharing the same
# `file` value can land on both sides of the split — confirm this is intended.
ct_train, ct_test = train_test_split(ct_df, test_size=0.25, random_state=SPLIT_SEED)
t_train, t_test = train_test_split(t_df, test_size=0.25, random_state=SPLIT_SEED)

In [8]:
# Report the (rows, columns) of each train/test partition: CT first, then T.
for train_part, test_part in ((ct_train, ct_test), (t_train, t_test)):
    print(train_part.shape, test_part.shape)

(233109, 14) (77704, 14)
(233109, 14) (77704, 14)


### **MODEL**

#### We use **LightGBM** as the regression algorithm, as in the previous prediction

In [9]:
# Settings for the LightGBM regressor, collected in one place.
lgbm_params = {
    'boosting_type': 'gbdt',
    'bagging_freq': 1,
    'bagging_fraction': 0.9,
    'n_estimators': 100,
}
regressor = LGBMRegressor(**lgbm_params)

# Alternative estimator, kept for reference:
# regressor = Lasso()

In [10]:
# CT pipeline: apply the CT preprocessor, then the regressor.
ct_steps = [
    ('ct_preprocessor', preprocessor_ct),
    ('regressor', regressor),
]
ct_model = Pipeline(ct_steps)

# T counterpart, currently disabled.
# NOTE(review): as written it would reuse the same `regressor` object that
# ct_model holds — confirm whether a separate estimator is needed before enabling.
# t_model = Pipeline(steps=[('t_preprocessor', preprocessor_t),
#                            ('regressor', regressor)])

In [11]:
# Fit the CT pipeline on the training partition; the trailing ';' hides the repr.
ct_train_features = ct_train[CT_FEATS]
ct_train_target = ct_train[CT_TARGET]
ct_model.fit(ct_train_features, ct_train_target);

# t_model.fit(t_train[T_FEATS], t_train[T_TARGET]);

### **CHECK PERFORMANCE**

In [12]:
# Predict on the test partition first, then on the training partition,
# so the two scores can be compared.
y_ct_test, y_ct_train = (
    ct_model.predict(partition[CT_FEATS]) for partition in (ct_test, ct_train)
)

# y_t_test = t_model.predict(t_test[T_FEATS])
# y_t_train = t_model.predict(t_train[T_FEATS])

In [13]:
# Root mean squared error (same units as the target).
# The RMSE is computed as the square root of the MSE rather than by passing
# `squared=False`: that argument was deprecated in scikit-learn 1.4 and
# removed in 1.6, where it raises a TypeError.

ct_test_rmse = mean_squared_error(y_true=ct_test[CT_TARGET], y_pred=y_ct_test) ** 0.5
ct_train_rmse = mean_squared_error(y_true=ct_train[CT_TARGET], y_pred=y_ct_train) ** 0.5

print(f"CT test error: {ct_test_rmse}")
print(f"CT train error: {ct_train_rmse}")
# print()
# print(f"T test error: {mean_squared_error(y_pred=y_t_test, y_true=t_test[T_TARGET]) ** 0.5}")
# print(f"T train error: {mean_squared_error(y_pred=y_t_train, y_true=t_train[T_TARGET]) ** 0.5}")

CT test error: 8398.556925775038
CT train error: 8327.822972010204


In [14]:
# R^2 score for the test and training partitions.
# NOTE(review): the printed labels say "error", but these values are R^2
# scores (higher is better); the labels are left unchanged here.

ct_test_r2 = r2_score(y_true=ct_test[CT_TARGET], y_pred=y_ct_test)
ct_train_r2 = r2_score(y_true=ct_train[CT_TARGET], y_pred=y_ct_train)

print(f"CT test error: {ct_test_r2}")
print(f"CT train error: {ct_train_r2}")
print()
# print(f"T test error: {r2_score(y_pred=y_t_test, y_true=t_test[T_TARGET])}")
# print(f"T train error: {r2_score(y_pred=y_t_train, y_true=t_train[T_TARGET])}")

CT test error: 0.35511175135801876
CT train error: 0.3649841613638991



--------------------------------
### LGBM REGRESSOR
CT test error:______Base: 1402.436|| 0.82444______<font color='green'>Hyp. tuning:  || </font>

CT train error:______Base: 1131.833|| 0.88526______<font color='green'>Hyp. tuning:  || </font>

T test error:______Base: 2725.320 || 0.90388______<font color='green'>Hyp. tuning:  || </font>

T train error:______Base: 2728.544 || 0.90519______<font color='green'>Hyp. tuning:  || </font>

In [15]:
# Summary statistics for every column of the CT table.
# NOTE(review): the recorded output shows a minimum of -5.0 for ct_alive and
# t_alive — confirm upstream whether negative values are expected.
ct_df.describe()

Unnamed: 0.1,Unnamed: 0,file,round,wp_ct_val,nade_ct_val,ct_alive,t_alive,ct_winner,bomb_planted,ct_cons_wins,t_cons_wins,ct_val_real,ct_val_pred,nxt_rnd_ct_val
count,310813.0,310813.0,310813.0,310813.0,310813.0,310813.0,310813.0,310813.0,310813.0,310813.0,310813.0,310813.0,310813.0,310813.0
mean,155406.0,6087.770621,13.636219,10886.634767,1900.744499,1.616708,1.525879,0.489547,0.471372,1.22548,1.287926,18887.598331,18887.598331,18728.614795
std,89724.128946,3516.568823,8.036385,6302.263974,1445.294768,1.711394,1.697354,0.48999,0.489265,1.874231,1.889379,10547.244034,10194.148476,10452.523997
min,0.0,0.0,1.0,0.0,0.0,-5.0,-5.0,0.0,0.0,0.0,0.0,200.0,827.137617,0.0
25%,77703.0,3043.0,7.0,4250.0,600.0,0.0,0.0,0.0,0.0,0.0,0.0,7400.0,7199.14524,7198.371119
50%,155406.0,6087.0,13.0,12500.0,1700.0,1.0,1.0,0.5,0.0,0.0,0.0,21750.0,22118.515884,22118.515884
75%,233109.0,9130.0,20.0,15812.5,2900.0,3.0,3.0,1.0,1.0,2.0,2.0,28150.0,27909.289358,27909.289358
max,310812.0,12184.0,60.0,25750.0,9300.0,5.0,5.0,1.0,1.0,21.0,23.0,42050.0,39034.491534,39034.491534


In [16]:
# Summary statistics for every column of the T table.
# NOTE(review): the recorded output shows a minimum of -5.0 for ct_alive and
# t_alive — confirm upstream whether negative values are expected.
t_df.describe()

Unnamed: 0.1,Unnamed: 0,file,round,wp_t_val,nade_t_val,ct_alive,t_alive,ct_winner,bomb_planted,ct_cons_wins,t_cons_wins,t_val_real,t_val_pred,nxt_rnd_t_val
count,310813.0,310813.0,310813.0,310813.0,310813.0,310813.0,310813.0,310813.0,310813.0,310813.0,310813.0,310813.0,310813.0,310813.0
mean,155406.0,6087.770621,13.636219,10678.347979,1690.516323,1.616708,1.525879,0.489547,0.471372,1.22548,1.287926,18229.730738,18229.730738,18076.039941
std,89724.128946,3516.568823,8.036385,5345.656364,1205.368292,1.711394,1.697354,0.48999,0.489265,1.874231,1.889379,8837.245568,8404.748613,8695.758995
min,0.0,0.0,1.0,0.0,0.0,-5.0,-5.0,0.0,0.0,0.0,0.0,400.0,397.334292,0.0
25%,77703.0,3043.0,7.0,6750.0,700.0,0.0,0.0,0.0,0.0,0.0,0.0,10700.0,11538.383157,11538.383157
50%,155406.0,6087.0,13.0,13125.0,1600.0,1.0,1.0,0.5,0.0,0.0,0.0,21100.0,21439.971221,21439.971221
75%,233109.0,9130.0,20.0,13500.0,2600.0,3.0,3.0,1.0,1.0,2.0,2.0,25450.0,25082.831162,25082.831162
max,310812.0,12184.0,60.0,25000.0,8300.0,5.0,5.0,1.0,1.0,21.0,23.0,38150.0,33197.210073,33197.210073
