## **Supervised ML regression algorithm to predict next round team value (CT & T)**
## **Algorithm**

In [1]:
import pandas as pd

from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import classification_report

from lightgbm import LGBMClassifier
from joblib import dump

In [2]:
pd.set_option('display.max_columns', 30)
pd.set_option('display.max_rows', 30)

### Data

In [3]:
ct_df = pd.read_csv('../data/processed/4_base_predict_next_rnd_ct_type.csv')
t_df = pd.read_csv('../data/processed/4_base_predict_next_rnd_t_type.csv')

In [4]:
display(ct_df.head())
display(t_df.head())

Unnamed: 0,file,round,wp_ct_val,nade_ct_val,ct_alive,t_alive,ct_winner,bomb_planted,ct_cons_wins,t_cons_wins,ct_val_pred,round_type,nxt_rnd_type
0,0,1,1000.0,550,5,5,0.5,0.5,0,0,4078.134589,PISTOL_ROUND,MEDIUM
1,0,2,10100.0,1100,4,0,1.0,0.0,1,0,17819.702711,MEDIUM,MEDIUM
2,0,3,4125.0,900,0,1,0.0,0.0,0,1,7038.468589,MEDIUM,ECO
3,0,4,1000.0,0,0,3,0.0,1.0,0,2,1452.468928,ECO,FULL
4,0,5,15500.0,1400,0,4,0.0,1.0,0,3,22676.205763,FULL,FULL


Unnamed: 0,file,round,wp_t_val,nade_t_val,ct_alive,t_alive,ct_winner,bomb_planted,ct_cons_wins,t_cons_wins,t_val_pred,round_type,nxt_rnd_type
0,0,1,1166.666667,1200,5,5,0.5,0.5,0,0,3943.272665,PISTOL_ROUND,ECO
1,0,2,3687.5,50,4,0,1.0,0.0,1,0,6290.616771,ECO,FULL
2,0,3,11700.0,2450,0,1,0.0,0.0,0,1,19600.790638,FULL,MEDIUM
3,0,4,11700.0,1600,0,3,0.0,1.0,0,2,22568.098741,MEDIUM,FULL
4,0,5,12750.0,1700,0,4,0.0,1.0,0,3,24459.855175,FULL,MEDIUM


### Encoding

We use a manual encoding to be sure of being the same values for round_type and nxt_rnd_type.

If we use LabelEncoder() it could be possible to have different values for the same label.

In [5]:
ct_df['round_type'].unique()

array(['PISTOL_ROUND', 'MEDIUM', 'ECO', 'FULL'], dtype=object)

In [6]:
round_type_dic = {'PISTOL_ROUND':0, 'ECO':1, 'MEDIUM':2, 'FULL':3, 'LAST':4}

In [7]:
ct_df['round_type'] = ct_df['round_type'].apply(lambda x: round_type_dic[x])
ct_df['nxt_rnd_type'] = ct_df['nxt_rnd_type'].apply(lambda x: round_type_dic[x])

t_df['round_type'] = t_df['round_type'].apply(lambda x: round_type_dic[x])
t_df['nxt_rnd_type'] = t_df['nxt_rnd_type'].apply(lambda x: round_type_dic[x])

In [8]:
display(ct_df.head())
display(t_df.head())

Unnamed: 0,file,round,wp_ct_val,nade_ct_val,ct_alive,t_alive,ct_winner,bomb_planted,ct_cons_wins,t_cons_wins,ct_val_pred,round_type,nxt_rnd_type
0,0,1,1000.0,550,5,5,0.5,0.5,0,0,4078.134589,0,2
1,0,2,10100.0,1100,4,0,1.0,0.0,1,0,17819.702711,2,2
2,0,3,4125.0,900,0,1,0.0,0.0,0,1,7038.468589,2,1
3,0,4,1000.0,0,0,3,0.0,1.0,0,2,1452.468928,1,3
4,0,5,15500.0,1400,0,4,0.0,1.0,0,3,22676.205763,3,3


Unnamed: 0,file,round,wp_t_val,nade_t_val,ct_alive,t_alive,ct_winner,bomb_planted,ct_cons_wins,t_cons_wins,t_val_pred,round_type,nxt_rnd_type
0,0,1,1166.666667,1200,5,5,0.5,0.5,0,0,3943.272665,0,1
1,0,2,3687.5,50,4,0,1.0,0.0,1,0,6290.616771,1,3
2,0,3,11700.0,2450,0,1,0.0,0.0,0,1,19600.790638,3,2
3,0,4,11700.0,1600,0,3,0.0,1.0,0,2,22568.098741,2,3
4,0,5,12750.0,1700,0,4,0.0,1.0,0,3,24459.855175,3,2


## Define features and target

In [9]:
CT_FEATS = ['file', 'round', 'wp_ct_val', 'nade_ct_val', 'ct_alive', 't_alive', 'ct_winner', 'bomb_planted', 'ct_cons_wins', 't_cons_wins', 'ct_val_pred', 'round_type']
T_FEATS = ['file', 'round', 'wp_t_val', 'nade_t_val', 'ct_alive', 't_alive', 'ct_winner', 'bomb_planted', 'ct_cons_wins', 't_cons_wins', 't_val_pred', 'round_type']
TARGET = 'nxt_rnd_type'

In [10]:
#Preprocessor

numeric_transformer = Pipeline(steps=[('scaler', StandardScaler())])

preprocessor_ct = ColumnTransformer(transformers=[('num', numeric_transformer, CT_FEATS)])
preprocessor_t = ColumnTransformer(transformers=[('num', numeric_transformer, T_FEATS)])

### **SPLITS**

In [11]:
# X_train, X_test, y_train, y_test = train_test_split(ct_df[CT_FEATS], ct_df[TARGET]) # CT Team

X_train, X_test, y_train, y_test = train_test_split(t_df[T_FEATS], t_df[TARGET]) # T Team

### **MODEL**

### CT Regressor

In [12]:
# regressor = LGBMClassifier(num_leaves=50,
#                           n_estimators=350,
#                           min_data_in_leaf=15,
#                           max_depth=7,
#                           learning_rate=0.055,
#                           feature_fraction=0.8,
#                           bagging_frequency=0.75,
#                           bagging_fraction=0.75)

### T Regressor

In [13]:
regressor = LGBMClassifier(num_leaves=50,
                          n_estimators=350,
                          min_data_in_leaf=15,
                          max_depth=7,
                          learning_rate=0.055,
                          feature_fraction=0.8,
                          bagging_frequency=0.75,
                          bagging_fraction=0.75)

### Models

In [14]:
# ct_model = Pipeline(steps=[('ct_preprocessor', preprocessor_ct),
#                            ('regressor', regressor)])

t_model = Pipeline(steps=[('t_preprocessor', preprocessor_t),
                           ('regressor', regressor)])

In [15]:
# ct_model.fit(X_train, y_train);

t_model.fit(X_train, y_train);

### **CHECK PERFORMANCE**

In [16]:
# ct_model.score(X_test, y_test)

t_model.score(X_test, y_test)

0.6409966024915063

In [17]:
# sk_report = classification_report(
#     digits=6,
#     y_true=y_test, 
#     y_pred=ct_model.predict(X_test))
# print(sk_report)


sk_report = classification_report(
    digits=6,
    y_true=y_test, 
    y_pred=t_model.predict(X_test))
print(sk_report)

              precision    recall  f1-score   support

           0   0.998357  0.907164  0.950579      3350
           1   0.518041  0.355032  0.421319     12213
           2   0.519887  0.351742  0.419596     19918
           3   0.672673  0.890158  0.766283     39129
           4   0.576402  0.192631  0.288760      3094

    accuracy                       0.640997     77704
   macro avg   0.657072  0.539345  0.569307     77704
weighted avg   0.619413  0.640997  0.612128     77704



### **TRAIN MODEL WITH FULL DATASET**

#### **CT Model. Train & Save**

In [11]:
regressor = LGBMClassifier(num_leaves=50,
                          n_estimators=350,
                          min_data_in_leaf=15,
                          max_depth=7,
                          learning_rate=0.055,
                          feature_fraction=0.8,
                          bagging_frequency=0.75,
                          bagging_fraction=0.75)

ct_model = Pipeline(steps=[('ct_preprocessor', preprocessor_ct),
                           ('regressor', regressor)])

ct_model.fit(ct_df[CT_FEATS], ct_df[TARGET]);

ct_df_pred_nxt_rnd = ct_model.predict(ct_df[CT_FEATS])

In [12]:
dump(ct_model, '../models/ct_nxt_rnd_type.joblib') # Save model

['../models/ct_nxt_rnd_type.joblib']

In [13]:
ct_df['ct_nxt_rnd_type_pred'] = ct_df_pred_nxt_rnd

In [14]:
ct_df.to_csv('../data/results/ct_predicted_nxt_rnd.csv', index=False) # Export the CT DataFrame with the prediction column added

#### **T Model. Train & Save**

In [14]:
regressor = LGBMClassifier(num_leaves=50,
                          n_estimators=350,
                          min_data_in_leaf=15,
                          max_depth=7,
                          learning_rate=0.055,
                          feature_fraction=0.8,
                          bagging_frequency=0.75,
                          bagging_fraction=0.75)

t_model = Pipeline(steps=[('t_preprocessor', preprocessor_t),
                           ('regressor', regressor)])

t_model.fit(t_df[T_FEATS], t_df[TARGET]);

t_df_pred_nxt_rnd = t_model.predict(t_df[T_FEATS])

In [15]:
dump(t_model, '../models/t_nxt_rnd_type.joblib') # Save model

['../models/t_nxt_rnd_type.joblib']

In [16]:
t_df['t_nxt_rnd_type_pred'] = t_df_pred_nxt_rnd

In [17]:
t_df.to_csv('../data/results/t_predicted_nxt_rnd.csv', index=False) # Export the T DataFrame with the prediction column added

### **JOINING DATA AND MAKE IT LEGIBLE**

### Load Data

In [None]:
ct_df = pd.read_csv('../data/results/ct_predicted_nxt_rnd.csv')
ct_df = pd.read_csv('../data/results/ct_predicted_nxt_rnd.csv')