## **Supervised ML classification algorithm to predict next round winner team (CT & T)**
## **Algorithm**

In [1]:
import pandas as pd

from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import roc_auc_score, classification_report

from sklearn.linear_model import SGDClassifier, LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier, ExtraTreesClassifier
from sklearn.svm import LinearSVC, SVC
from sklearn.neighbors import KNeighborsClassifier
from lightgbm import LGBMClassifier

from joblib import dump

In [2]:
# Limit rendered DataFrame previews to 30 columns and 30 rows.
pd.options.display.max_columns = 30
pd.options.display.max_rows = 30

### Data

In [3]:
# Load the base table (source of the target column) and the two per-team
# feature tables, in that order.
df, ct_df, t_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/processed/5_1_base_predict_winner.csv',
        '../data/processed/for_db_2_ct.csv',
        '../data/processed/for_db_2_t.csv',
    )
)

In [4]:
# Attach the target column from the base table to each per-team feature table.
# pd.concat(axis=1) matches rows on the index.
# Any existing 'nxt_ct_winner' column is dropped first so the cell is idempotent:
# re-running it would otherwise append a second column with the same name, and
# selecting that name afterwards would return a two-column frame instead of a Series.
next_round_winner = df['nxt_ct_winner']

ct_df = pd.concat(
    [ct_df.drop(columns='nxt_ct_winner', errors='ignore'), next_round_winner],
    axis=1,
)
t_df = pd.concat(
    [t_df.drop(columns='nxt_ct_winner', errors='ignore'), next_round_winner],
    axis=1,
)

In [5]:
# Preview the first rows of the base table and of both per-team tables.
display(df.head(), ct_df.head(), t_df.head())

Unnamed: 0,file,round,wp_ct_val,wp_t_val,nade_ct_val,nade_t_val,ct_alive,t_alive,prev_ct_winner,ct_winner,prev_bomb_planted,bomb_planted,ct_cons_wins,t_cons_wins,ct_val_pred,t_val_pred,ct_round_type,t_round_type,ct_nxt_rnd_type_pred,t_nxt_rnd_type_pred,nxt_ct_winner
0,0,1,1000.0,1166.666667,550,1200,5,5,0.5,1,0.5,0,0,0,4078.134589,3943.272665,0,0,2,1,0
1,0,2,10100.0,3687.5,1100,50,4,0,1.0,0,0.0,0,1,0,17819.702711,6290.616771,2,1,3,1,1
2,0,3,4125.0,11700.0,900,2450,0,1,0.0,0,0.0,1,0,1,7038.468589,19600.790638,2,3,1,3,1
3,0,4,1000.0,11700.0,0,1600,0,3,0.0,0,1.0,1,0,2,1452.468928,22568.098741,1,2,3,3,0
4,0,5,15500.0,12750.0,1400,1700,0,4,0.0,1,1.0,0,0,3,22676.205763,24459.855175,3,3,1,3,0


Unnamed: 0,file,round,ct_alive,t_alive,ct_winner,bomb_planted,ct_cons_wins,t_cons_wins,round_type,nxt_rnd_type,de_cache,de_cbble,de_dust2,de_inferno,de_mirage,de_nuke,de_overpass,de_train,nxt_ct_winner
0,0,1,5,5,0.5,0.5,0,0,0,2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
1,0,2,4,0,1.0,0.0,1,0,2,2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
2,0,3,0,1,0.0,0.0,0,1,2,1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
3,0,4,0,3,0.0,1.0,0,2,1,3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
4,0,5,0,4,0.0,1.0,0,3,3,3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0


Unnamed: 0,file,round,ct_alive,t_alive,ct_winner,bomb_planted,ct_cons_wins,t_cons_wins,round_type,nxt_rnd_type,de_cache,de_cbble,de_dust2,de_inferno,de_mirage,de_nuke,de_overpass,de_train,nxt_ct_winner
0,0,1,5,5,0.5,0.5,0,0,0,1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
1,0,2,4,0,1.0,0.0,1,0,1,3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
2,0,3,0,1,0.0,0.0,0,1,3,2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
3,0,4,0,3,0.0,1.0,0,2,2,3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
4,0,5,0,4,0.0,1.0,0,3,3,2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0


In [6]:
# Keep only the columns used for modelling (features followed by the target),
# in the same order for both teams.
selected_columns = [
    'round', 'ct_alive', 't_alive', 'ct_winner', 'bomb_planted',
    'ct_cons_wins', 't_cons_wins', 'round_type',
    'de_cache', 'de_cbble', 'de_dust2', 'de_inferno',
    'de_mirage', 'de_nuke', 'de_overpass', 'de_train',
    'nxt_rnd_type', 'nxt_ct_winner',
]

ct_df = ct_df[selected_columns]
t_df = t_df[selected_columns]

#### We are going to predict <code>nxt_ct_winner</code>, which is 1 if CT wins the next round and 0 if T wins it

## Define features and target

In [7]:
# Show the column index of the T table (the cell's last expression is rendered).
t_df.columns

Index(['round', 'ct_alive', 't_alive', 'ct_winner', 'bomb_planted',
       'ct_cons_wins', 't_cons_wins', 'round_type', 'de_cache', 'de_cbble',
       'de_dust2', 'de_inferno', 'de_mirage', 'de_nuke', 'de_overpass',
       'de_train', 'nxt_rnd_type', 'nxt_ct_winner'],
      dtype='object')

In [8]:
# Feature columns, grouped by kind. Both teams use the same column names;
# the two lists are kept as separate objects so either can be changed alone.
_ROUND_STATE_FEATS = [
    'round', 'ct_alive', 't_alive', 'ct_winner', 'bomb_planted',
    'ct_cons_wins', 't_cons_wins', 'round_type',
]
_MAP_FEATS = [
    'de_cache', 'de_cbble', 'de_dust2', 'de_inferno',
    'de_mirage', 'de_nuke', 'de_overpass', 'de_train',
]

CT_FEATS = [*_ROUND_STATE_FEATS, *_MAP_FEATS, 'nxt_rnd_type']
T_FEATS = list(CT_FEATS)

# Name of the column to predict.
TARGET = 'nxt_ct_winner'

## Split

In [9]:
# Hold out a test split for the CT team.
# A fixed random_state makes the split (and any score computed on it)
# reproducible across kernel restarts; stratifying on the target keeps the
# class ratio the same in the train and test parts.
X_train, X_test, y_train, y_test = train_test_split(
    ct_df[CT_FEATS], ct_df[TARGET],
    random_state=42, stratify=ct_df[TARGET])  # CT Team

# X_train, X_test, y_train, y_test = train_test_split(
#     t_df[T_FEATS], t_df[TARGET],
#     random_state=42, stratify=t_df[TARGET])  # T Team

## MODEL

In [10]:
# Preprocessing: standard-scale every feature column.
# One ColumnTransformer per team, each bound to that team's feature list.
numeric_transformer = Pipeline([('scaler', StandardScaler())])

preprocessor_ct = ColumnTransformer([('num', numeric_transformer, CT_FEATS)])
preprocessor_t = ColumnTransformer([('num', numeric_transformer, T_FEATS)])

### CT Classifier

In [11]:
# Disabled cell: estimator for the hold-out evaluation of the CT team.
# NOTE(review): `bagging_frequency` does not appear to be a LightGBM parameter
# name (the documented one is the integer `bagging_freq`) - confirm before
# re-enabling this cell.
# regressor = LGBMClassifier(num_leaves=50,
#                           n_estimators=350,
#                           min_data_in_leaf=15,
#                           max_depth=7,
#                           learning_rate=0.055,
#                           feature_fraction=0.8,
#                           bagging_frequency=0.75,
#                           bagging_fraction=0.75)

### T Classifier

In [12]:
# Disabled cell: estimator for the hold-out evaluation of the T team.
# It binds the same name (`regressor`) as the CT cell, so enabling both would
# leave only this one bound.
# NOTE(review): `bagging_frequency` does not appear to be a LightGBM parameter
# name (the documented one is the integer `bagging_freq`) - confirm before
# re-enabling this cell.
# regressor = LGBMClassifier(num_leaves=50,
#                           n_estimators=350,
#                           min_data_in_leaf=15,
#                           max_depth=7,
#                           learning_rate=0.055,
#                           feature_fraction=0.8,
#                           bagging_frequency=0.75,
#                           bagging_fraction=0.75)

### Models

In [13]:
# Disabled cell: evaluation pipelines (preprocessor + classifier) per team.
# NOTE(review): both pipelines would wrap the same `regressor` object, so
# fitting one would refit the estimator held by the other - give each pipeline
# its own estimator instance if this cell is re-enabled.
# ct_model = Pipeline(steps=[('ct_preprocessor', preprocessor_ct),
#                            ('regressor', regressor)])

# t_model = Pipeline(steps=[('t_preprocessor', preprocessor_t),
#                            ('regressor', regressor)])

In [14]:
# Disabled cell: fit on the training split.
# X_train / y_train come from whichever train_test_split line is active in the
# "Split" cell (CT by default), so the T line only makes sense after switching
# that cell to the T data.
# ct_model.fit(X_train, y_train);

# t_model.fit(X_train, y_train);

### **CHECK PERFORMANCE**

In [15]:
# Disabled cell: score the fitted pipeline on the held-out split.
# X_test / y_test must come from the same team as the model being scored.
# ct_model.score(X_test, y_test)

# t_model.score(X_test, y_test)

In [16]:
# Disabled cell: per-class classification report on the held-out split,
# printed with 6 decimal digits. One variant per team; X_test / y_test must
# come from the same team as the model being evaluated.
# sk_report = classification_report(
#     digits=6,
#     y_true=y_test, 
#     y_pred=ct_model.predict(X_test))
# print(sk_report)


# sk_report = classification_report(
#     digits=6,
#     y_true=y_test, 
#     y_pred=t_model.predict(X_test))
# print(sk_report)

### **TRAIN MODEL WITH FULL DATASET**

#### **CT Model. Train & Save**

In [17]:
# Final CT model: scaler + LightGBM classifier fitted on the full CT table
# (no hold-out). `ct_model` is saved in the next cell.
#
# Hyper-parameter notes:
#   * LightGBM's bagging frequency parameter is the integer `bagging_freq`.
#     `bagging_frequency=0.75` is not a recognised name, so bagging stayed
#     disabled and `bagging_fraction` had no effect. `bagging_freq=1` resamples
#     75% of the rows at every boosting iteration.
#   * `random_state` fixes the row/feature subsampling so a retrain yields the
#     same model.
regressor = LGBMClassifier(num_leaves=50,
                           n_estimators=350,
                           min_data_in_leaf=15,
                           max_depth=7,
                           learning_rate=0.055,
                           feature_fraction=0.8,
                           bagging_freq=1,
                           bagging_fraction=0.75,
                           random_state=42)

ct_model = Pipeline(steps=[('ct_preprocessor', preprocessor_ct),
                           ('regressor', regressor)])

ct_model.fit(ct_df[CT_FEATS], ct_df[TARGET]);

# In-sample predictions (same rows the model was fitted on); these are not an
# estimate of out-of-sample performance.
ct_df_pred_nxt_rnd = ct_model.predict(ct_df[CT_FEATS])

In [18]:
# Persist the fitted CT pipeline (preprocessor + classifier) to disk.
dump(value=ct_model, filename='../models/db_ct_winner_team.joblib')

['../models/db_ct_winner_team.joblib']

#### **T Model. Train & Save**

In [18]:
# Final T model: scaler + LightGBM classifier fitted on the full T table
# (no hold-out). `t_model` is saved in the next cell.
#
# Hyper-parameter notes:
#   * LightGBM's bagging frequency parameter is the integer `bagging_freq`.
#     `bagging_frequency=0.75` is not a recognised name, so bagging stayed
#     disabled and `bagging_fraction` had no effect. `bagging_freq=1` resamples
#     75% of the rows at every boosting iteration.
#   * `random_state` fixes the row/feature subsampling so a retrain yields the
#     same model.
regressor = LGBMClassifier(num_leaves=50,
                           n_estimators=350,
                           min_data_in_leaf=15,
                           max_depth=7,
                           learning_rate=0.055,
                           feature_fraction=0.8,
                           bagging_freq=1,
                           bagging_fraction=0.75,
                           random_state=42)

t_model = Pipeline(steps=[('t_preprocessor', preprocessor_t),
                          ('regressor', regressor)])

t_model.fit(t_df[T_FEATS], t_df[TARGET]);

# In-sample predictions (same rows the model was fitted on); these are not an
# estimate of out-of-sample performance.
t_df_pred_nxt_rnd = t_model.predict(t_df[T_FEATS])

In [19]:
# Persist the fitted T pipeline (preprocessor + classifier) to disk.
dump(value=t_model, filename='../models/db_t_winner_team.joblib')

['../models/db_t_winner_team.joblib']