In [1]:
import pandas as pd

from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

from lightgbm import LGBMClassifier
from joblib import dump

In [2]:
# Notebook display limits: render at most 30 columns and 30 rows per frame.
for _option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(_option, 30)

### Data

In [3]:
# Read the CT-side and the T-side input tables from ../data/processed.
ct_df, t_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/processed/4_base_predict_next_rnd_ct_type.csv',
        '../data/processed/4_base_predict_next_rnd_t_type.csv',
    )
)

In [4]:
# Show the column names of the T-side frame as loaded, before any column selection.
t_df.columns

Index(['file', 'round', 'wp_t_val', 'nade_t_val', 'ct_alive', 't_alive',
       'ct_winner', 'bomb_planted', 'ct_cons_wins', 't_cons_wins',
       't_val_pred', 'round_type', 'nxt_rnd_type'],
      dtype='object')

In [5]:
# Keep only the columns shared by both sides; side-specific value columns
# (e.g. 'wp_t_val', 'nade_t_val', 't_val_pred' in the T-side frame) are dropped.
# .copy() makes each result an independent frame, so the column assignments in
# later cells modify it directly instead of triggering pandas'
# SettingWithCopyWarning on a slice of the loaded data.
BASE_COLUMNS = ['file', 'round', 'ct_alive', 't_alive', 'ct_winner', 'bomb_planted',
                'ct_cons_wins', 't_cons_wins', 'round_type', 'nxt_rnd_type']

ct_df = ct_df[BASE_COLUMNS].copy()
t_df = t_df[BASE_COLUMNS].copy()

In [6]:
# Preview the first rows of the CT-side frame, then of the T-side frame.
display(ct_df.head(), t_df.head())

Unnamed: 0,file,round,ct_alive,t_alive,ct_winner,bomb_planted,ct_cons_wins,t_cons_wins,round_type,nxt_rnd_type
0,0,1,5,5,0.5,0.5,0,0,PISTOL_ROUND,MEDIUM
1,0,2,4,0,1.0,0.0,1,0,MEDIUM,MEDIUM
2,0,3,0,1,0.0,0.0,0,1,MEDIUM,ECO
3,0,4,0,3,0.0,1.0,0,2,ECO,FULL
4,0,5,0,4,0.0,1.0,0,3,FULL,FULL


Unnamed: 0,file,round,ct_alive,t_alive,ct_winner,bomb_planted,ct_cons_wins,t_cons_wins,round_type,nxt_rnd_type
0,0,1,5,5,0.5,0.5,0,0,PISTOL_ROUND,ECO
1,0,2,4,0,1.0,0.0,1,0,ECO,FULL
2,0,3,0,1,0.0,0.0,0,1,FULL,MEDIUM
3,0,4,0,3,0.0,1.0,0,2,MEDIUM,FULL
4,0,5,0,4,0.0,1.0,0,3,FULL,MEDIUM


In [7]:
# Load the team-value table; its 'file' column (the demo file name) is the only
# part of this frame that the cells below use.
df = pd.read_csv('../data/processed/base_to_ml_predicted_team_value.csv')

In [8]:
# Preview the first rows of the team-value table.
df.head()

Unnamed: 0,file,round,wp_ct_val,wp_t_val,nade_ct_val,nade_t_val,ct_alive,t_alive,ct_winner,bomb_planted,ct_cons_wins,t_cons_wins,ct_val_real,t_val_real,round_type
0,esea_match_13779704.dem,1,1000.0,1166.666667,550,1200,5,5,0.5,0.5,0,0,4550,3850,PISTOL_ROUND
1,esea_match_13779704.dem,2,10100.0,3687.5,1100,50,4,0,1.0,0.0,1,0,18450,5300,ECO
2,esea_match_13779704.dem,3,4125.0,11700.0,900,2450,0,1,0.0,0.0,0,1,9550,22900,SEMI_ECO
3,esea_match_13779704.dem,4,1000.0,11700.0,0,1600,0,3,0.0,1.0,0,2,1600,19650,ECO
4,esea_match_13779704.dem,5,15500.0,12750.0,1400,1700,0,4,0.0,1.0,0,3,23350,21750,NORMAL


In [9]:
# Attach the demo file name from `df` to every row of both side frames.
# Rows are matched by position, so fail loudly if the tables do not have the
# same number of rows; a silent mismatch would leave missing file names that
# only surface later as a KeyError during the map lookup.
assert len(ct_df) == len(df), 'ct_df and df must have the same number of rows'
assert len(t_df) == len(df), 't_df and df must have the same number of rows'

ct_df['file_name'] = df['file'].values
t_df['file_name'] = df['file'].values

In [10]:
# Load the demo metadata table; it provides the map played in each demo file.
meta_demos = pd.read_csv('../data/csgo/esea_meta_demos.csv')

In [11]:
# Preview the first rows of the demo metadata.
meta_demos.head()

Unnamed: 0.1,Unnamed: 0,file,map,round,start_seconds,end_seconds,winner_team,winner_side,round_type,ct_eq_val,t_eq_val
0,0,esea_match_13770997.dem,de_overpass,1,94.30782,160.9591,Hentai Hooligans,Terrorist,PISTOL_ROUND,4300,4250
1,1,esea_match_13770997.dem,de_overpass,2,160.9591,279.3998,Hentai Hooligans,Terrorist,ECO,6300,19400
2,2,esea_match_13770997.dem,de_overpass,3,279.3998,341.0084,Hentai Hooligans,Terrorist,SEMI_ECO,7650,19250
3,3,esea_match_13770997.dem,de_overpass,4,341.0084,435.4259,Hentai Hooligans,Terrorist,NORMAL,24900,23400
4,4,esea_match_13770997.dem,de_overpass,5,435.4259,484.2398,Animal Style,CounterTerrorist,ECO,5400,20550


In [12]:
# Reduce the demo metadata to the (file, map) columns and preview the result.
meta_demos_test = meta_demos.loc[:, ['file', 'map']]
meta_demos_test.head()

Unnamed: 0,file,map
0,esea_match_13770997.dem,de_overpass
1,esea_match_13770997.dem,de_overpass
2,esea_match_13770997.dem,de_overpass
3,esea_match_13770997.dem,de_overpass
4,esea_match_13770997.dem,de_overpass


In [13]:
# The metadata repeats the same (file, map) pair on several rows (see the preview),
# so collapse it to the distinct pairs.
meta_demos_test = meta_demos_test.drop_duplicates()

In [14]:
# Build a {demo file name -> map name} lookup.
# The columns are selected with ['...'] rather than attribute access: on pandas
# versions where DataFrame defines a `.map` method (2.1+), `meta_demos_test.map`
# resolves to that method instead of the column, and `.values` on it raises
# AttributeError.
file_map_dic = pd.Series(meta_demos_test['map'].values, index=meta_demos_test['file']).to_dict()

In [15]:
# Look up each row's map from its demo file name; an unknown file name raises KeyError.
ct_df['map'] = [file_map_dic[demo_file] for demo_file in ct_df['file_name']]
t_df['map'] = [file_map_dic[demo_file] for demo_file in t_df['file_name']]

In [16]:
# 'file_name' was only needed for the map lookup; remove it from both frames.
ct_df = ct_df.drop(columns='file_name')
t_df = t_df.drop(columns='file_name')

## Encoding

In [17]:
# Integer code assigned to each round-type label (codes follow the listed order).
round_type_dic = {
    label: code
    for code, label in enumerate(('PISTOL_ROUND', 'ECO', 'MEDIUM', 'FULL', 'LAST'))
}

In [18]:
# Replace the round-type labels with their integer codes, in both the
# current-round column and the target column of each side frame.
# A label that is missing from round_type_dic raises KeyError.
for side_df in (ct_df, t_df):
    for label_col in ('round_type', 'nxt_rnd_type'):
        side_df[label_col] = side_df[label_col].apply(round_type_dic.__getitem__)

In [19]:
# One-hot encode the 'map' column of each side frame and append the indicator
# columns (named 'map_<map name>') in place of the original column.
#
# * Column names are read from encoder.categories_ right after fitting on the
#   same frame. The encoder is re-fitted for the T side, so reading the names
#   only at the end would label the CT columns with the T side's categories.
# * The default sparse output is densified with .toarray() and the names are
#   built from categories_, so the cell does not rely on the `sparse=` argument
#   or on `get_feature_names()`, which newer scikit-learn releases no longer accept.
# * Each encoded frame reuses the source frame's index so the column-wise
#   concat lines up row for row even when the index is not a default RangeIndex.
encoder = OneHotEncoder()

ct_map_onehot = encoder.fit_transform(ct_df[['map']]).toarray()
ct_df_enc = pd.DataFrame(ct_map_onehot,
                         index=ct_df.index,
                         columns=['map_' + str(name) for name in encoder.categories_[0]])

t_map_onehot = encoder.fit_transform(t_df[['map']]).toarray()
t_df_enc = pd.DataFrame(t_map_onehot,
                        index=t_df.index,
                        columns=['map_' + str(name) for name in encoder.categories_[0]])

ct_df = pd.concat([ct_df.drop(columns='map'), ct_df_enc], axis=1)
t_df = pd.concat([t_df.drop(columns='map'), t_df_enc], axis=1)

In [20]:
# Strip the 'map_' prefix from the one-hot map columns of both frames.
MAP_COLUMN_RENAMES = {
    'map_de_cache': 'de_cache',
    'map_de_cbble': 'de_cbble',
    'map_de_dust2': 'de_dust2',
    'map_de_inferno': 'de_inferno',
    'map_de_mirage': 'de_mirage',
    'map_de_nuke': 'de_nuke',
    'map_de_overpass': 'de_overpass',
    'map_de_train': 'de_train',
}

ct_df = ct_df.rename(columns=MAP_COLUMN_RENAMES)
t_df = t_df.rename(columns=MAP_COLUMN_RENAMES)

In [22]:
# Write both encoded frames to ../data/processed without the row index.
# Note: at this point in the notebook the frames do not yet contain the
# prediction columns that are added further down.
for side_df, csv_path in ((ct_df, '../data/processed/for_db_2_ct.csv'),
                          (t_df, '../data/processed/for_db_2_t.csv')):
    side_df.to_csv(csv_path, index=False)

In [21]:
# Preview the encoded CT-side frame, then the encoded T-side frame.
display(ct_df.head(), t_df.head())

Unnamed: 0,file,round,ct_alive,t_alive,ct_winner,bomb_planted,ct_cons_wins,t_cons_wins,round_type,nxt_rnd_type,de_cache,de_cbble,de_dust2,de_inferno,de_mirage,de_nuke,de_overpass,de_train
0,0,1,5,5,0.5,0.5,0,0,0,2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0,2,4,0,1.0,0.0,1,0,2,2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0,3,0,1,0.0,0.0,0,1,2,1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0,4,0,3,0.0,1.0,0,2,1,3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0,5,0,4,0.0,1.0,0,3,3,3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Unnamed: 0,file,round,ct_alive,t_alive,ct_winner,bomb_planted,ct_cons_wins,t_cons_wins,round_type,nxt_rnd_type,de_cache,de_cbble,de_dust2,de_inferno,de_mirage,de_nuke,de_overpass,de_train
0,0,1,5,5,0.5,0.5,0,0,0,1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0,2,4,0,1.0,0.0,1,0,1,3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0,3,0,1,0.0,0.0,0,1,3,2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0,4,0,3,0.0,1.0,0,2,2,3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0,5,0,4,0.0,1.0,0,3,3,2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Define features and target

In [22]:
# Model inputs: the per-round state columns followed by the one-hot map columns.
# Both sides currently use the same columns; CT_FEATS and T_FEATS are still
# separate list objects so either can be changed on its own.
# NOTE(review): 'file' looks like a match identifier — confirm it is meant to
# be a model input.
_STATE_FEATS = ['file', 'round', 'ct_alive', 't_alive', 'ct_winner', 'bomb_planted',
                'ct_cons_wins', 't_cons_wins', 'round_type']
_MAP_FEATS = ['de_cache', 'de_cbble', 'de_dust2', 'de_inferno', 'de_mirage',
              'de_nuke', 'de_overpass', 'de_train']

CT_FEATS = _STATE_FEATS + _MAP_FEATS
T_FEATS = _STATE_FEATS + _MAP_FEATS
TARGET = 'nxt_rnd_type'

In [23]:
#Preprocessor

# Every feature column is standardised with StandardScaler; each side gets its
# own ColumnTransformer built around the same scaling pipeline.
numeric_transformer = Pipeline(steps=[('scaler', StandardScaler())])

preprocessor_ct, preprocessor_t = (
    ColumnTransformer(transformers=[('num', numeric_transformer, feature_cols)])
    for feature_cols in (CT_FEATS, T_FEATS)
)

### **SPLITS**

In [24]:
# Hold-out split for evaluating one side at a time (CT active, T commented out).
# random_state pins the shuffle so the split, and any score computed on it, is
# reproducible across kernel restarts; without it every run draws a new split.
X_train, X_test, y_train, y_test = train_test_split(
    ct_df[CT_FEATS], ct_df[TARGET], random_state=42)  # CT Team

# X_train, X_test, y_train, y_test = train_test_split(
#     t_df[T_FEATS], t_df[TARGET], random_state=42)  # T Team

### **MODEL**

### CT Classifier

In [25]:
# regressor = LGBMClassifier(num_leaves=50,
#                           n_estimators=350,
#                           min_data_in_leaf=15,
#                           max_depth=7,
#                           learning_rate=0.055,
#                           feature_fraction=0.8,
#                           bagging_frequency=0.75,
#                           bagging_fraction=0.75)

### T Classifier

In [26]:
# regressor = LGBMClassifier(num_leaves=50,
#                           n_estimators=350,
#                           min_data_in_leaf=15,
#                           max_depth=7,
#                           learning_rate=0.055,
#                           feature_fraction=0.8,
#                           bagging_frequency=0.75,
#                           bagging_fraction=0.75)

### Models

In [27]:
# ct_model = Pipeline(steps=[('ct_preprocessor', preprocessor_ct),
#                            ('regressor', regressor)])

# t_model = Pipeline(steps=[('t_preprocessor', preprocessor_t),
#                            ('regressor', regressor)])

In [28]:
# ct_model.fit(X_train, y_train);

# t_model.fit(X_train, y_train);

### **CHECK PERFORMANCE**

In [29]:
# ct_model.score(X_test, y_test)

# t_model.score(X_test, y_test)

In [30]:
# sk_report = classification_report(
#     digits=6,
#     y_true=y_test, 
#     y_pred=ct_model.predict(X_test))
# print(sk_report)


# sk_report = classification_report(
#     digits=6,
#     y_true=y_test, 
#     y_pred=t_model.predict(X_test))
# print(sk_report)

### **TRAIN MODEL WITH FULL DATASET**

#### **CT Model. Train & Save**

In [31]:
# Train the CT-side next-round-type classifier on the full CT dataset.
#
# `bagging_freq=1` replaces `bagging_frequency=0.75`: LightGBM has no parameter
# of that name, so it was ignored, bagging_freq stayed at its default of 0 and
# `bagging_fraction=0.75` never took effect. bagging_freq is an integer
# ("bag every k iterations"); 1 re-samples the rows on every iteration.
regressor = LGBMClassifier(num_leaves=50,
                           n_estimators=350,
                           min_data_in_leaf=15,
                           max_depth=7,
                           learning_rate=0.055,
                           feature_fraction=0.8,
                           bagging_freq=1,
                           bagging_fraction=0.75)

# Scaling and the classifier are bundled so the saved model can be applied to
# raw feature columns.
ct_model = Pipeline(steps=[('ct_preprocessor', preprocessor_ct),
                           ('regressor', regressor)])

ct_model.fit(ct_df[CT_FEATS], ct_df[TARGET]);

# In-sample predictions: the model scores the same rows it was fitted on.
ct_df_pred_nxt_rnd = ct_model.predict(ct_df[CT_FEATS])

In [32]:
# Serialise the fitted CT pipeline (preprocessor + classifier) with joblib.
dump(ct_model, '../models/db_ct_nxt_rnd_type.joblib') # Save model

['../models/db_ct_nxt_rnd_type.joblib']

In [33]:
# Store the CT model's predicted next-round type next to the true target.
ct_df = ct_df.assign(ct_nxt_rnd_type_pred=ct_df_pred_nxt_rnd)

#### **T Model. Train & Save**

In [31]:
# Train the T-side next-round-type classifier on the full T dataset.
#
# `bagging_freq=1` replaces `bagging_frequency=0.75`: LightGBM has no parameter
# of that name, so it was ignored, bagging_freq stayed at its default of 0 and
# `bagging_fraction=0.75` never took effect. bagging_freq is an integer
# ("bag every k iterations"); 1 re-samples the rows on every iteration.
regressor = LGBMClassifier(num_leaves=50,
                           n_estimators=350,
                           min_data_in_leaf=15,
                           max_depth=7,
                           learning_rate=0.055,
                           feature_fraction=0.8,
                           bagging_freq=1,
                           bagging_fraction=0.75)

# Scaling and the classifier are bundled so the saved model can be applied to
# raw feature columns.
t_model = Pipeline(steps=[('t_preprocessor', preprocessor_t),
                          ('regressor', regressor)])

t_model.fit(t_df[T_FEATS], t_df[TARGET]);

# In-sample predictions: the model scores the same rows it was fitted on.
t_df_pred_nxt_rnd = t_model.predict(t_df[T_FEATS])

In [32]:
# Serialise the fitted T pipeline (preprocessor + classifier) with joblib.
dump(t_model, '../models/db_t_nxt_rnd_type.joblib') # Save model

['../models/db_t_nxt_rnd_type.joblib']

In [33]:
# Store the T model's predicted next-round type next to the true target.
t_df = t_df.assign(t_nxt_rnd_type_pred=t_df_pred_nxt_rnd)

In [35]:
# Final column layout of the T-side frame, including the prediction column.
list(t_df.columns)

['file',
 'round',
 'ct_alive',
 't_alive',
 'ct_winner',
 'bomb_planted',
 'ct_cons_wins',
 't_cons_wins',
 'round_type',
 'nxt_rnd_type',
 'de_cache',
 'de_cbble',
 'de_dust2',
 'de_inferno',
 'de_mirage',
 'de_nuke',
 'de_overpass',
 'de_train',
 't_nxt_rnd_type_pred']