## **Supervised ML classification algorithm to predict the winner team (CT or T)**
## **Encoding**

In [8]:
import pandas as pd

pd.set_option('display.max_columns', 30)
pd.set_option('display.max_rows', 30)

In [9]:
df = pd.read_csv('../data/processed/5_base_predict_winner.csv')

In [10]:
df.head()

Unnamed: 0,file,round,wp_ct_val,wp_t_val,nade_ct_val,nade_t_val,ct_alive,t_alive,prev_ct_winner,ct_winner,bomb_planted,ct_cons_wins,t_cons_wins,ct_val_pred,t_val_pred,ct_round_type,t_round_type,ct_nxt_rnd_type,t_nxt_rnd_type,ct_nxt_rnd_type_pred,t_nxt_rnd_type_pred
0,0,1,1000.0,1166.666667,550,1200,5,5,0.5,1,0.5,0,0,4078.134589,3943.272665,PISTOL_ROUND,PISTOL_ROUND,MEDIUM,ECO,MEDIUM,ECO
1,0,2,10100.0,3687.5,1100,50,4,0,1.0,0,0.0,1,0,17819.702711,6290.616771,MEDIUM,ECO,MEDIUM,FULL,FULL,ECO
2,0,3,4125.0,11700.0,900,2450,0,1,0.0,0,0.0,0,1,7038.468589,19600.790638,MEDIUM,FULL,ECO,MEDIUM,ECO,FULL
3,0,4,1000.0,11700.0,0,1600,0,3,0.0,0,1.0,0,2,1452.468928,22568.098741,ECO,MEDIUM,FULL,FULL,FULL,FULL
4,0,5,15500.0,12750.0,1400,1700,0,4,0.0,1,1.0,0,3,22676.205763,24459.855175,FULL,FULL,FULL,MEDIUM,ECO,FULL


In [11]:
round_type_dic = {'PISTOL_ROUND':0, 'ECO':1, 'MEDIUM':2, 'FULL':3, 'LAST':4}

In [12]:
columns_encoding = ['ct_round_type', 'ct_nxt_rnd_type', 'ct_nxt_rnd_type_pred',
                    't_round_type', 't_nxt_rnd_type', 't_nxt_rnd_type_pred']

for col in columns_encoding:
    df[col] = df[col].apply(lambda x: round_type_dic[x])

In [13]:
df.head()

Unnamed: 0,file,round,wp_ct_val,wp_t_val,nade_ct_val,nade_t_val,ct_alive,t_alive,prev_ct_winner,ct_winner,bomb_planted,ct_cons_wins,t_cons_wins,ct_val_pred,t_val_pred,ct_round_type,t_round_type,ct_nxt_rnd_type,t_nxt_rnd_type,ct_nxt_rnd_type_pred,t_nxt_rnd_type_pred
0,0,1,1000.0,1166.666667,550,1200,5,5,0.5,1,0.5,0,0,4078.134589,3943.272665,0,0,2,1,2,1
1,0,2,10100.0,3687.5,1100,50,4,0,1.0,0,0.0,1,0,17819.702711,6290.616771,2,1,2,3,3,1
2,0,3,4125.0,11700.0,900,2450,0,1,0.0,0,0.0,0,1,7038.468589,19600.790638,2,3,1,2,1,3
3,0,4,1000.0,11700.0,0,1600,0,3,0.0,0,1.0,0,2,1452.468928,22568.098741,1,2,3,3,3,3
4,0,5,15500.0,12750.0,1400,1700,0,4,0.0,1,1.0,0,3,22676.205763,24459.855175,3,3,3,2,1,3


## **Preprocessing**

The <code>bomb_planted</code> column refers to the previous round. Let's specify this by changing the name to <code>prev_bomb_planted</code> and let's add a new column <code>bomb_planted</code> with **1 if the bomb is planted in the current round and 0 if not**.

We will take this column from the original dataset <code>esea_master_kills_demos.csv</code>

In [14]:
df.rename(columns={'bomb_planted':'prev_bomb_planted'}, inplace=True)

In [17]:
meta_demos = pd.read_csv('../data/csgo/esea_meta_demos.csv')
kills_demos = pd.read_csv('../data/csgo/esea_master_kills_demos.csv') 

We load the dataset <code>esea_meta_demos.csv</code> to take the list of files we are woring with

In [19]:
files_complete = [file for file in meta_demos['file'].unique() if\
                     meta_demos[meta_demos['file'] == file]['round'].max() == \
                     len(meta_demos[meta_demos['file'] == file])]

In [39]:
bomb_planted = []

for file in files_complete:
    kills_file = kills_demos[kills_demos['file'] == file]
    rounds = kills_file['round'].unique()
    for rnd in rounds:
        kills_rnd = kills_file[kills_file['round'] == rnd]
        if True in kills_rnd['is_bomb_planted'].to_list():
            bomb_planted.append(1)
        else:
            bomb_planted.append(0)

In [40]:
print(len(bomb_planted))
print(len(df))

310813
310813


In [41]:
df['bomb_planted'] = bomb_planted

In [43]:
df.columns

Index(['file', 'round', 'wp_ct_val', 'wp_t_val', 'nade_ct_val', 'nade_t_val',
       'ct_alive', 't_alive', 'prev_ct_winner', 'ct_winner',
       'prev_bomb_planted', 'ct_cons_wins', 't_cons_wins', 'ct_val_pred',
       't_val_pred', 'ct_round_type', 't_round_type', 'ct_nxt_rnd_type',
       't_nxt_rnd_type', 'ct_nxt_rnd_type_pred', 't_nxt_rnd_type_pred',
       'bomb_planted'],
      dtype='object')

In [44]:
columns = ['file', 'round', 'wp_ct_val', 'wp_t_val', 'nade_ct_val', 'nade_t_val',
       'ct_alive', 't_alive', 'prev_ct_winner', 'ct_winner',
       'prev_bomb_planted', 'bomb_planted', 'ct_cons_wins', 't_cons_wins', 'ct_val_pred',
       't_val_pred', 'ct_round_type', 't_round_type', 'ct_nxt_rnd_type',
       't_nxt_rnd_type', 'ct_nxt_rnd_type_pred', 't_nxt_rnd_type_pred']

In [45]:
df = df[columns]
df.head()

Unnamed: 0,file,round,wp_ct_val,wp_t_val,nade_ct_val,nade_t_val,ct_alive,t_alive,prev_ct_winner,ct_winner,prev_bomb_planted,bomb_planted,ct_cons_wins,t_cons_wins,ct_val_pred,t_val_pred,ct_round_type,t_round_type,ct_nxt_rnd_type,t_nxt_rnd_type,ct_nxt_rnd_type_pred,t_nxt_rnd_type_pred
0,0,1,1000.0,1166.666667,550,1200,5,5,0.5,1,0.5,0,0,0,4078.134589,3943.272665,0,0,2,1,2,1
1,0,2,10100.0,3687.5,1100,50,4,0,1.0,0,0.0,0,1,0,17819.702711,6290.616771,2,1,2,3,3,1
2,0,3,4125.0,11700.0,900,2450,0,1,0.0,0,0.0,1,0,1,7038.468589,19600.790638,2,3,1,2,1,3
3,0,4,1000.0,11700.0,0,1600,0,3,0.0,0,1.0,1,0,2,1452.468928,22568.098741,1,2,3,3,3,3
4,0,5,15500.0,12750.0,1400,1700,0,4,0.0,1,1.0,0,0,3,22676.205763,24459.855175,3,3,3,2,1,3


### **Create the TARGET** (column to predict)
#### <code>nxt_ct_winner</code>

In [53]:
%%time

files = df['file'].unique()

nxt_ct_winner = []

log = 0

for file in files:

    df_file = df[df['file'] == file] 
    rounds = df_file['round'].unique()
    
    if log%1000 == 0:
        print(f'{log}/12185')    
    log += 1

    for rnd in rounds:
        if rnd == rounds[-1]:
            nxt_ct_winner.append(0)
        else:
            nxt_ct_winner.append(df_file[df_file['round'] == rnd + 1]['bomb_planted'].values[0])
            
df['nxt_ct_winner'] = nxt_ct_winner

0/12185
1000/12185
2000/12185
3000/12185
4000/12185
5000/12185
6000/12185
7000/12185
8000/12185
9000/12185
10000/12185
11000/12185
12000/12185
CPU times: user 2min 16s, sys: 60.1 ms, total: 2min 16s
Wall time: 2min 16s


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [54]:
df.head()

Unnamed: 0,file,round,wp_ct_val,wp_t_val,nade_ct_val,nade_t_val,ct_alive,t_alive,prev_ct_winner,ct_winner,prev_bomb_planted,bomb_planted,ct_cons_wins,t_cons_wins,ct_val_pred,t_val_pred,ct_round_type,t_round_type,ct_nxt_rnd_type,t_nxt_rnd_type,ct_nxt_rnd_type_pred,t_nxt_rnd_type_pred,nxt_ct_winner
0,0,1,1000.0,1166.666667,550,1200,5,5,0.5,1,0.5,0,0,0,4078.134589,3943.272665,0,0,2,1,2,1,0
1,0,2,10100.0,3687.5,1100,50,4,0,1.0,0,0.0,0,1,0,17819.702711,6290.616771,2,1,2,3,3,1,1
2,0,3,4125.0,11700.0,900,2450,0,1,0.0,0,0.0,1,0,1,7038.468589,19600.790638,2,3,1,2,1,3,1
3,0,4,1000.0,11700.0,0,1600,0,3,0.0,0,1.0,1,0,2,1452.468928,22568.098741,1,2,3,3,3,3,0
4,0,5,15500.0,12750.0,1400,1700,0,4,0.0,1,1.0,0,0,3,22676.205763,24459.855175,3,3,3,2,1,3,0


#### Last step before going into the ml classification algorithm, let's remove the following columns:
- <code>ct_nxt_rnd_type</code>
- <code>t_nxt_rnd_type</code> 	


In [56]:
df.drop(columns=['ct_nxt_rnd_type', 't_nxt_rnd_type'], inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [57]:
df.head()

Unnamed: 0,file,round,wp_ct_val,wp_t_val,nade_ct_val,nade_t_val,ct_alive,t_alive,prev_ct_winner,ct_winner,prev_bomb_planted,bomb_planted,ct_cons_wins,t_cons_wins,ct_val_pred,t_val_pred,ct_round_type,t_round_type,ct_nxt_rnd_type_pred,t_nxt_rnd_type_pred,nxt_ct_winner
0,0,1,1000.0,1166.666667,550,1200,5,5,0.5,1,0.5,0,0,0,4078.134589,3943.272665,0,0,2,1,0
1,0,2,10100.0,3687.5,1100,50,4,0,1.0,0,0.0,0,1,0,17819.702711,6290.616771,2,1,3,1,1
2,0,3,4125.0,11700.0,900,2450,0,1,0.0,0,0.0,1,0,1,7038.468589,19600.790638,2,3,1,3,1
3,0,4,1000.0,11700.0,0,1600,0,3,0.0,0,1.0,1,0,2,1452.468928,22568.098741,1,2,3,3,0
4,0,5,15500.0,12750.0,1400,1700,0,4,0.0,1,1.0,0,0,3,22676.205763,24459.855175,3,3,1,3,0


In [59]:
df.to_csv('../data/processed/5_1_base_predict_winner.csv', index=False)