## **Supervised ML regression algorithm to predict next round team value (CT & T)**
## **Preprocessing**

Input data: the DataFrames obtained from the previous prediction step:
- ct_predicted_value
- t_predicted_value

Preprocess the data to create a new target column: the next-round team value (**nxt_rnd_ct_val** & **nxt_rnd_t_val**)

In [1]:
import pandas as pd

In [2]:
# Cap both the number of columns and the number of rows pandas renders at 30.
for option_name in ('display.max_columns', 'display.max_rows'):
    pd.set_option(option_name, 30)

### Data

In [3]:
# Load the CT-side and T-side frames from the results folder, in that order.
ct_df, t_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/results/ct_predicted_value',
        '../data/results/t_predicted_value',
    )
)

In [4]:
# df = pd.read_csv('../data/processed/base_to_ml_predicted_team_value.csv')

### Preprocessing

Erase column 'round_type' because we cannot know this value from the live records

## Preprocessing

To train the model we need to know the team value for the next round. 

We will train with the predicted value for the current round and the real value for the next round.

In [5]:
# Remove the 'Unnamed: 0' column from both frames
# (presumably the index saved by the upstream notebook - confirm).
# errors='ignore' keeps this cell re-runnable: without it a second execution,
# or an input CSV that lacks the column, raises KeyError.
ct_df.drop(columns=['Unnamed: 0'], inplace=True, errors='ignore')
t_df.drop(columns=['Unnamed: 0'], inplace=True, errors='ignore')

#### Create the lists **nxt_rnd_ct_val** & **nxt_rnd_t_val** that will become the new target columns

In [6]:
# Two independent empty lists that will hold the next-round values per side.
nxt_rnd_ct_val, nxt_rnd_t_val = [], []

In [7]:
# Show the first rows of the CT frame and, as the cell output, the last rows of the T frame.
display(ct_df.head(n=5))
t_df.tail(n=5)

Unnamed: 0,file,round,wp_ct_val,nade_ct_val,ct_alive,t_alive,ct_winner,bomb_planted,ct_cons_wins,t_cons_wins,ct_val_real,ct_val_pred
0,0,1,1000.0,550,5,5,0.5,0.5,0,0,4550,4078.134589
1,0,2,10100.0,1100,4,0,1.0,0.0,1,0,18450,17819.702711
2,0,3,4125.0,900,0,1,0.0,0.0,0,1,9550,7038.468589
3,0,4,1000.0,0,0,3,0.0,1.0,0,2,1600,1452.468928
4,0,5,15500.0,1400,0,4,0.0,1.0,0,3,23350,22676.205763


Unnamed: 0,file,round,wp_t_val,nade_t_val,ct_alive,t_alive,ct_winner,bomb_planted,ct_cons_wins,t_cons_wins,t_val_real,t_val_pred
310808,12184,20,13500.0,1900,0,4,0.0,0.0,0,3,28150,25570.188294
310809,12184,21,16062.5,1400,5,0,1.0,0.0,1,0,25450,23175.06022
310810,12184,22,13500.0,1400,2,0,1.0,0.0,2,0,16600,19941.209858
310811,12184,23,13500.0,2600,2,4,0.0,0.0,0,1,24450,25325.232123
310812,12184,24,20333.333333,2700,0,2,0.0,1.0,0,2,27500,27964.669856


In [8]:
%%time

files = ct_df['file'].unique()

nxt_rnd_ct_val = []
nxt_rnd_t_val = []

log = 0

for file in files:

    ct_df_file = ct_df[ct_df['file'] == file]
    t_df_file = t_df[t_df['file'] == file]
    
    rounds = ct_df_file['round'].unique()
    
    log += 1
    if log%500 == 0:
        print(f'{log}/12185')
    
    for rnd in rounds:
        if rnd == rounds[-1]:
            nxt_rnd_ct_val.append(0)
            nxt_rnd_t_val.append(0)
        else:
            nxt_rnd_ct_val.append(ct_df_file[ct_df_file['round'] == rnd + 1]['ct_val_real'].values[0])
            nxt_rnd_t_val.append(t_df_file[t_df_file['round'] == rnd + 1]['t_val_real'].values[0])

500/12185
1000/12185
1500/12185
2000/12185
2500/12185
3000/12185
3500/12185
4000/12185
4500/12185
5000/12185
5500/12185
6000/12185
6500/12185
7000/12185
7500/12185
8000/12185
8500/12185
9000/12185
9500/12185
10000/12185
10500/12185
11000/12185
11500/12185
12000/12185
CPU times: user 4min 12s, sys: 408 ms, total: 4min 13s
Wall time: 4min 13s


In [9]:
# Attach each list of next-round values to its frame as a new column.
for frame, column, values in (
    (ct_df, 'nxt_rnd_ct_val', nxt_rnd_ct_val),
    (t_df, 'nxt_rnd_t_val', nxt_rnd_t_val),
):
    frame[column] = values

In [10]:
# Drop the current-round real values from both frames.
# errors='ignore' makes the cell idempotent: re-running it after the columns
# are already gone no longer raises KeyError.
ct_df.drop(columns=['ct_val_real'], inplace=True, errors='ignore')
t_df.drop(columns=['t_val_real'], inplace=True, errors='ignore')

In [11]:
# Write both frames to the processed folder without the row index.
for frame, csv_path in (
    (ct_df, '../data/processed/3_TEST_base_predict_next_rnd_ct_val.csv'),
    (t_df, '../data/processed/3_TEST_base_predict_next_rnd_t_val.csv'),
):
    frame.to_csv(csv_path, index=False)

In [13]:
# Index both frames by (file, round) so single rounds can be addressed with .loc.
index_cols = ['file', 'round']
ct_df.set_index(index_cols, inplace=True)
t_df.set_index(index_cols, inplace=True)

In [14]:
# Show the first rows of the CT frame and, as the cell output, the last rows of the T frame.
display(ct_df.head(n=5))
t_df.tail(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,wp_ct_val,nade_ct_val,ct_alive,t_alive,ct_winner,bomb_planted,ct_cons_wins,t_cons_wins,ct_val_real,nxt_rnd_ct_val
file,round,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
esea_match_13779704.dem,1,1000.0,550,5,5,0.5,0.5,0,0,4550,0
esea_match_13779704.dem,2,10100.0,1100,4,0,1.0,0.0,1,0,18450,0
esea_match_13779704.dem,3,4125.0,900,0,1,0.0,0.0,0,1,9550,0
esea_match_13779704.dem,4,1000.0,0,0,3,0.0,1.0,0,2,1600,0
esea_match_13779704.dem,5,15500.0,1400,0,4,0.0,1.0,0,3,23350,0


Unnamed: 0_level_0,Unnamed: 1_level_0,wp_t_val,nade_t_val,ct_alive,t_alive,ct_winner,bomb_planted,ct_cons_wins,t_cons_wins,t_val_real,nxt_rnd_t_val
file,round,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
esea_match_13829171.dem,20,13500.0,1900,0,4,0.0,0.0,0,3,28150,0
esea_match_13829171.dem,21,16062.5,1400,5,0,1.0,0.0,1,0,25450,0
esea_match_13829171.dem,22,13500.0,1400,2,0,1.0,0.0,2,0,16600,0
esea_match_13829171.dem,23,13500.0,2600,2,4,0.0,0.0,0,1,24450,0
esea_match_13829171.dem,24,20333.333333,2700,0,2,0.0,1.0,0,2,27500,0


#### Fill the new columns with the real values of the next round

In [15]:
# Disabled variant of the next-round fill: for every round except the last one
# of each file it would copy the *predicted* value of the following round
# ('ct_val_pred' / 't_val_pred') into the target columns.
# %%time

# a = 0

# for file in files:
#     a += 1
#     if a%500 == 0:
#         print(f'{a}/12185')
#     for rnd in ct_df.loc[file].index[:-1]:
#         ct_df.loc[(file, rnd), 'nxt_rnd_ct_val'] = ct_df.loc[(file, rnd+1), 'ct_val_pred']
#         t_df.loc[(file, rnd), 'nxt_rnd_t_val'] = t_df.loc[(file, rnd+1), 't_val_pred']

In [16]:
%%time

a = 0

for file in files:
    a += 1
    if a%500 == 0:
        print(f'{a}/12185')
    for rnd in ct_df.loc[file].index[:-1]:
        ct_df.loc[(file, rnd), 'nxt_rnd_ct_val'] = ct_df.loc[(file, rnd+1), 'ct_val_real']
#         t_df.loc[(file, rnd), 'nxt_rnd_t_val'] = t_df.loc[(file, rnd+1), 't_val_real']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


500/12185
1000/12185
1500/12185
2000/12185
2500/12185
3000/12185
3500/12185
4000/12185
4500/12185
5000/12185
5500/12185
6000/12185
6500/12185
7000/12185
7500/12185
8000/12185
8500/12185
9000/12185
9500/12185
10000/12185
10500/12185
11000/12185
11500/12185
12000/12185
CPU times: user 7min 56s, sys: 60.3 ms, total: 7min 56s
Wall time: 7min 56s


### Reset index and save the DataFrames 

In [17]:
# Turn the (file, round) index levels of the CT frame back into regular columns.
ct_df.reset_index(drop=False, inplace=True)
# t_df.reset_index(inplace=True)

In [23]:
# Show the first rows of the CT frame; the T preview is disabled.
display(ct_df.head(n=5))
# t_df.tail()

Unnamed: 0,file,round,wp_ct_val,nade_ct_val,ct_alive,t_alive,ct_winner,bomb_planted,ct_cons_wins,t_cons_wins,ct_val_real,nxt_rnd_ct_val
0,esea_match_13779704.dem,1,1000.0,550,5,5,0.5,0.5,0,0,4550,18450.0
1,esea_match_13779704.dem,2,10100.0,1100,4,0,1.0,0.0,1,0,18450,9550.0
2,esea_match_13779704.dem,3,4125.0,900,0,1,0.0,0.0,0,1,9550,1600.0
3,esea_match_13779704.dem,4,1000.0,0,0,3,0.0,1.0,0,2,1600,23350.0
4,esea_match_13779704.dem,5,15500.0,1400,0,4,0.0,1.0,0,3,23350,26400.0


In [24]:
# Persist the CT frame without its row index; the T export is disabled.
ct_df.to_csv(
    path_or_buf='../data/processed/3_TEST_base_predict_next_rnd_ct_val.csv',
    index=False,
)
# t_df.to_csv('../data/processed/3_TEST_base_predict_next_rnd_t_val.csv', index=True)

### Load Data

In [11]:
# Disabled reload of previously saved frames.
# NOTE(review): these paths ('3_base_...') differ from the '3_TEST_base_...'
# files written earlier in this notebook - confirm which files are intended.
# ct_df = pd.read_csv('../data/processed/3_base_predict_next_rnd_ct_val.csv')
# t_df = pd.read_csv('../data/processed/3_base_predict_next_rnd_t_val.csv')

In [12]:
# display(ct_df.head())
# display(t_df.head())

Unnamed: 0.1,Unnamed: 0,file,round,wp_ct_val,nade_ct_val,ct_alive,t_alive,ct_winner,bomb_planted,ct_cons_wins,t_cons_wins,ct_val_real,ct_val_pred,nxt_rnd_ct_val
0,0,0,1,1000.0,550,5,5,0.5,0.5,0,0,4550,4078.134589,17819.702711
1,1,0,2,10100.0,1100,4,0,1.0,0.0,1,0,18450,17819.702711,7038.468589
2,2,0,3,4125.0,900,0,1,0.0,0.0,0,1,9550,7038.468589,1452.468928
3,3,0,4,1000.0,0,0,3,0.0,1.0,0,2,1600,1452.468928,22676.205763
4,4,0,5,15500.0,1400,0,4,0.0,1.0,0,3,23350,22676.205763,26585.694068


Unnamed: 0.1,Unnamed: 0,file,round,wp_t_val,nade_t_val,ct_alive,t_alive,ct_winner,bomb_planted,ct_cons_wins,t_cons_wins,t_val_real,t_val_pred,nxt_rnd_t_val
0,0,0,1,1166.666667,1200,5,5,0.5,0.5,0,0,3850,3943.272665,6290.616771
1,1,0,2,3687.5,50,4,0,1.0,0.0,1,0,5300,6290.616771,19600.790638
2,2,0,3,11700.0,2450,0,1,0.0,0.0,0,1,22900,19600.790638,22568.098741
3,3,0,4,11700.0,1600,0,3,0.0,1.0,0,2,19650,22568.098741,24459.855175
4,4,0,5,12750.0,1700,0,4,0.0,1.0,0,3,21750,24459.855175,11755.134124
