## **Two ways to estimate the team value**
### **1st Direct estimation from records**
### **2nd Supervised ML regression algorithm**

In [1]:
import pandas as pd

In [2]:
# Cap how many columns and rows pandas renders when a DataFrame is displayed
pd.options.display.max_columns = 30
pd.options.display.max_rows = 30

### Weapon & Grenade Prices

In [3]:
# Purchase price of every weapon / grenade label, used to value a team's equipment.
# The keys are looked up with the 'wp' and 'nade' column values of the demo files,
# so their spelling has to be kept exactly as it is here.
wp_values = {
    # Pistols
    'USP': 200,
    'P2000': 200,
    'Glock': 200,
    'P250': 300,
    'DualBarettas': 500,
    'Tec9': 500,
    'FiveSeven': 500,
    'CZ': 500,
    'Deagle': 700,
    # Shotguns and machine guns
    'Nova': 1200,
    'Swag7': 1800,
    'SawedOff': 1200,
    'XM1014': 2000,
    'M249': 5200,
    'Negev': 4000,
    # SMGs
    'Mac10': 1050,
    'MP9': 1250,
    'MP7': 1700,
    'UMP': 1200,
    'Bizon': 1400,
    'P90': 2350,
    # Rifles and snipers
    'Gallil': 2000,
    'Famas': 2250,
    'Scout': 1700,
    'AK47': 2700,
    'M4A4': 3100,
    'M4A1': 3100,
    'SG556': 3000,
    'AUG': 3300,
    'G3SG1': 5000,
    'Scar20': 5000,
    'AWP': 4750,
    # Grenades
    'Decoy': 50,
    'Flash': 200,
    'Smoke': 300,
    'HE': 300,
    'Incendiary': 600,
    'Molotov': 400,
    # Other
    'Knife': 0,
    'Zeus': 200,
}

### Data

In [4]:
# Load the four ESEA demo tables (damage, grenades, round metadata, kills), in that order
dmg_demos, nade_demos, meta_demos, kills_demos = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/csgo/esea_master_dmg_clean_demos.csv',
        '../data/csgo/esea_master_grenades_clean_demos.csv',
        '../data/csgo/esea_meta_demos.csv',
        '../data/csgo/esea_master_kills_demos.csv',
    )
)

## **2nd Supervised ML regression algorithm**
## **Preparing Data**

### **If time: try SPARK**

We will need 2 DataFrames: one for CT values and another for T values

Required DataFrame fields to reach a good prediction:
- file
- round
- weapons (value from records, filling the missing data with the mean)
- grenades (value from records)
- players alive previous round
- winner team
- bomb planted previous round
- bomb defused *(NOT IMPLEMENTED)*
- consecutive wins
- consecutive defeats *(NOT IMPLEMENTED)*
- real team value from esea_meta_demos.csv
- round type from esea_meta_demos.csv

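To be able to make the prediction we need to know that all the information is complete. esea_meta_demos.csv is the file with some missing rounds, so let's keep only the complete games from it: those whose highest round number equals the number of rounds recorded for the game.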

In [5]:
# A game (file) counts as complete when its highest round number equals the number
# of rows it has in meta_demos, i.e. no round is missing from the metadata.
# A single groupby pass replaces the previous per-file rescans of the whole frame;
# sort=False keeps the files in order of first appearance, like .unique() did.
rounds_by_file = meta_demos.groupby('file', sort=False)['round']
is_complete = rounds_by_file.max() == rounds_by_file.size()

files_complete = is_complete[is_complete].index.to_list()

In [7]:
# LISTS
# One accumulator per column of the final DataFrame. Every list receives exactly
# one value per (file, round) pair processed by the loops below.

lst_file = []
lst_round = []

lst_wp_ct_val = []
lst_wp_t_val = []
lst_nade_ct_val = []
lst_nade_t_val = []
lst_ct_alive = []
lst_t_alive = []
lst_ct_winner = []
lst_bomb_planted = []
lst_ct_cons_wins = []
lst_t_cons_wins = []

lst_ct_val_real = []
lst_t_val_real = []
lst_round_type = []


# Create the loop around files and rounds
log = 0  # Number of files processed so far, used to print the progress

# Filter the damage records once, before the loops: only rows whose weapon is
# neither a grenade, nor of unknown type, nor the bomb are used to value guns.

flt_nade = dmg_demos['wp_type'] != 'Grenade'
flt_unknown = dmg_demos['wp_type'] != 'Unkown'  # misspelled like this in the original data
flt_bomb = dmg_demos['wp'] != 'Bomb'

dmg_demos_flt = dmg_demos[flt_nade & flt_unknown & flt_bomb]

for file in files_complete:

    log += 1
    if log % 500 == 0:
        print(f'{log} / {len(files_complete)} ............')

    # The mask is built from the filtered frame itself. Building it from the
    # unfiltered dmg_demos only worked through pandas' implicit reindexing of the
    # boolean key (and raised a UserWarning on every file).
    df_file_dmg = dmg_demos_flt[dmg_demos_flt['file'] == file]
    df_file_nade = nade_demos[nade_demos['file'] == file]
    df_file_meta = meta_demos[meta_demos['file'] == file]
    df_file_kills = kills_demos[kills_demos['file'] == file]

    # Round metadata re-indexed 0..n-1. It only depends on the file, so it is built
    # once here instead of once per round.
    # NOTE(review): the positional look-ups below assume the rows of a file are
    # ordered by round, so that position (rnd - 2) is round (rnd - 1) -- confirm.
    df_consecutive = df_file_meta.reset_index(drop=True)

    # Only rounds with at least one (filtered) damage record are processed
    rounds = df_file_dmg['round'].unique()

    for rnd in rounds:

        # 1st WEAPONS
        df_round_ct_dmg = df_file_dmg[(df_file_dmg['round'] == rnd) & (df_file_dmg['att_side'] == 'CounterTerrorist')]
        df_round_t_dmg = df_file_dmg[(df_file_dmg['round'] == rnd) & (df_file_dmg['att_side'] == 'Terrorist')]

        # drop_duplicates with subset keeps the first record of each player.
        # This way we reduce the error of counting a gun picked up from the floor.
        wp_ct = df_round_ct_dmg[['att_id', 'wp']].drop_duplicates(subset='att_id')
        wp_t = df_round_t_dmg[['att_id', 'wp']].drop_duplicates(subset='att_id')

        # Value of the guns the team is carrying (CT & T). Players without any
        # record are filled in with the mean value of the recorded ones, up to a
        # team of 5. A weapon missing from wp_values raises KeyError.
        wp_ct_val_records = sum(wp_values[weapon] for weapon in wp_ct['wp'])
        n_wp = len(wp_ct['wp'])
        if n_wp > 0:
            wp_ct_val = wp_ct_val_records + ((wp_ct_val_records / n_wp) * (5 - n_wp))
        else:
            wp_ct_val = wp_ct_val_records

        wp_t_val_records = sum(wp_values[weapon] for weapon in wp_t['wp'])
        n_wp = len(wp_t['wp'])
        if n_wp > 0:
            wp_t_val = wp_t_val_records + ((wp_t_val_records / n_wp) * (5 - n_wp))
        else:
            wp_t_val = wp_t_val_records

        # 2nd GRENADES
        df_round_ct_nade = df_file_nade[(df_file_nade['round'] == rnd) & (df_file_nade['att_side'] == 'CounterTerrorist')]
        df_round_t_nade = df_file_nade[(df_file_nade['round'] == rnd) & (df_file_nade['att_side'] == 'Terrorist')]

        # drop_duplicates on (player, nade) keeps every distinct nade of each player
        # (we assume the error of counting nades picked up from the floor)
        nade_ct = df_round_ct_nade[['att_id', 'nade']].drop_duplicates()
        nade_t = df_round_t_nade[['att_id', 'nade']].drop_duplicates()

        # Value of the grenades the team is carrying (CT & T)
        nade_ct_val = sum(wp_values[nade] for nade in nade_ct['nade'])
        nade_t_val = sum(wp_values[nade] for nade in nade_t['nade'])

        # 3rd PLAYERS ALIVE previous round (CT & T): lowest alive count found in
        # the kill records of the previous round; 5 for the first round
        if rnd > 1:
            ct_t_alive = df_file_kills[df_file_kills['round'] == rnd - 1][['ct_alive', 't_alive']].min().to_list()
            ct_alive = ct_t_alive[0]
            t_alive = ct_t_alive[1]
        else:
            ct_alive = 5
            t_alive = 5

        # 4th WINNER TEAM previous round (one column: 1 if CT won, 0 otherwise,
        # 0.5 as a neutral value for the first round)
        if rnd > 1:
            if df_consecutive.loc[rnd - 2, 'winner_side'] == 'CounterTerrorist':
                ct_winner = 1
            else:
                ct_winner = 0
        else:
            ct_winner = 0.5

        # 5th BOMB PLANTED previous round (1 / 0, 0.5 as a neutral value for the
        # first round). Read from the filtered damage records of that round.
        if rnd > 1:
            if True in df_file_dmg[df_file_dmg['round'] == rnd - 1]['is_bomb_planted'].to_list():
                bomb_planted = 1
            else:
                bomb_planted = 0
        else:
            bomb_planted = 0.5

        # 6th CONSECUTIVE WINS (CT & T): winners of the previous rounds, most
        # recent first, counted until the streak is broken
        wins_list = df_consecutive.loc[:rnd - 2, 'winner_side'].to_list()[::-1]
        ct_cons_wins = 0
        t_cons_wins = 0

        for winner in wins_list:  # One loop per team (CT)
            if winner != 'CounterTerrorist':
                break
            ct_cons_wins += 1

        for winner in wins_list:  # One loop per team (T)
            if winner != 'Terrorist':
                break
            t_cons_wins += 1

        # Extracting real CT & T team value and round_type from meta_demos.
        # The row of the current round is selected once; .values[0] takes the
        # scalar out of the resulting 1-dimensional array.
        df_round_meta = df_file_meta[df_file_meta['round'] == rnd]
        ct_val_real = df_round_meta['ct_eq_val'].values[0]
        t_val_real = df_round_meta['t_eq_val'].values[0]
        round_type = df_round_meta['round_type'].values[0]

        # Append the values to the previously defined lists
        lst_file.append(file)
        lst_round.append(rnd)

        lst_wp_ct_val.append(wp_ct_val)
        lst_wp_t_val.append(wp_t_val)
        lst_nade_ct_val.append(nade_ct_val)
        lst_nade_t_val.append(nade_t_val)
        lst_ct_alive.append(ct_alive)
        lst_t_alive.append(t_alive)
        lst_ct_winner.append(ct_winner)
        lst_bomb_planted.append(bomb_planted)
        lst_ct_cons_wins.append(ct_cons_wins)
        lst_t_cons_wins.append(t_cons_wins)

        lst_ct_val_real.append(ct_val_real)
        lst_t_val_real.append(t_val_real)
        lst_round_type.append(round_type)



500 / 12185 ............
1000 / 12185 ............
1500 / 12185 ............
2000 / 12185 ............
2500 / 12185 ............
3000 / 12185 ............
3500 / 12185 ............
4000 / 12185 ............
4500 / 12185 ............
5000 / 12185 ............
5500 / 12185 ............
6000 / 12185 ............
6500 / 12185 ............
7000 / 12185 ............
7500 / 12185 ............
8000 / 12185 ............
8500 / 12185 ............
9000 / 12185 ............
9500 / 12185 ............
10000 / 12185 ............
10500 / 12185 ............
11000 / 12185 ............
11500 / 12185 ............
12000 / 12185 ............


### Create and save the DataFrame

In [8]:
# Assemble the per-round lists into a DataFrame, one named column per list.
# Building it from a column mapping lets every column keep its own dtype; the
# previous list-of-rows + transpose turned every column into `object`.
df = pd.DataFrame({
    'file': lst_file,
    'round': lst_round,
    'wp_ct_val': lst_wp_ct_val,
    'wp_t_val': lst_wp_t_val,
    'nade_ct_val': lst_nade_ct_val,
    'nade_t_val': lst_nade_t_val,
    'ct_alive': lst_ct_alive,
    't_alive': lst_t_alive,
    'ct_winner': lst_ct_winner,
    'bomb_planted': lst_bomb_planted,
    'ct_cons_wins': lst_ct_cons_wins,
    't_cons_wins': lst_t_cons_wins,
    'ct_val_real': lst_ct_val_real,
    't_val_real': lst_t_val_real,
    'round_type': lst_round_type,
})

# Persist the result so the slow extraction loop does not have to be re-run
df.to_csv('../data/processed/base_to_ml_predicted_team_value.csv', index=False)

### Load the DataFrame

In [9]:
# Reload the saved features from disk (stray text after the call, which made
# this line a syntax error, has been removed)
df = pd.read_csv('../data/processed/base_to_ml_predicted_team_value.csv')

In [10]:
# Preview the first five rows of the reloaded features
df.head(n=5)

Unnamed: 0,file,round,wp_ct_val,wp_t_val,nade_ct_val,nade_t_val,ct_alive,t_alive,ct_winner,bomb_planted,ct_cons_wins,t_cons_wins,ct_val_real,t_val_real,round_type
0,esea_match_13779704.dem,1,1000.0,1166.666667,550,1200,5,5,0.5,0.5,0,0,4550,3850,PISTOL_ROUND
1,esea_match_13779704.dem,2,10100.0,3687.5,1100,50,4,0,1.0,0.0,1,0,18450,5300,ECO
2,esea_match_13779704.dem,3,4125.0,11700.0,900,2450,0,1,0.0,0.0,0,1,9550,22900,SEMI_ECO
3,esea_match_13779704.dem,4,1000.0,11700.0,0,1600,0,3,0.0,1.0,0,2,1600,19650,ECO
4,esea_match_13779704.dem,5,15500.0,12750.0,1400,1700,0,4,0.0,1.0,0,3,23350,21750,NORMAL


In [11]:
# Number of distinct games (files) in the dataset; dropna=False counts a missing
# value as one more entry, exactly like len(unique()) does
df['file'].nunique(dropna=False)

12185

### Basic processing, without the file and round loops

In [172]:
# Scratch version of the per-round feature extraction for one hard-coded round.
# It reads df_file_dmg, df_file_nade, df_file_meta and df_file_kills, which are
# not defined in this cell: they must already exist in the kernel.
rnd = 7

# This will have to be indented (when moved inside the file / round loops)

# 1st WEAPONS
is_round_dmg = df_file_dmg['round'] == rnd
df_round_ct_dmg = df_file_dmg[is_round_dmg & (df_file_dmg['att_side'] == 'CounterTerrorist')]
df_round_t_dmg = df_file_dmg[is_round_dmg & (df_file_dmg['att_side'] == 'Terrorist')]

# Keeping only the first record of each player (drop_duplicates on att_id)
# reduces the error of counting a gun picked up from the floor
wp_ct = df_round_ct_dmg[['att_id', 'wp']].drop_duplicates(subset='att_id')
wp_t = df_round_t_dmg[['att_id', 'wp']].drop_duplicates(subset='att_id')

# Value of the guns the team is carrying (CT & T); players without a record are
# filled in with the mean of the recorded ones, up to a team of 5
wp_ct_val_records = sum(wp_values[weapon] for weapon in wp_ct['wp'])
n_wp = len(wp_ct['wp'])
wp_ct_val = (
    wp_ct_val_records + ((wp_ct_val_records/n_wp) * (5 - n_wp))
    if n_wp > 0
    else wp_ct_val_records
)

wp_t_val_records = sum(wp_values[weapon] for weapon in wp_t['wp'])
n_wp = len(wp_t['wp'])
wp_t_val = (
    wp_t_val_records + ((wp_t_val_records/n_wp) * (5 - n_wp))
    if n_wp > 0
    else wp_t_val_records
)

# 2nd GRENADES
is_round_nade = df_file_nade['round'] == rnd
df_round_ct_nade = df_file_nade[is_round_nade & (df_file_nade['att_side'] == 'CounterTerrorist')]
df_round_t_nade = df_file_nade[is_round_nade & (df_file_nade['att_side'] == 'Terrorist')]

# Every distinct (player, nade) pair is kept
# (we assume the error of counting nades picked up from the floor)
nade_ct = df_round_ct_nade[['att_id', 'nade']].drop_duplicates()
nade_t = df_round_t_nade[['att_id', 'nade']].drop_duplicates()

# Value of the grenades the team is carrying (CT & T)
nade_ct_val = sum(wp_values[nade] for nade in nade_ct['nade'])
nade_t_val = sum(wp_values[nade] for nade in nade_t['nade'])

# 3rd PLAYERS ALIVE previous round (CT & T); 5 for the first round
if rnd > 1:
    df_prev_kills = df_file_kills[df_file_kills['round'] == rnd-1]
    ct_t_alive = df_prev_kills[['ct_alive', 't_alive']].min().to_list()
    ct_alive, t_alive = ct_t_alive
else:
    ct_alive = 5
    t_alive = 5

# 4th WINNER TEAM previous round (one column, ct_winner filled with [1, 0];
# 0.5 for the first round)
if rnd > 1:
    prev_winner = df_file_meta.reset_index(drop=True).loc[rnd-2, 'winner_side']
    ct_winner = 1 if prev_winner == 'CounterTerrorist' else 0
else:
    ct_winner = 0.5

# 5th BOMB PLANTED previous round (1 / 0; 0.5 for the first round)
if rnd > 1:
    prev_bomb_flags = df_file_dmg[df_file_dmg['round'] == rnd-1]['is_bomb_planted'].to_list()
    bomb_planted = 1 if True in prev_bomb_flags else 0
else:
    bomb_planted = 0.5

# 6th CONSECUTIVE WINS (CT & T): previous winners, most recent first,
# counted until the streak is broken
df_consecutive = df_file_meta.reset_index(drop=True)
wins_list = df_consecutive.loc[:rnd-2, 'winner_side'].to_list()[::-1]
ct_cons_wins = 0
t_cons_wins = 0

for winner in wins_list:  # One loop per team (CT)
    if winner != 'CounterTerrorist':
        break
    ct_cons_wins += 1

for winner in wins_list:  # One loop per team (T)
    if winner != 'Terrorist':
        break
    t_cons_wins += 1
