## **Two ways to estimate the team value**
### **1st Direct estimation from records**
### **2nd Supervised ML regression algorithm**

In [17]:
import pandas as pd
from sklearn.metrics import mean_squared_error

In [18]:
pd.set_option('display.max_columns', 30)
pd.set_option('display.max_rows', 30)

### Weapon & Grenade Prices

In [19]:
wp_values = {'USP': 200, 'P2000': 200, 'Glock': 200, 'P250': 300, 'DualBarettas': 500, 'Tec9': 500, 'FiveSeven': 500, 'CZ': 500, 'Deagle': 700, 
             'Nova': 1200, 'Swag7': 1800, 'SawedOff': 1200, 'XM1014': 2000, 'M249': 5200, 'Negev': 4000,
             'Mac10': 1050, 'MP9': 1250, 'MP7': 1700, 'UMP': 1200, 'Bizon': 1400, 'P90': 2350, 
             'Gallil': 2000, 'Famas': 2250, 'Scout': 1700, 'AK47': 2700, 'M4A4': 3100, 'M4A1': 3100, 'SG556': 3000, 'AUG': 3300, 'G3SG1': 5000, 'Scar20': 5000, 'AWP': 4750, 
             'Decoy': 50, 'Flash': 200, 'Smoke': 300, 'HE': 300, 'Incendiary': 600, 'Molotov': 400, 
             'Knife': 0, 'Zeus': 200}

### Data

In [None]:
dmg_demos = pd.read_csv('../data/csgo/esea_master_dmg_clean_demos.csv')
nade_demos = pd.read_csv('../data/csgo/esea_master_grenades_clean_demos.csv')

## **1st Direct estimacion from records**

### Team Value

In [20]:
def plus_armor(team_value):
    """
    A simple way to add armor value to team value. 
    It could be more precise.
    """
    if team_value <= 1250:
        return team_value + 3250 # 5 chest armor (650$ each)
    elif team_value > 1250:
        return team_value + 4000 # 5 full armor (1000$ each)

In [21]:
def defuse_kit(team_value):
    """
    Add defuse kit values if the team value reaches a minimum value
    Only in CT data!
    """
    if team_value > 10000:
        return 5000 # 5 defuse kits (400$ each)
    else:
        return 0  

In [13]:
# Let's take first only one match

In [14]:
file = 'esea_match_13770997.dem'

In [15]:
# rounds = dmg_demos[dmg_demos['file'] == file]['round'].max()

# Change the way to select the round number. 
# There are some missing rounds so lets take literally the round, not the max to do a range

In [16]:
rounds = dmg_demos[dmg_demos['file'] == file]['round'].unique()

In [11]:
df_file = dmg_demos[dmg_demos['file'] == file]
df_file_nade = nade_demos[nade_demos['file'] == file]

lst_ct_val = []
lst_t_val = []

for rnd in rounds:
    
    # 1st WAPONS
    
    # ---------> Put Bomb and Unknown filters!
    
    df_round_ct = df_file[(df_file['round'] == rnd) & (df_file['wp_type'] != 'Grenade') & (df_file['att_side'] == 'CounterTerrorist')]
    df_round_t = df_file[(df_file['round'] == rnd) & (df_file['wp_type'] != 'Grenade') & (df_file['att_side'] == 'Terrorist')]
    
    wp_ct = df_round_ct[['att_id', 'wp']].drop_duplicates(subset='att_id')
    wp_t = df_round_t[['att_id', 'wp']].drop_duplicates(subset='att_id')
    # Drop_duplicates with subset to take the first record of each player. This way we reduce the error of taking a dropped gun
    
    wp_ct_val = sum([wp_values[weapon] for weapon in wp_ct['wp']])
    wp_t_val = sum([wp_values[weapon] for weapon in wp_t['wp']])
    # Value of the guns the team is carrying (CT & T)
    
    # 2nd GRENADES
    df_round_ct_nade = df_file_nade[(df_file_nade['round'] == rnd) & (df_file_nade['att_side'] == 'CounterTerrorist')]
    df_round_t_nade = df_file_nade[(df_file_nade['round'] == rnd) & (df_file_nade['att_side'] == 'Terrorist')]
    
    nade_ct = df_round_ct_nade[['att_id', 'nade']].drop_duplicates()
    nade_t = df_round_t_nade[['att_id', 'nade']].drop_duplicates()
    # Drop_duplicates to take all the nades of each player (we asume the error of taking dropped nades)
    
    nade_ct_val = sum([wp_values[nade] for nade in nade_ct['nade']])
    nade_t_val = sum([wp_values[nade] for nade in nade_t['nade']])
    # Value of the grenades the team is carrying (CT & T)
    
    # Join both values (Weapons & Grenades)
    ct_val = wp_ct_val + nade_ct_val
    t_val = wp_t_val + nade_t_val
    
    # Adding armor values
    ct_val = plus_armor(ct_val) + defuse_kit(ct_val)
    t_val = plus_armor(t_val)
    
    # Append the values to the lists lst_ct_val & lst_t_val
    lst_ct_val.append(ct_val)
    lst_t_val.append(t_val)

In [12]:
print(lst_ct_val)
print()
print(lst_t_val)

[4350, 6800, 7200, 22200, 5800, 25850, 5700, 6200, 26550, 23150, 28150, 24000, 22250, 13950, 20100, 4050, 19850, 13050, 19850, 22550, 25450, 13200, 10150, 24700, 13100]

[4050, 17400, 14950, 18700, 13750, 15100, 14150, 14250, 13550, 6650, 10900, 13600, 8600, 11100, 12450, 3850, 9400, 4050, 13400, 9350, 16000, 16300, 11300, 9500, 6700]


### Comparison

To make the comparison we take the team value from the DataFrame esea_meta_demos.csv

But first we have to filter and take only the games that have the full rounds records, as the are some games with missing rounds.

In [None]:
meta_demos = pd.read_csv('../data/csgo/esea_meta_demos.csv')

In [None]:
files_complete = [file for file in meta_demos['file'].unique() if\
                     meta_demos[meta_demos['file'] == file]['round'].max() == \
                     len(meta_demos[meta_demos['file'] == file])]

#### **Let's create a DataFrame with the team value of our generator and the real team value**

We will create lists with the data that will be the different columns

In [12]:
# LISTS

lst_file = []
lst_round = []
lst_ct_val_pred = []
lst_t_val_pred = []
lst_ct_val_real = []
lst_t_val_real = []
lst_round_type = []

## Attention! 4 hours to run this cell below

### This is the first attemp to obtain and create the DataFrame from the original data.
### There is an optimized version in 2_1_ml_preprocessingdata.ipynb

In [13]:
# LISTS

lst_file = []
lst_round = []
lst_ct_val_pred = []
lst_t_val_pred = []
lst_ct_val_real = []
lst_t_val_real = []
lst_round_type = []

# Create the loop around files and rounds
log = 0 # Log to see the progress

# Make some previous filtering to optimize the loop
flt_nade = dmg_demos['wp_type'] != 'Grenade'
flt_unknown = dmg_demos['wp_type'] != 'Unkown' # not well written in original data
flt_bomb = dmg_demos['wp'] != 'Bomb'

dmg_demos_flt = dmg_demos[flt_nade & flt_unknown & flt_bomb]

for file in files_complete:
    
    log += 1
    if log % 500 == 0: print(f'{log} / {len(files_complete)} ............')
    
#     df_file_dmg = dmg_demos[dmg_demos['file'] == file]
#     df_file_nade = nade_demos[nade_demos['file'] == file]
#     df_file_meta = meta_demos[meta_demos['file'] == file]
    
    df_file_dmg = dmg_demos_flt[dmg_demos['file'] == file]
    df_file_nade = nade_demos[nade_demos['file'] == file]
    df_file_meta = meta_demos[meta_demos['file'] == file]
    
    rounds = df_file_dmg['round'].unique()
    
    for rnd in rounds:
        # 1st WEAPONS
#         flt_nade = df_file_dmg['wp_type'] != 'Grenade'
#         flt_unknown = df_file_dmg['wp_type'] != 'Unkown' # not well written in original data
#         flt_bomb = df_file_dmg['wp'] != 'Bomb'
        
#         df_round_ct_dmg = df_file_dmg[(df_file_dmg['round'] == rnd) & flt_nade & flt_unknown & flt_bomb & (df_file_dmg['att_side'] == 'CounterTerrorist')]
#         df_round_t_dmg = df_file_dmg[(df_file_dmg['round'] == rnd) & flt_nade & flt_unknown & flt_bomb & (df_file_dmg['att_side'] == 'Terrorist')]
        df_round_ct_dmg = df_file_dmg[(df_file_dmg['round'] == rnd) & (df_file_dmg['att_side'] == 'CounterTerrorist')]
        df_round_t_dmg = df_file_dmg[(df_file_dmg['round'] == rnd) & (df_file_dmg['att_side'] == 'Terrorist')]
        
        wp_ct = df_round_ct_dmg[['att_id', 'wp']].drop_duplicates(subset='att_id')
        wp_t = df_round_t_dmg[['att_id', 'wp']].drop_duplicates(subset='att_id')
        # Drop_duplicates with subset to take the first record of each player. This way we reduce the error of taking a dropped gun

#         wp_ct_val = sum([wp_values[weapon] for weapon in wp_ct['wp']])
#         wp_t_val = sum([wp_values[weapon] for weapon in wp_t['wp']])    
        wp_ct_val_records = sum([wp_values[weapon] for weapon in wp_ct['wp']])
        n_wp = len(wp_ct['wp'])
        if n_wp > 0:
            wp_ct_val = wp_ct_val_records + ((wp_ct_val_records/n_wp) * (5 - n_wp))
        elif n_wp == 0:
            wp_ct_val = wp_ct_val_records

        wp_t_val_records = sum([wp_values[weapon] for weapon in wp_t['wp']])
        n_wp = len(wp_t['wp'])
        if n_wp > 0:
            wp_t_val = wp_t_val_records + ((wp_t_val_records/n_wp) * (5 - n_wp))    
        elif n_wp == 0:
            wp_t_val = wp_t_val_records
        # Value of the guns the team is carrying (CT & T)

        # 2nd GRENADES
        df_round_ct_nade = df_file_nade[(df_file_nade['round'] == rnd) & (df_file_nade['att_side'] == 'CounterTerrorist')]
        df_round_t_nade = df_file_nade[(df_file_nade['round'] == rnd) & (df_file_nade['att_side'] == 'Terrorist')]

        nade_ct = df_round_ct_nade[['att_id', 'nade']].drop_duplicates()
        nade_t = df_round_t_nade[['att_id', 'nade']].drop_duplicates()
        # Drop_duplicates to take all the nades of each player (we asume the error of taking dropped nades)

        nade_ct_val = sum([wp_values[nade] for nade in nade_ct['nade']])
        nade_t_val = sum([wp_values[nade] for nade in nade_t['nade']])
        # Value of the grenades the team is carrying (CT & T)

        # Join both values (Weapons & Grenades)
        ct_val = wp_ct_val + nade_ct_val
        t_val = wp_t_val + nade_t_val

        # Adding armor & defuse kit values
        ct_val = plus_armor(ct_val) + defuse_kit(ct_val)
        t_val = plus_armor(t_val)
        
        # Extracting real CT & T team value and round_type from meta_demos
        ct_val_real = df_file_meta[df_file_meta['round'] == rnd]['ct_eq_val'].values[0]
        t_val_real = df_file_meta[df_file_meta['round'] == rnd]['t_eq_val'].values[0]
        round_type = df_file_meta[df_file_meta['round'] == rnd]['round_type'].values[0]
        # .values to get only the column value, not also the index
        # [0] because .values return a 1 dimension array

        # Append the values to the previosly defined lists 
        lst_file.append(file)
        lst_round.append(rnd)
        lst_ct_val_pred.append(ct_val)
        lst_t_val_pred.append(t_val)
        lst_ct_val_real.append(ct_val_real)
        lst_t_val_real.append(t_val_real)
        lst_round_type.append(round_type)



500 / 12185 ............
1000 / 12185 ............
1500 / 12185 ............
2000 / 12185 ............
2500 / 12185 ............
3000 / 12185 ............
3500 / 12185 ............
4000 / 12185 ............
4500 / 12185 ............
5000 / 12185 ............
5500 / 12185 ............
6000 / 12185 ............
6500 / 12185 ............
7000 / 12185 ............
7500 / 12185 ............
8000 / 12185 ............
8500 / 12185 ............
9000 / 12185 ............
9500 / 12185 ............
10000 / 12185 ............
10500 / 12185 ............
11000 / 12185 ............
11500 / 12185 ............
12000 / 12185 ............


In [14]:
df = pd.DataFrame([lst_file, lst_round, lst_ct_val_pred, lst_t_val_pred, lst_ct_val_real, lst_t_val_real, lst_round_type]).T
df.columns = ['file', 'round', 'ct_val_pred', 't_val_pred', 'ct_val_real', 't_val_real', 'round_type']

df.to_csv('../data/processed/estimated_team_value.csv', index=False) # Saved .csv with the full data comparison

### Load the DataFrame 

In [22]:
df = pd.read_csv('../data/processed/estimated_team_value.csv')

In [28]:
print(df.shape)
df.head()

(310813, 8)


Unnamed: 0,index,file,round,ct_val_pred,t_val_pred,ct_val_real,t_val_real,round_type
0,0,esea_match_13779704.dem,1,5550.0,6366.666667,4550,3850,PISTOL_ROUND
1,1,esea_match_13779704.dem,2,20200.0,7737.5,18450,5300,ECO
2,2,esea_match_13779704.dem,3,9025.0,18150.0,9550,22900,SEMI_ECO
3,3,esea_match_13779704.dem,4,4250.0,17300.0,1600,19650,ECO
4,4,esea_match_13779704.dem,5,25900.0,18450.0,23350,21750,NORMAL


### Differences estimated / real

In [30]:
df['ct_error'] = df['ct_val_real'] - df['ct_val_pred']
df['t_error'] = df['t_val_real'] - df['t_val_pred']

In [31]:
df.describe()

Unnamed: 0,index,round,ct_val_pred,t_val_pred,ct_val_real,t_val_real,ct_error,t_error
count,310813.0,310813.0,310813.0,310813.0,310813.0,310813.0,310813.0,310813.0
mean,155406.0,13.636219,19989.764623,16335.579015,18887.598331,18229.730738,-1102.166292,1894.151723
std,89724.128946,8.036385,9577.524503,6202.982439,10547.244034,8837.245568,3774.883741,4182.317258
min,0.0,1.0,3250.0,3250.0,200.0,400.0,-32150.0,-23333.333333
25%,77703.0,7.0,8933.333333,11866.666667,7400.0,10700.0,-3225.0,-1200.0
50%,155406.0,13.0,23825.0,18400.0,21750.0,21100.0,-1075.0,2400.0
75%,233109.0,20.0,27862.5,20800.0,28150.0,25450.0,1012.5,4775.0
max,310812.0,60.0,38950.0,33550.0,42050.0,38150.0,34100.0,32500.0


In [32]:
df.head()

Unnamed: 0,index,file,round,ct_val_pred,t_val_pred,ct_val_real,t_val_real,round_type,ct_error,t_error
0,0,esea_match_13779704.dem,1,5550.0,6366.666667,4550,3850,PISTOL_ROUND,-1000.0,-2516.666667
1,1,esea_match_13779704.dem,2,20200.0,7737.5,18450,5300,ECO,-1750.0,-2437.5
2,2,esea_match_13779704.dem,3,9025.0,18150.0,9550,22900,SEMI_ECO,525.0,4750.0
3,3,esea_match_13779704.dem,4,4250.0,17300.0,1600,19650,ECO,-2650.0,2350.0
4,4,esea_match_13779704.dem,5,25900.0,18450.0,23350,21750,NORMAL,-2550.0,3300.0


In [37]:
print(f"CT Error: {mean_squared_error(y_pred=df['ct_val_pred'], y_true=df['ct_val_real'], squared=False)}")
print(f"T Error: {mean_squared_error(y_pred=df['t_val_pred'], y_true=df['t_val_real'], squared=False)}")

CT Error: 3932.489281724597
T Error: 4591.245160143518
