## **Optimizing the DataFrame creation**

In [1]:
import pandas as pd
import numpy as np

import cProfile

In [2]:
# Show at most 30 columns and 30 rows whenever a DataFrame is displayed.
for _display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(_display_option, 30)

### Weapon & Grenade Prices

In [3]:
# Price of every weapon / grenade, keyed by the name found in the 'wp' and
# 'nade' columns of the demo data (the spelling of the keys must match the data).
wp_values = {
    # Pistols
    'USP': 200, 'P2000': 200, 'Glock': 200, 'P250': 300, 'DualBarettas': 500,
    'Tec9': 500, 'FiveSeven': 500, 'CZ': 500, 'Deagle': 700,
    # Heavy (shotguns and machine guns)
    'Nova': 1200, 'Swag7': 1800, 'SawedOff': 1200, 'XM1014': 2000,
    'M249': 5200, 'Negev': 4000,
    # SMGs
    'Mac10': 1050, 'MP9': 1250, 'MP7': 1700, 'UMP': 1200, 'Bizon': 1400,
    'P90': 2350,
    # Rifles
    'Gallil': 2000, 'Famas': 2250, 'Scout': 1700, 'AK47': 2700, 'M4A4': 3100,
    'M4A1': 3100, 'SG556': 3000, 'AUG': 3300, 'G3SG1': 5000, 'Scar20': 5000,
    'AWP': 4750,
    # Grenades
    'Decoy': 50, 'Flash': 200, 'Smoke': 300, 'HE': 300, 'Incendiary': 600,
    'Molotov': 400,
    # Other
    'Knife': 0, 'Zeus': 200,
}

### Data

In [4]:
# Load the four raw tables (damage, grenades, round metadata, kills) in one go.
dmg_demos, nade_demos, meta_demos, kills_demos = map(
    pd.read_csv,
    (
        '../data/csgo/esea_master_dmg_clean_demos.csv',
        '../data/csgo/esea_master_grenades_clean_demos.csv',
        '../data/csgo/esea_meta_demos.csv',
        '../data/csgo/esea_master_kills_demos.csv',
    ),
)

## **2nd Supervised ML regression algorithm**
## **Preparing Data**

### **If time: try SPARK**

We will need 2 DataFrames: one for CT values and another for T values

Required DataFrame fields to reach a good prediction:
- file
- round
- weapons (value from records, filling the missing data with the mean)
- grenades (value from records)
- players alive previous round
- winner team
- bomb planted previous round
- <del>bomb defused</del>
- consecutive wins
- <del>consecutive defeats</del>
- real team value from esea_meta_demos.csv
- round type from esea_meta_demos.csv

To be able to make the prediction we need all the information to be complete. Let's keep only the complete games from esea_meta_demos.csv, i.e. discard the ones with some missing rounds.

## <font color='red'>  files_complete, modified to list comprehension</font>

In [5]:
# A game is "complete" when its highest round number equals the number of
# rows (rounds) recorded for that file in meta_demos, i.e. no round is missing.
#
# The previous versions (explicit loop, then list comprehension) filtered the
# whole of meta_demos twice for every file, which is quadratic in the number of
# files. A single groupby computes both figures for all files in one pass.
# sort=False keeps the files in order of first appearance, exactly as
# meta_demos['file'].unique() did.
_rounds_per_file = meta_demos.groupby('file', sort=False)['round'].agg(['max', 'size'])

files_complete = _rounds_per_file.index[
    _rounds_per_file['max'] == _rounds_per_file['size']
].tolist()

In [6]:
# Every 500th complete file; df_from_file prints a progress line when it reaches one of them.
log_files = files_complete[::500]

In [7]:
# Files to process. This is the same list object as files_complete (no copy is made).
test_files = files_complete

## <font color='red'>Filters changed for .loc and process modified to functions and list comprehension</font>

In [8]:
def weapons(rnd):

    """
    1st WEAPONS

    Estimated value of the guns carried by each side in round `rnd`.

    Reads the module-level `df_file_dmg` (damage records of the current file)
    and `wp_values` (price table). For each side, only the first record of
    every attacker is priced; when fewer than 5 attackers are recorded, the
    missing players are assumed to carry the mean of the recorded values.

    Returns (wp_ct_val, wp_t_val). A side with no records is valued at 0.
    """

    round_dmg = df_file_dmg[df_file_dmg['round'] == rnd]

    def side_value(side):
        # First record of each player only: this reduces the error of
        # pricing a gun that was picked up from the floor later in the round.
        first_hits = round_dmg[round_dmg['att_side'] == side][['att_id', 'wp']]
        first_hits = first_hits.drop_duplicates(subset='att_id')

        recorded = sum(wp_values[weapon] for weapon in first_hits['wp'])
        n_recorded = len(first_hits['wp'])
        if n_recorded == 0:
            return recorded
        # Fill the (5 - n_recorded) unseen players with the recorded mean
        return recorded + ((recorded / n_recorded) * (5 - n_recorded))

    return side_value('CounterTerrorist'), side_value('Terrorist')



def nades(rnd):

    """
    2nd GRENADES

    Value of the grenades recorded for each side in round `rnd`.

    Reads the module-level `df_file_nade` and `wp_values`. Every distinct
    (player, grenade type) pair of the round is priced once, so all the
    grenade types of each player are counted (accepting the error of pricing
    grenades that were picked up).

    Returns (nade_ct_val, nade_t_val).
    """

    round_nades = df_file_nade[df_file_nade['round'] == rnd]

    def side_value(side):
        thrown = round_nades[round_nades['att_side'] == side][['att_id', 'nade']]
        thrown = thrown.drop_duplicates()
        return sum(wp_values[nade] for nade in thrown['nade'])

    return side_value('CounterTerrorist'), side_value('Terrorist')
    
    
    
def players_alive(rnd):

    """
    3rd PLAYER ALIVE previous round (CT & T)

    Returns (ct_alive, t_alive): the minimum of the 'ct_alive' and 't_alive'
    columns of the module-level `df_file_kills` over the rows of round
    `rnd - 1`. When `rnd` is not greater than 1 there is no previous round
    and (5, 5) is returned.
    """

    if not rnd > 1:
        return 5, 5

    previous_round = df_file_kills['round'] == rnd - 1
    lowest_counts = df_file_kills.loc[previous_round, ['ct_alive', 't_alive']].min()
    ct_alive, t_alive = lowest_counts.to_list()

    return ct_alive, t_alive



def winner_team(rnd):

    """
    4th WINNER TEAM previous round (one column, ct_winner filled with [1, 0])

    Returns a one-element list: [1] when the previous round was won by
    'CounterTerrorist', [0] otherwise, and [0.5] when `rnd` is not greater
    than 1 (no previous round). The previous round is read by position
    (row `rnd - 2`) from the module-level `df_file_meta`.
    """

    if not rnd > 1:
        return [0.5]

    previous_winner = df_file_meta.reset_index(drop=True).loc[rnd-2, 'winner_side']
    return [1 if previous_winner == 'CounterTerrorist' else 0]



def bomb_planted(rnd):

    """
    5th BOMB PLANTED previous round

    Returns a one-element list: [1] when any row of round `rnd - 1` in the
    module-level `df_file_dmg` has 'is_bomb_planted' equal to True, [0]
    otherwise, and [0.5] when `rnd` is not greater than 1 (no previous round).
    """

    if not rnd > 1:
        return [0.5]

    previous_flags = df_file_dmg.loc[df_file_dmg['round'] == rnd-1, 'is_bomb_planted']
    return [1] if previous_flags.eq(True).any() else [0]



def cons_wins(rnd):

    """
    6th CONSECUTIVE WINS (CT & T)

    Returns (ct_cons_wins, t_cons_wins): the length of each side's winning
    streak going into round `rnd`, taken from the 'winner_side' column of the
    module-level `df_file_meta`. At most one of the two values is non-zero.
    """

    # Winners of the rounds played before `rnd`, most recent first
    history = df_file_meta.reset_index(drop=True).loc[:rnd-2, 'winner_side'].to_list()
    history.reverse()

    streaks = {'CounterTerrorist': 0, 'Terrorist': 0}
    for side in streaks:
        for winner in history:
            if winner != side:
                break  # the streak ends at the first round won by someone else
            streaks[side] += 1

    return streaks['CounterTerrorist'], streaks['Terrorist']



def extr_val(rnd):

    """
    Extracting real CT & T team value and round_type from meta_demos

    Returns (ct_eq_val, t_eq_val, round_type) taken from the first row of the
    module-level `df_file_meta` whose 'round' equals `rnd`.
    Raises IndexError when no row matches.
    """

    wanted = ['ct_eq_val', 't_eq_val', 'round_type']
    # .values drops the index; [0] picks the first matching row of the 2-D array
    first_match = df_file_meta.loc[df_file_meta['round'] == rnd, wanted].values[0]
    ct_val_real, t_val_real, round_type = first_match
    return ct_val_real, t_val_real, round_type


    
def get_file():
    """Return the first index label of the module-level `df_file_meta`, wrapped in a list."""
    first_label = df_file_meta.index[0]
    return [first_label]
    
    
def flatten_list(input_list):

    """
    Flatten one level of nesting.

    input: iterable of iterables. Example: [[1, 2], [3], [4, 5, 6]]
    output: flattened list. Example: [1, 2, 3, 4, 5, 6]
    """

    flat = []
    for sublist in input_list:
        flat.extend(sublist)
    return flat

In [12]:
log = 0  # progress counter, incremented by df_from_file

# Keep only damage rows dealt with a gun: drop grenade damage, unknown weapon
# types and bomb damage.
flt_nade = dmg_demos['wp_type'].ne('Grenade')
flt_unknown = dmg_demos['wp_type'].ne('Unkown')  # spelled like this in the original data
flt_bomb = dmg_demos['wp'].ne('Bomb')

dmg_demos_flt = dmg_demos.loc[flt_nade & flt_unknown & flt_bomb]

# Index every table by file so that one file's rows can be fetched with .loc[file]
index_dmg_demos_flt, index_nade_demos, index_meta_demos, index_kills_demos = (
    frame.set_index('file')
    for frame in (dmg_demos_flt, nade_demos, meta_demos, kills_demos)
)



def acq_round(rnd):

    """
    Executes the acquisition functions for round `rnd`.

    Returns a 9-element tuple of lists/tuples, one per acquisition step, in
    the order: file, round, weapons, grenades, players alive, winner team,
    bomb planted, consecutive wins, real values.
    """

    return (
        get_file(),
        [rnd],
        weapons(rnd),
        nades(rnd),
        players_alive(rnd),
        winner_team(rnd),
        bomb_planted(rnd),
        cons_wins(rnd),
        extr_val(rnd),
    )

def df_from_file(file):

    """
    Input: file from which to extract the information
    Output: DataFrame with specific information from the file
            (one row per round found in the file's damage records)

    Side effects: rebinds the module-level df_file_dmg, df_file_nade,
    df_file_meta and df_file_kills to the rows of `file` (the acquisition
    functions read them), and prints a progress line when `file` is one of
    log_files.
    """

    global df_file_dmg, df_file_nade, df_file_meta, df_file_kills, log

    df_file_dmg = index_dmg_demos_flt.loc[file]
    df_file_nade = index_nade_demos.loc[file]
    df_file_meta = index_meta_demos.loc[file]
    df_file_kills = index_kills_demos.loc[file]

    rows = []
    for rnd in df_file_dmg['round'].unique():
        rows.append(flatten_list(acq_round(rnd)))

    if file in log_files:
        log += 1
        print(f'{log} / {len(log_files)} ............')

    return pd.DataFrame(rows)

In [14]:
%%time

# One DataFrame per file in test_files (which is the same list as files_complete).
all_files = [df_from_file(file) for file in test_files]

1 / 122 ............
2 / 122 ............
3 / 122 ............
4 / 122 ............
5 / 122 ............
6 / 122 ............
7 / 122 ............
8 / 122 ............
9 / 122 ............
10 / 122 ............
11 / 122 ............
12 / 122 ............
13 / 122 ............
14 / 122 ............
15 / 122 ............
16 / 122 ............
17 / 122 ............
18 / 122 ............
19 / 122 ............
20 / 122 ............
21 / 122 ............
22 / 122 ............
23 / 122 ............
24 / 122 ............
25 / 122 ............
26 / 122 ............
27 / 122 ............
28 / 122 ............
29 / 122 ............
30 / 122 ............
31 / 122 ............
32 / 122 ............
33 / 122 ............
34 / 122 ............
35 / 122 ............
36 / 122 ............
37 / 122 ............
38 / 122 ............
39 / 122 ............
40 / 122 ............
41 / 122 ............
42 / 122 ............
43 / 122 ............
44 / 122 ............
45 / 122 ............
46 / 122 ..........

In [15]:
# Stack the per-file frames and replace their repeated per-file indices with one 0..n-1 index.
concat_df = pd.concat(all_files).reset_index(drop=True)

In [21]:
# Column names, in the order in which acq_round returns the values.
concat_df.columns = [
    'file', 'round',
    'wp_ct_val', 'wp_t_val',
    'nade_ct_val', 'nade_t_val',
    'ct_alive', 't_alive',
    'ct_winner', 'bomb_planted',
    'ct_cons_wins', 't_cons_wins',
    'ct_val_real', 't_val_real', 'round_type',
]

In [22]:
# Spot-check the last rows of the assembled table.
concat_df.tail()

Unnamed: 0,file,round,wp_ct_val,wp_t_val,nade_ct_val,nade_t_val,ct_alive,t_alive,ct_winner,bomb_planted,ct_cons_wins,t_cons_wins,ct_val_real,t_val_real,round_type
310808,esea_match_13829171.dem,20,15500.0,13500.0,600,1900,0,4,0.0,0.0,0,3,23250,28150,NORMAL
310809,esea_match_13829171.dem,21,17562.5,16062.5,3200,1400,5,0,1.0,0.0,1,0,30950,25450,NORMAL
310810,esea_match_13829171.dem,22,23750.0,13500.0,2700,1400,2,0,1.0,0.0,2,0,32900,16600,FORCE_BUY
310811,esea_match_13829171.dem,23,14312.5,13500.0,1600,2600,2,4,0.0,0.0,0,1,25950,24450,NORMAL
310812,esea_match_13829171.dem,24,23750.0,20333.333333,1200,2700,0,2,0.0,1.0,0,2,14300,27500,FORCE_BUY


In [23]:
# NOTE(review): unlike the other two save cells of this notebook, this call does not pass
# index=False, so the 0..n-1 index is written as an extra first column - confirm this is intended.
concat_df.to_csv('../data/processed/base_to_ml_predicted_team_value_optimized.csv')

---------
---------
---------

<br>
<br>
<br>

## <font color='red'>Filters changed for .loc <br>First attempt with lists and .append() </font>

In [19]:
%%time

# LISTS: one list per output column, filled row by row in the loop below
lst_file, lst_round = [], []

lst_wp_ct_val, lst_wp_t_val = [], []
lst_nade_ct_val, lst_nade_t_val = [], []
lst_ct_alive, lst_t_alive = [], []
lst_ct_winner, lst_bomb_planted = [], []
lst_ct_cons_wins, lst_t_cons_wins = [], []

lst_ct_val_real, lst_t_val_real, lst_round_type = [], [], []

# Progress counter for the loop around files and rounds
log = 0

# Filter once, before the loop: drop grenade damage, unknown weapon types and
# bomb damage so that only gun records are left.
flt_nade = dmg_demos['wp_type'].ne('Grenade')
flt_unknown = dmg_demos['wp_type'].ne('Unkown')  # spelled like this in the original data
flt_bomb = dmg_demos['wp'].ne('Bomb')

dmg_demos_flt = dmg_demos.loc[flt_nade & flt_unknown & flt_bomb]

# Index every table by file so that one file's rows can be fetched with .loc[file]
index_dmg_demos_flt, index_nade_demos, index_meta_demos, index_kills_demos = (
    frame.set_index('file')
    for frame in (dmg_demos_flt, nade_demos, meta_demos, kills_demos)
)


# Main loop of the ".loc + append" attempt: for every complete file and every
# round found in its (gun-only) damage records, compute the features inline
# and append them to the lst_* lists, one element per (file, round).
for file in files_complete:
    
    log += 1
    # Progress line every 500 files
    if log % 500 == 0: print(f'{log} / {len(files_complete)} ............')
        
    
    # Rows of the current file, fetched through the 'file' index
    df_file_dmg = index_dmg_demos_flt.loc[file]
    df_file_nade = index_nade_demos.loc[file]
    df_file_meta = index_meta_demos.loc[file]
    df_file_kills = index_kills_demos.loc[file]
    
    
    
    # Former boolean-mask selection, replaced by the .loc lookups above:
#     df_file_dmg = dmg_demos_flt[dmg_demos['file'] == file]
#     df_file_nade = nade_demos[nade_demos['file'] == file]
#     df_file_meta = meta_demos[meta_demos['file'] == file]
#     df_file_kills = kills_demos[kills_demos['file'] == file]

    rounds = df_file_dmg['round'].unique()

    for rnd in rounds:

        # 1st WEAPONS
        df_round_ct_dmg = df_file_dmg[(df_file_dmg['round'] == rnd) & (df_file_dmg['att_side'] == 'CounterTerrorist')]
        df_round_t_dmg = df_file_dmg[(df_file_dmg['round'] == rnd) & (df_file_dmg['att_side'] == 'Terrorist')]

        wp_ct = df_round_ct_dmg[['att_id', 'wp']].drop_duplicates(subset='att_id')
        wp_t = df_round_t_dmg[['att_id', 'wp']].drop_duplicates(subset='att_id')
        # Drop_duplicates with subset to take the first record of each player. This way we reduce the error of taking a dropped gun

        # Recorded value plus, for the (5 - n_wp) players without a record, the mean recorded value
        wp_ct_val_records = sum([wp_values[weapon] for weapon in wp_ct['wp']])
        n_wp = len(wp_ct['wp'])
        if n_wp > 0:
            wp_ct_val = wp_ct_val_records + ((wp_ct_val_records/n_wp) * (5 - n_wp))
        elif n_wp == 0:
            wp_ct_val = wp_ct_val_records

        wp_t_val_records = sum([wp_values[weapon] for weapon in wp_t['wp']])
        n_wp = len(wp_t['wp'])
        if n_wp > 0:
            wp_t_val = wp_t_val_records + ((wp_t_val_records/n_wp) * (5 - n_wp))    
        elif n_wp == 0:
            wp_t_val = wp_t_val_records
        # Value of the guns the team is carrying (CT & T)

        # 2nd GRENADES
        df_round_ct_nade = df_file_nade[(df_file_nade['round'] == rnd) & (df_file_nade['att_side'] == 'CounterTerrorist')]
        df_round_t_nade = df_file_nade[(df_file_nade['round'] == rnd) & (df_file_nade['att_side'] == 'Terrorist')]

        nade_ct = df_round_ct_nade[['att_id', 'nade']].drop_duplicates()
        nade_t = df_round_t_nade[['att_id', 'nade']].drop_duplicates()
        # Drop_duplicates to take all the nades of each player (we assume the error of taking dropped nades)

        nade_ct_val = sum([wp_values[nade] for nade in nade_ct['nade']])
        nade_t_val = sum([wp_values[nade] for nade in nade_t['nade']])
        # Value of the grenades the team is carrying (CT & T)

        # 3rd PLAYER ALIVE previous round (CT & T)
        # Minimum of the alive counters over the previous round; 5 vs 5 for the first round
        if rnd > 1:
            ct_t_alive = df_file_kills[df_file_kills['round'] == rnd-1][['ct_alive', 't_alive']].min().to_list()
            ct_alive = ct_t_alive[0]
            t_alive = ct_t_alive[1]
        else:
            ct_alive = 5
            t_alive = 5

        # 4th WINNER TEAM previous round (one column, ct_winner filled with [1, 0])
        # The previous round is read by position (row rnd-2); 0.5 marks "no previous round"
        if rnd > 1:
            if df_file_meta.reset_index(drop=True).loc[rnd-2, 'winner_side'] == 'CounterTerrorist':
                ct_winner = 1
            else:
                ct_winner = 0
        else:
            ct_winner = 0.5

        # 5th BOMB PLANTED previous round
        # 0.5 marks "no previous round"
        if rnd > 1:
            if True in df_file_dmg[df_file_dmg['round'] == rnd-1]['is_bomb_planted'].to_list():
                bomb_planted = 1
            else:
                bomb_planted = 0
        else:
            bomb_planted = 0.5

        # 6th CONSECUTIVE WINS (CT & T)
        # wins_list holds the winners of the rounds before rnd, most recent first
        df_consecutive = df_file_meta.reset_index(drop=True)
        wins_list = df_consecutive.loc[:rnd-2, 'winner_side'].to_list()[::-1]
        ct_cons_wins = 0
        t_cons_wins = 0

        for winner in wins_list: #One loop per team (CT)
            if winner == 'CounterTerrorist':
                ct_cons_wins += 1
            elif winner != 'CounterTerrorist':
                break

        for winner in wins_list: #One loop per team (T)
            if winner == 'Terrorist':
                t_cons_wins += 1
            elif winner != 'Terrorist':
                break

        # Extracting real CT & T team value and round_type from meta_demos
        ct_val_real = df_file_meta[df_file_meta['round'] == rnd]['ct_eq_val'].values[0]
        t_val_real = df_file_meta[df_file_meta['round'] == rnd]['t_eq_val'].values[0]
        round_type = df_file_meta[df_file_meta['round'] == rnd]['round_type'].values[0]
        # .values to get only the column value, not also the index
        # [0] because .values return a 1 dimension array


        # Append the values to the previously defined lists 
        lst_file.append(file)
        lst_round.append(rnd)

        lst_wp_ct_val.append(wp_ct_val)
        lst_wp_t_val.append(wp_t_val)
        lst_nade_ct_val.append(nade_ct_val)
        lst_nade_t_val.append(nade_t_val)
        lst_ct_alive.append(ct_alive)
        lst_t_alive.append(t_alive)
        lst_ct_winner.append(ct_winner)
        lst_bomb_planted.append(bomb_planted)
        lst_ct_cons_wins.append(ct_cons_wins)
        lst_t_cons_wins.append(t_cons_wins)

        lst_ct_val_real.append(ct_val_real)
        lst_t_val_real.append(t_val_real)
        lst_round_type.append(round_type)

500 / 12185 ............
1000 / 12185 ............
1500 / 12185 ............
2000 / 12185 ............
2500 / 12185 ............
3000 / 12185 ............
3500 / 12185 ............
4000 / 12185 ............
4500 / 12185 ............
5000 / 12185 ............
5500 / 12185 ............
6000 / 12185 ............
6500 / 12185 ............
7000 / 12185 ............
7500 / 12185 ............
8000 / 12185 ............
8500 / 12185 ............
9000 / 12185 ............
9500 / 12185 ............
10000 / 12185 ............
10500 / 12185 ............
11000 / 12185 ............
11500 / 12185 ............
12000 / 12185 ............
CPU times: user 1h 25min 10s, sys: 13.8 s, total: 1h 25min 24s
Wall time: 1h 25min 24s


In [20]:
# Build the table column by column. The previous pd.DataFrame([...]).T form
# created a 15-row frame and transposed it, which turns every column into
# 'object' dtype; a dict of columns keeps numeric columns numeric and needs no
# transpose. Note that numeric columns that mix ints and floats (e.g.
# ct_winner: 1 / 0 / 0.5) are therefore written as floats ("1.0") in the CSV.
df = pd.DataFrame({
    'file': lst_file,
    'round': lst_round,
    'wp_ct_val': lst_wp_ct_val,
    'wp_t_val': lst_wp_t_val,
    'nade_ct_val': lst_nade_ct_val,
    'nade_t_val': lst_nade_t_val,
    'ct_alive': lst_ct_alive,
    't_alive': lst_t_alive,
    'ct_winner': lst_ct_winner,
    'bomb_planted': lst_bomb_planted,
    'ct_cons_wins': lst_ct_cons_wins,
    't_cons_wins': lst_t_cons_wins,
    'ct_val_real': lst_ct_val_real,
    't_val_real': lst_t_val_real,
    'round_type': lst_round_type,
})

df.to_csv('../data/processed/base_to_ml_predicted_team_value_optimized_2.csv', index=False)

<BR>
<BR>
<BR>
    
    
## <font color='red'>Previous method</font>
### <font color='red'>Deprecated, too slow</font>

In [7]:
# LISTS: one list per output column, filled row by row in the loop below
lst_file, lst_round = [], []

lst_wp_ct_val, lst_wp_t_val = [], []
lst_nade_ct_val, lst_nade_t_val = [], []
lst_ct_alive, lst_t_alive = [], []
lst_ct_winner, lst_bomb_planted = [], []
lst_ct_cons_wins, lst_t_cons_wins = [], []

lst_ct_val_real, lst_t_val_real, lst_round_type = [], [], []

# Progress counter for the loop around files and rounds
log = 0

# Filter once, before the loop: drop grenade damage, unknown weapon types and
# bomb damage so that only gun records are left.
flt_nade = dmg_demos['wp_type'].ne('Grenade')
flt_unknown = dmg_demos['wp_type'].ne('Unkown')  # spelled like this in the original data
flt_bomb = dmg_demos['wp'].ne('Bomb')

dmg_demos_flt = dmg_demos.loc[flt_nade & flt_unknown & flt_bomb]

# Main loop of the original (slow) method: for every complete file and every
# round found in its (gun-only) damage records, compute the features inline
# and append them to the lst_* lists, one element per (file, round).
for file in files_complete:
    
    log += 1
    # Progress line every 500 files
    if log % 500 == 0: print(f'{log} / {len(files_complete)} ............')
    

    # Rows of the current file. The damage mask is built from dmg_demos_flt
    # itself: a mask built from the unfiltered dmg_demos has a different index,
    # so pandas had to realign it on every iteration (and emitted
    # "Boolean Series key will be reindexed to match DataFrame index").
    df_file_dmg = dmg_demos_flt[dmg_demos_flt['file'] == file]
    df_file_nade = nade_demos[nade_demos['file'] == file]
    df_file_meta = meta_demos[meta_demos['file'] == file]
    df_file_kills = kills_demos[kills_demos['file'] == file]

    rounds = df_file_dmg['round'].unique()

    for rnd in rounds:

        # 1st WEAPONS
        df_round_ct_dmg = df_file_dmg[(df_file_dmg['round'] == rnd) & (df_file_dmg['att_side'] == 'CounterTerrorist')]
        df_round_t_dmg = df_file_dmg[(df_file_dmg['round'] == rnd) & (df_file_dmg['att_side'] == 'Terrorist')]

        wp_ct = df_round_ct_dmg[['att_id', 'wp']].drop_duplicates(subset='att_id')
        wp_t = df_round_t_dmg[['att_id', 'wp']].drop_duplicates(subset='att_id')
        # Drop_duplicates with subset to take the first record of each player. This way we reduce the error of taking a dropped gun

        # Recorded value plus, for the (5 - n_wp) players without a record, the mean recorded value
        wp_ct_val_records = sum([wp_values[weapon] for weapon in wp_ct['wp']])
        n_wp = len(wp_ct['wp'])
        if n_wp > 0:
            wp_ct_val = wp_ct_val_records + ((wp_ct_val_records/n_wp) * (5 - n_wp))
        elif n_wp == 0:
            wp_ct_val = wp_ct_val_records

        wp_t_val_records = sum([wp_values[weapon] for weapon in wp_t['wp']])
        n_wp = len(wp_t['wp'])
        if n_wp > 0:
            wp_t_val = wp_t_val_records + ((wp_t_val_records/n_wp) * (5 - n_wp))
        elif n_wp == 0:
            wp_t_val = wp_t_val_records
        # Value of the guns the team is carrying (CT & T)

        # 2nd GRENADES
        df_round_ct_nade = df_file_nade[(df_file_nade['round'] == rnd) & (df_file_nade['att_side'] == 'CounterTerrorist')]
        df_round_t_nade = df_file_nade[(df_file_nade['round'] == rnd) & (df_file_nade['att_side'] == 'Terrorist')]

        nade_ct = df_round_ct_nade[['att_id', 'nade']].drop_duplicates()
        nade_t = df_round_t_nade[['att_id', 'nade']].drop_duplicates()
        # Drop_duplicates to take all the nades of each player (we assume the error of taking dropped nades)

        nade_ct_val = sum([wp_values[nade] for nade in nade_ct['nade']])
        nade_t_val = sum([wp_values[nade] for nade in nade_t['nade']])
        # Value of the grenades the team is carrying (CT & T)

        # 3rd PLAYER ALIVE previous round (CT & T)
        # Minimum of the alive counters over the previous round; 5 vs 5 for the first round
        if rnd > 1:
            ct_t_alive = df_file_kills[df_file_kills['round'] == rnd-1][['ct_alive', 't_alive']].min().to_list()
            ct_alive = ct_t_alive[0]
            t_alive = ct_t_alive[1]
        else:
            ct_alive = 5
            t_alive = 5

        # 4th WINNER TEAM previous round (one column, ct_winner filled with [1, 0])
        # The previous round is read by position (row rnd-2); 0.5 marks "no previous round"
        if rnd > 1:
            if df_file_meta.reset_index(drop=True).loc[rnd-2, 'winner_side'] == 'CounterTerrorist':
                ct_winner = 1
            else:
                ct_winner = 0
        else:
            ct_winner = 0.5

        # 5th BOMB PLANTED previous round
        # 0.5 marks "no previous round"
        if rnd > 1:
            if True in df_file_dmg[df_file_dmg['round'] == rnd-1]['is_bomb_planted'].to_list():
                bomb_planted = 1
            else:
                bomb_planted = 0
        else:
            bomb_planted = 0.5

        # 6th CONSECUTIVE WINS (CT & T)
        # wins_list holds the winners of the rounds before rnd, most recent first
        df_consecutive = df_file_meta.reset_index(drop=True)
        wins_list = df_consecutive.loc[:rnd-2, 'winner_side'].to_list()[::-1]
        ct_cons_wins = 0
        t_cons_wins = 0

        for winner in wins_list: #One loop per team (CT)
            if winner == 'CounterTerrorist':
                ct_cons_wins += 1
            elif winner != 'CounterTerrorist':
                break

        for winner in wins_list: #One loop per team (T)
            if winner == 'Terrorist':
                t_cons_wins += 1
            elif winner != 'Terrorist':
                break

        # Extracting real CT & T team value and round_type from meta_demos
        ct_val_real = df_file_meta[df_file_meta['round'] == rnd]['ct_eq_val'].values[0]
        t_val_real = df_file_meta[df_file_meta['round'] == rnd]['t_eq_val'].values[0]
        round_type = df_file_meta[df_file_meta['round'] == rnd]['round_type'].values[0]
        # .values to get only the column value, not also the index
        # [0] because .values return a 1 dimension array


        # Append the values to the previously defined lists
        lst_file.append(file)
        lst_round.append(rnd)

        lst_wp_ct_val.append(wp_ct_val)
        lst_wp_t_val.append(wp_t_val)
        lst_nade_ct_val.append(nade_ct_val)
        lst_nade_t_val.append(nade_t_val)
        lst_ct_alive.append(ct_alive)
        lst_t_alive.append(t_alive)
        lst_ct_winner.append(ct_winner)
        lst_bomb_planted.append(bomb_planted)
        lst_ct_cons_wins.append(ct_cons_wins)
        lst_t_cons_wins.append(t_cons_wins)

        lst_ct_val_real.append(ct_val_real)
        lst_t_val_real.append(t_val_real)
        lst_round_type.append(round_type)



500 / 12185 ............
1000 / 12185 ............
1500 / 12185 ............
2000 / 12185 ............
2500 / 12185 ............
3000 / 12185 ............
3500 / 12185 ............
4000 / 12185 ............
4500 / 12185 ............
5000 / 12185 ............
5500 / 12185 ............
6000 / 12185 ............
6500 / 12185 ............
7000 / 12185 ............
7500 / 12185 ............
8000 / 12185 ............
8500 / 12185 ............
9000 / 12185 ............
9500 / 12185 ............
10000 / 12185 ............
10500 / 12185 ............
11000 / 12185 ............
11500 / 12185 ............
12000 / 12185 ............


### Create and save the DataFrame

In [8]:
# Build the table column by column. The previous pd.DataFrame([...]).T form
# created a 15-row frame and transposed it, which turns every column into
# 'object' dtype; a dict of columns keeps numeric columns numeric and needs no
# transpose. Note that numeric columns that mix ints and floats (e.g.
# ct_winner: 1 / 0 / 0.5) are therefore written as floats ("1.0") in the CSV.
df = pd.DataFrame({
    'file': lst_file,
    'round': lst_round,
    'wp_ct_val': lst_wp_ct_val,
    'wp_t_val': lst_wp_t_val,
    'nade_ct_val': lst_nade_ct_val,
    'nade_t_val': lst_nade_t_val,
    'ct_alive': lst_ct_alive,
    't_alive': lst_t_alive,
    'ct_winner': lst_ct_winner,
    'bomb_planted': lst_bomb_planted,
    'ct_cons_wins': lst_ct_cons_wins,
    't_cons_wins': lst_t_cons_wins,
    'ct_val_real': lst_ct_val_real,
    't_val_real': lst_t_val_real,
    'round_type': lst_round_type,
})

df.to_csv('../data/processed/base_to_ml_predicted_team_value.csv', index=False)