In [1]:
import math
import pandas as pd
import numpy as np

In [2]:
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder

***

In [3]:
df = pd.read_csv("csgo_imputed_shifted.csv")

In [4]:
# encode winner_side, next_round_winner
side_dict = {"Terrorist":0, "CounterTerrorist":1, np.nan:.5}
df[["next_round_winner", "winner_side"]] = df[["next_round_winner", "winner_side"]].applymap(lambda x: side_dict[x])

df[["next_round_winner", "winner_side"]]

Unnamed: 0,next_round_winner,winner_side
0,0.0,0.5
1,0.0,0.0
2,0.0,0.0
3,1.0,0.0
4,0.0,1.0
...,...,...
375327,0.0,0.0
375328,0.0,0.0
375329,0.0,0.0
375330,0.0,0.0


In [9]:
# encode map
for map_ in df["map"].unique():
    df[f"map_{map_}"] = df["map"].apply(lambda x: 1 if x == map_ else 0)

df[[col for col in df.columns if "map" in col]]

Unnamed: 0,map,map_de_overpass,map_de_cache,map_de_inferno,map_de_mirage,map_de_train,map_de_dust2,map_de_cbble,map_de_nuke
0,de_overpass,1,0,0,0,0,0,0,0
1,de_overpass,1,0,0,0,0,0,0,0
2,de_overpass,1,0,0,0,0,0,0,0
3,de_overpass,1,0,0,0,0,0,0,0
4,de_overpass,1,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...
375327,de_mirage,0,0,0,1,0,0,0,0
375328,de_mirage,0,0,0,1,0,0,0,0
375329,de_mirage,0,0,0,1,0,0,0,0
375330,de_mirage,0,0,0,1,0,0,0,0


In [10]:
df = df.drop("map", axis=1)

In [11]:
ycols = ["file", "next_round_winner", "was_missing"]
Xcols = list(set(df.columns) - set(ycols))

In [12]:
# scale Xcols
scaler = MinMaxScaler(feature_range=(0,1))
df[Xcols] = scaler.fit_transform(df[Xcols])

df[Xcols]

Unnamed: 0,t_wp_type_Sniper_kills,t_wp_type_Grenade_damage,t_wp_type_Unkown_damage,ct_wp_type_Grenade_kills,length_seconds,ct_damage,t_wp_type_Pistol_damage,ct_num_dmg_instances,ct_Molotov_hits,round,...,ct_eq_val,ct_wp_type_Equipment_kills,t_wp_type_Rifle_damage,ct_wp_type_Unkown_damage,t_wp_type_Heavy_kills,ct_wp_type_SMG_damage,ct_Incendiary_hits,winner_side,t_damage,t_wp_type_SMG_kills
0,0.0,0.006442,0.000202,0.001814,0.029389,0.230063,0.342985,0.156828,0.000168,0.000000,...,0.091762,0.004903,0.000087,0.00005,0.0,0.000513,0.003951,0.5,0.283685,0.000262
1,0.0,0.020000,0.000000,0.000000,0.052491,0.224138,0.000000,0.106667,0.000000,0.034483,...,0.145759,0.000000,0.270270,0.00000,0.0,0.000000,0.000000,0.0,0.314077,0.000000
2,0.0,0.000000,0.000000,0.000000,0.018862,0.114420,0.025641,0.106667,0.000000,0.068966,...,0.178017,0.000000,0.081081,0.00000,0.2,0.000000,0.000000,0.0,0.326029,0.400000
3,0.0,0.060000,0.000000,0.000000,0.038276,0.247649,0.000000,0.333333,0.000000,0.103448,...,0.590203,0.000000,0.594595,0.00000,0.2,0.000000,0.163934,0.0,0.364542,0.000000
4,0.0,0.060000,0.000000,0.000000,0.011292,0.336991,0.000000,0.253333,0.000000,0.137931,...,0.124253,0.000000,0.108108,0.00000,0.0,0.111111,0.000000,1.0,0.176627,0.200000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
375327,0.0,0.000000,0.000000,0.000000,0.010484,0.132706,0.000000,0.120000,0.000000,0.586207,...,0.102748,0.000000,0.351351,0.00000,0.0,0.066667,0.000000,0.0,0.334661,0.200000
375328,0.0,0.000000,0.000000,0.000000,0.017190,0.262278,0.000000,0.146667,0.000000,0.620690,...,0.387097,0.000000,0.324324,0.00000,0.0,0.044444,0.000000,0.0,0.352590,0.000000
375329,0.0,0.000000,0.000000,0.000000,0.012124,0.192790,0.000000,0.160000,0.000000,0.655172,...,0.356033,0.000000,0.540541,0.00000,0.0,0.044444,0.000000,0.0,0.360558,0.000000
375330,0.0,0.000000,0.000000,0.000000,0.013430,0.198015,0.000000,0.186667,0.000000,0.689655,...,0.414576,0.000000,0.459459,0.00000,0.0,0.000000,0.032787,0.0,0.365206,0.000000


In [13]:
df[ycols]

Unnamed: 0,file,next_round_winner,was_missing
0,esea_match_13770997.dem,0.0,1
1,esea_match_13770997.dem,0.0,0
2,esea_match_13770997.dem,0.0,0
3,esea_match_13770997.dem,1.0,0
4,esea_match_13770997.dem,0.0,0
...,...,...,...
375327,esea_match_13829173.dem,0.0,0
375328,esea_match_13829173.dem,0.0,0
375329,esea_match_13829173.dem,0.0,0
375330,esea_match_13829173.dem,0.0,0


In [14]:
df.to_csv("csgo_imputed_shifted_encoded_scaled.csv", index=False)