In [3]:
import json # will be needed for saving preprocessing details
import numpy as np # for data manipulation
import pandas as pd # for data manipulation
from sklearn.model_selection import train_test_split # will be used for data split
from sklearn.ensemble import RandomForestClassifier # for training the algorithm
from sklearn.ensemble import ExtraTreesClassifier # for training the algorithm
import joblib # for saving algorithm and preprocessing objects
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.compose import make_column_selector

In [4]:
# Semicolon-delimited CSV of demo round traces, read straight from the GitHub repository.
url = (
    'https://raw.githubusercontent.com/AldoGuasch/MachineLearning/main/'
    'Anexo%20Forma%20B_demo_round_traces.csv'
)
df = pd.read_csv(url, sep=';')

In [5]:
# Remove two columns that are not used as features
# ('Unnamed: 0' is presumably a leftover index column from the CSV export — TODO confirm).
# drop(..., errors='ignore') keeps this cell idempotent: `del df[...]` raises
# KeyError when the cell is executed a second time on the same kernel.
df = df.drop(columns=['Unnamed: 0', 'AbnormalMatch'], errors='ignore')

In [6]:
# Preview the first five rows of the frame
df.head(n=5)

Unnamed: 0,Map,Team,InternalTeamId,MatchId,RoundId,SteamId,RoundWinner,MatchWinner,Survived,TimeAlive,...,RoundAssists,RoundHeadshots,RoundFlankKills,RoundStartingEquipmentValue,TeamStartingEquipmentValue,MatchKills,MatchFlankKills,MatchAssists,MatchHeadshots,AvgMatchKillDist
0,de_inferno,Terrorist,1,4,1,76561198036987787,False,True,False,51.120249,...,0,0,0,750,4400,0,0,0,0,501379.183091
1,de_inferno,Terrorist,1,4,1,76561197971812216,False,True,False,43.486626,...,0,0,0,800,4400,0,0,0,0,208807.587297
2,de_inferno,Terrorist,1,4,1,76561197972240652,False,True,False,37.354698,...,0,0,0,1000,4400,0,0,0,0,381652.027911
3,de_inferno,Terrorist,1,4,1,76561197975824962,False,True,False,47.678861,...,0,0,0,850,4400,0,0,0,0,289754.503091
4,de_inferno,Terrorist,1,4,1,76561197960331095,False,True,True,53.122511,...,0,0,0,1000,4400,0,0,0,0,254527.691176


In [7]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
# NOTE(review): rows are shuffled individually, so rows sharing a MatchId can end up on
# both sides of the split — confirm this is acceptable for a MatchWinner target.
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns=['MatchWinner']),
    df['MatchWinner'],
    train_size=0.8,
    random_state=1234,
    shuffle=True,
)

In [8]:
# mode() returns one row per tied mode; row 0 holds the first mode of every column.
modes_by_column = X_train.mode()
# Map each feature name to its most frequent training value
train_mode = dict(modes_by_column.iloc[0])
print(train_mode)

{'Map': 'de_inferno', 'Team': 'Terrorist', 'InternalTeamId': 1, 'MatchId': 29, 'RoundId': 2, 'SteamId': 76561197960710573, 'RoundWinner': False, 'Survived': False, 'TimeAlive': 0.0, 'ScaledTimeAlive': 1.0, 'AvgCentroidDistance': 0.0, 'TravelledDistance': 0.0, 'AvgRoundVelocity': 0.0, 'AvgKillDistance': 0.0, 'AvgSiteDistance': 9621644.64013265, 'RLethalGrenadesThrown': 0, 'RNonLethalGrenadesThrown': 0, 'PrimaryAssaultRifle': 1.0, 'PrimarySniperRifle': 0.0, 'PrimaryHeavy': 0.0, 'PrimarySMG': 0.0, 'PrimaryPistol': 0, 'FirstKillTime': 0.0, 'RoundKills': 0, 'RoundAssists': 0, 'RoundHeadshots': 0, 'RoundFlankKills': 0, 'RoundStartingEquipmentValue': 4700, 'TeamStartingEquipmentValue': 23500, 'MatchKills': 2, 'MatchFlankKills': 0, 'MatchAssists': 0, 'MatchHeadshots': 1, 'AvgMatchKillDist': 0.0}


In [None]:
# Partition the feature columns by dtype so each group gets its own transformer.
# 'number' matches every integer/float width (not only float64 and default ints),
# and booleans are scaled together with the numeric columns.
columnas_numericas = X_train.select_dtypes(include=['number', 'bool']).columns.to_list()
# Everything else is treated as categorical. Using the exact complement guarantees the
# two lists cover all columns, so nothing silently falls through to the
# ColumnTransformer remainder and the reconstructed column names stay aligned.
columnas_no_numericas = X_train.select_dtypes(exclude=['number', 'bool']).columns.to_list()

In [None]:
# Standardise the numeric features and one-hot encode the categorical ones.
# handle_unknown='ignore' makes transform() emit all-zero indicator columns for a
# category that was not present at fit time instead of raising an error.
preprocessor = ColumnTransformer(
    transformers=[
        ('scale', StandardScaler(), columnas_numericas),
        ('onehot', OneHotEncoder(handle_unknown='ignore'), columnas_no_numericas),
    ],
    remainder='passthrough',
)

In [None]:
# Fit the preprocessing on the training split only, then apply the already-fitted
# transformers to the test split.
X_train_prep = preprocessor.fit_transform(X_train)
X_test_prep  = preprocessor.transform(X_test)

In [None]:
# Rebuild a labelled DataFrame of the preprocessed training features.
# get_feature_names() was removed in scikit-learn 1.2; get_feature_names_out() replaces it.
decodificar_nombre = preprocessor.named_transformers_['onehot'].get_feature_names_out(columnas_no_numericas)
# ColumnTransformer emits columns in transformer order: scaled numerics, then one-hot columns
nombre_columnas = np.concatenate([columnas_numericas, decodificar_nombre])
# Reuse the matrix already produced by fit_transform instead of transforming X_train again
Datos_train_prep = X_train_prep
# The transformer may return a scipy sparse matrix; densify it so pandas can label the columns
if hasattr(Datos_train_prep, 'toarray'):
    Datos_train_prep = Datos_train_prep.toarray()
datos_train_prep = pd.DataFrame(Datos_train_prep, columns=nombre_columnas)
datos_train_prep.info()

In [None]:
# Random forest with 1000 trees.
# random_state pins the bootstrap samples and per-split feature draws so that
# retraining reproduces the same model; n_jobs=-1 builds trees on all available cores.
model = RandomForestClassifier(
    n_estimators=1000,
    bootstrap=True,
    max_features='sqrt',
    random_state=1234,  # same seed as the train/test split
    n_jobs=-1,
    verbose=1,  # progress summary only; verbose=2 logs a line for every one of the 1000 trees
)
# Train on the preprocessed training split
model.fit(X_train_prep, y_train)

In [None]:
# Predict MatchWinner labels for the held-out test split with the trained forest
pred_y_forest = model.predict(X_test_prep)

In [None]:
# Persist everything needed to score new data later:
#   - train_mode.joblib:    per-column modes of the training features
#   - encoders.joblib:      the fitted ColumnTransformer (scaler + one-hot encoder)
#   - random_forest.joblib: the trained random forest
# encoders.joblib must hold the fitted preprocessor, not the transformed training
# frame, otherwise new inputs cannot be transformed at inference time.
joblib.dump(train_mode, "./train_mode.joblib", compress=True)
joblib.dump(preprocessor, "./encoders.joblib", compress=True)
joblib.dump(model, "./random_forest.joblib", compress=True)