In [1]:
# Instalar CatBoost
!pip install catboost --quiet

# Importar y usar CatBoost
from catboost import CatBoostClassifier




[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.2/99.2 MB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
# Celda 1: Cargar datos
import pandas as pd, numpy as np
from google.colab import files

uploaded = files.upload()  # Subir el archivo CSV
df_master = pd.read_csv(next(iter(uploaded)))


Saving csgo_v2.csv to csgo_v2.csv


In [3]:
# Celda 2: Crear dummies para 'Map' y 'Team'
df_master = pd.get_dummies(df_master, columns=['Map','Team'], prefix=['Map','Team'], drop_first=True)


In [4]:
# Celda 3: Asegurar que la variable objetivo esté bien codificada
df_master['Survived'] = df_master['Survived'].astype(int)


In [5]:
# Celda 3: Asegurar que la variable objetivo esté bien codificada
df_master['Survived'] = df_master['Survived'].astype(int)


In [6]:
# Celda 4: Eliminar columnas con strings u objetos sospechosos
columnas_excluir = []
for col in df_master.columns:
    if df_master[col].dtype == 'object':
        columnas_excluir.append(col)
    elif df_master[col].astype(str).str.contains(r'[^\d\.-]').any():
        columnas_excluir.append(col)
df_master = df_master.drop(columns=columnas_excluir)


In [7]:
# Celda 5: Definir variables y mostrar las usadas
target = 'Survived'
base_feats = [c for c in df_master.columns if c != target]

print(f"Variables utilizadas para el modelo ({len(base_feats)}):")
print(base_feats)


Variables utilizadas para el modelo (24):
['id', 'InternalTeamId', 'MatchId', 'RoundId', 'TimeAlive', 'TravelledDistance', 'RLethalGrenadesThrown', 'RNonLethalGrenadesThrown', 'PrimaryAssaultRifle', 'PrimarySniperRifle', 'PrimaryHeavy', 'PrimarySMG', 'PrimaryPistol', 'RoundKills', 'RoundAssists', 'RoundHeadshots', 'RoundFlankKills', 'RoundStartingEquipmentValue', 'TeamStartingEquipmentValue', 'MatchKills', 'MatchFlankKills', 'MatchAssists', 'MatchHeadshots', 'Speed_mps']


In [8]:
# Celda 6: Preparar datos (split + SMOTE)
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE

def obtener_datos(df, feats, test_size=0.2, seed=42):
    X, y = df[feats].copy(), df[target].copy()
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, stratify=y, random_state=seed)
    X_train_sm, y_train_sm = SMOTE(random_state=seed).fit_resample(X_train, y_train)
    return X_train_sm, X_test, y_train_sm, y_test

Xtr, Xte, ytr, yte = obtener_datos(df_master, base_feats)


In [9]:
# Celda 7: Entrenar modelo CatBoost
from catboost import CatBoostClassifier

cat = CatBoostClassifier(
    iterations=300,
    depth=6,
    learning_rate=0.05,
    loss_function='Logloss',
    verbose=False,
    random_state=42
)

cat.fit(Xtr, ytr)


<catboost.core.CatBoostClassifier at 0x7bf423e01950>

In [10]:
# Celda 8: Evaluación del modelo
from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, recall_score, f1_score

def evaluar(modelo, Xte, yte, nombre):
    prob = modelo.predict_proba(Xte)[:,1]
    pred = (prob >= 0.5).astype(int)
    print(f"\n=== {nombre} ===")
    print("AUC-ROC:", roc_auc_score(yte, prob).round(4))
    print(f"Acc   : {accuracy_score(yte, pred):.4f} | Prec: {precision_score(yte, pred):.4f} | Rec : {recall_score(yte, pred):.4f} | F1  : {f1_score(yte, pred):.4f}")

evaluar(cat, Xte, yte, "CatBoost")



=== CatBoost ===
AUC-ROC: 0.8119
Acc   : 0.7523 | Prec: 0.5982 | Rec : 0.7263 | F1  : 0.6560


In [11]:
import joblib

joblib.dump(cat, 'modelo_catboost.pkl')
print("Modelo CatBoost exportado como 'modelo_catboost.pkl'")

from google.colab import files
files.download('modelo_catboost.pkl')

Modelo CatBoost exportado como 'modelo_catboost.pkl'


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>