<a href="https://colab.research.google.com/github/Hugusss/ML-AI-course-FinalChallenge/blob/main/ChallengeMLCourse_XGBoost.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import pandas as pd
import numpy as np
import torch

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OrdinalEncoder, StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.impute import KNNImputer

from xgboost import XGBClassifier


if torch.cuda.is_available():
    DEVICE = "cuda"
    print("There is GPU available. Printing GPU information:")
    !nvidia-smi
else:
    DEVICE = "cpu"
    print("There is no GPU available, using CPU.")

There is GPU available. Printing GPU information:
Fri Sep  5 11:18:19 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   38C    P8              9W /   70W |       2MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------

In [4]:
# Read the training CSV and the test CSV, in that order. Both are parsed as
# comma-separated; adjust `sep` if the files turn out to be tab-separated.
train_df, test_df = (
    pd.read_csv(csv_name, sep=",")
    for csv_name in ("dragones.csv", "dragones_test.csv")
)

# Sanity check on the loaded dimensions.
print("Train shape:", train_df.shape)
print("Test shape:", test_df.shape)

Train shape: (680, 11)
Test shape: (175, 10)


In [5]:
# Separate the "Winner" target column from the predictor columns.
y = train_df["Winner"]
X = train_df.drop("Winner", axis=1)

In [6]:
# =========================================
# 4. Feature Engineering
# =========================================

def add_features(df):
    """Return a copy of ``df`` extended with four engineered columns.

    The input frame is left untouched. Columns appended, in order:

    - ``PowerPerHealth``: ``PotentialPower / (Health + 1e-6)``; the small
      offset keeps a zero ``Health`` from causing a division by zero.
    - ``MysticPerAge``: ``MysticPower / (DragonAge + 1)``, with missing
      ``DragonAge`` treated as 0.
    - ``AgeHealth``: ``DragonAge * Health``, missing values treated as 0.
    - ``AgeClutch``: ``DragonAge * ClutchSize``, missing values treated as 0.

    Missing ``Health`` is NOT filled for ``PowerPerHealth``, so that column
    stays NaN wherever ``Health`` (or ``PotentialPower``) is missing.
    """
    # Missing ages count as 0 in every feature that uses the age.
    age = df["DragonAge"].fillna(0)

    return df.assign(
        PowerPerHealth=df["PotentialPower"] / (df["Health"] + 1e-6),
        MysticPerAge=df["MysticPower"] / (age + 1),
        AgeHealth=age * df["Health"].fillna(0),
        AgeClutch=age * df["ClutchSize"].fillna(0),
    )

# Add the engineered columns to the training predictors and to the test set.
X, test_df = map(add_features, (X, test_df))

In [7]:
# =========================================
# 5. Identify columns
# =========================================
# Object-dtype columns are treated as categorical; every remaining column is
# treated as numeric. Both lists keep the frame's column order.
cat_cols = X.select_dtypes(include="object").columns.tolist()
num_cols = [col for col in X.columns if col not in cat_cols]

In [8]:
# =========================================
# 6. Preprocessing
# =========================================
# NOTE(review): the encoder, scaler and imputer below are fitted on all of X,
# i.e. before the train/validation split performed in a later cell, so the
# validation rows influence the fitted statistics and the validation score
# may be slightly optimistic.

# Categorical encoding: map each category to an ordinal code learned from the
# training data. A category that appears only in the test set is encoded as
# NaN instead of raising an error (the default behaviour); XGBoost treats NaN
# as a missing value.
encoder = OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=np.nan)
X[cat_cols] = encoder.fit_transform(X[cat_cols])
test_df[cat_cols] = encoder.transform(test_df[cat_cols])

# Standardize the numeric columns so that the distance-based KNN imputer below
# is not dominated by features with a large scale.
scaler = StandardScaler()
X[num_cols] = scaler.fit_transform(X[num_cols])
test_df[num_cols] = scaler.transform(test_df[num_cols])

# Fill the remaining numeric gaps from the 5 nearest neighbours found in the
# training data.
imputer = KNNImputer(n_neighbors=5)
X[num_cols] = imputer.fit_transform(X[num_cols])
test_df[num_cols] = imputer.transform(test_df[num_cols])

In [9]:
# Hold out 20% of the rows for validation; the split is stratified on the
# target and seeded so it is reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
    test_size=0.2,
)

In [10]:
# XGBoost classifier with a fixed seed, evaluated with log-loss; every other
# hyperparameter is left at the library default.
# NOTE(review): DEVICE from the first cell is not passed to the model, so
# training uses XGBoost's default device setting — confirm this is intended.
model = XGBClassifier(random_state=42, eval_metric="logloss")

# Train on the training portion of the split only.
model.fit(X_train, y_train)

In [11]:
# Score the fitted model on the held-out validation rows.
y_pred_val = model.predict(X_val)
acc = accuracy_score(y_true=y_val, y_pred=y_pred_val)
print("Accuracy en validación (mejorado):", acc)

Accuracy en validación (mejorado): 0.8235294117647058


In [20]:
# Predict on the test set. The columns are selected explicitly, in the order
# the model was trained on, so a test CSV with reordered or extra columns
# still works and a missing column fails with a clear KeyError.
test_preds = model.predict(test_df[X_train.columns])

# Write one integer prediction per line to a text file.
np.savetxt("predicciones.txt", test_preds, fmt="%d")
print("Archivo predicciones.txt generado.")

Archivo predicciones.txt generado.
