In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, RobustScaler

# Load the engineered feature table.
# The location can be overridden through the ENGINEERED_DATA_PATH environment
# variable; when it is unset, the original local path is used as the default.
data_path = os.environ.get(
    'ENGINEERED_DATA_PATH',
    '/Users/luissalamanca/Desktop/Duoc/Machine/ML_Proyecto_Semestral/data/03_features/engineered_data.csv',
)
data = pd.read_csv(data_path, sep=';')

# Fallback for a comma-separated file: parsing with ';' then yields a single
# column whose header still contains commas. Split the header and the values
# on ',' and convert every resulting column to numeric; cells that cannot be
# parsed become NaN because of errors='coerce'.
if len(data.columns) == 1 and ',' in data.columns[0]:
    column_name = data.columns[0]
    data_split = data[column_name].str.split(',', expand=True)
    data_split.columns = column_name.split(',')
    data_split = data_split.apply(pd.to_numeric, errors='coerce')
    data = data_split

# Feature columns and target column
features = [
    'EconomicEfficiency',
    'EffectivenessScore',
    'EquipmentAdvantage',
    'KillAssistRatio',
    'StealthKillsRatio'
]
target = 'KDA'

# Model only on complete rows: missing values (for example those produced by
# the numeric coercion above) are excluded here and reported, instead of being
# passed on unnoticed. `data` itself is left untouched.
model_columns = features + [target]
model_data = data.dropna(subset=model_columns)
n_dropped = len(data) - len(model_data)
if n_dropped:
    print(f"Dropped {n_dropped} row(s) with missing values in {model_columns}")

X = model_data[features]
y = model_data[target]

# Train/test split: 30% of the rows are held out, with a fixed seed so the
# split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

In [2]:
# Two-step pipeline: robust scaling followed by a gradient boosting regressor
# with a fixed seed.
pipeline_gb = Pipeline(steps=[
    ('scaler', RobustScaler()),
    ('regressor', GradientBoostingRegressor(random_state=42)),
])

# Hyperparameter grid for the 'regressor' step (2 x 2 x 2 x 2 = 16 candidates).
param_gb = dict(
    regressor__n_estimators=[100, 200],
    regressor__learning_rate=[0.1, 0.05],
    regressor__max_depth=[3, 5],
    regressor__min_samples_split=[2, 5],
)

# 5-fold cross-validated grid search scored by R², using all available cores.
gs_gb = GridSearchCV(
    pipeline_gb,
    param_gb,
    scoring='r2',
    cv=5,
    n_jobs=-1,
    verbose=1,
)
gs_gb.fit(X_train, y_train)

# Best pipeline found by the search, and its predictions on the held-out set.
best_gb = gs_gb.best_estimator_
y_pred_gb = best_gb.predict(X_test)

# Held-out metrics; RMSE is derived from MSE.
r2_gb, mse_gb, mae_gb = (
    metric(y_test, y_pred_gb)
    for metric in (r2_score, mean_squared_error, mean_absolute_error)
)
rmse_gb = np.sqrt(mse_gb)

# Report: one item per output line, same order and text as before.
report_lines = [
    "\n=== GRADIENT BOOSTING REGRESSOR ===",
    "Mejores hiperparámetros encontrados:",
    gs_gb.best_params_,
    f"R² (Coef. determinación): {r2_gb:.4f}",
    f"MSE:  {mse_gb:.4f}",
    f"RMSE: {rmse_gb:.4f}",
    f"MAE:  {mae_gb:.4f}",
]
print(*report_lines, sep="\n")

Fitting 5 folds for each of 16 candidates, totalling 80 fits

=== GRADIENT BOOSTING REGRESSOR ===
Mejores hiperparámetros encontrados:
{'regressor__learning_rate': 0.1, 'regressor__max_depth': 3, 'regressor__min_samples_split': 2, 'regressor__n_estimators': 100}
R² (Coef. determinación): 0.8918
MSE:  0.0788
RMSE: 0.2807
MAE:  0.1740
