In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.pipeline import Pipeline
from sklearn.svm import SVR
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt

# Load the engineered feature table.
# NOTE(review): this is an absolute, machine-specific path, so the cell only
# runs where that directory exists; consider a path relative to the project.
data_path = '/Users/luissalamanca/Desktop/Duoc/Machine/ML_Proyecto_Semestral/data/03_features/engineered_data.csv'
data = pd.read_csv(data_path, sep=';')

# If the ';' read collapsed everything into one column whose header contains
# commas, the file is really comma-delimited. Re-read it with the right
# delimiter so the CSV parser handles quoting and missing fields, instead of
# splitting each row by hand with str.split(','). Every field is read as text
# and then coerced to numeric; anything unparseable becomes NaN.
if len(data.columns) == 1 and ',' in data.columns[0]:
    data = pd.read_csv(data_path, sep=',', dtype=str).apply(
        pd.to_numeric, errors='coerce'
    )

# Feature columns used as model inputs; the regression target is 'KDA'.
features = [
    'EconomicEfficiency',
    'EffectivenessScore',
    'EquipmentAdvantage',
    'KillAssistRatio',
    'StealthKillsRatio'
]

# Keep only rows where every feature and the target are present. SVR rejects
# NaN inputs, so a single missing value would otherwise make every fit fail.
# When nothing is missing this selects exactly the same rows as before.
model_data = data[features + ['KDA']].dropna()
n_dropped = len(data) - len(model_data)
if n_dropped:
    print(f"Se descartaron {n_dropped} filas con valores faltantes en features/target")

X = model_data[features]
y = model_data['KDA']

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

In [None]:
# Two-step pipeline: standardise the features, then fit a support vector regressor.
pipeline_svr = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('regressor', SVR()),
])

# Hyper-parameter grid for the SVR step (1 kernel x 3 C x 2 gamma x 3 epsilon).
# The 'regressor__' prefix routes each parameter to the pipeline's 'regressor' step.
param_svr = dict(
    regressor__kernel=['rbf'],
    regressor__C=[1, 10, 100],
    regressor__gamma=['scale', 'auto'],
    regressor__epsilon=[0.01, 0.1, 0.2],
)

# Exhaustive search over the grid: 5-fold CV scored by R², using all CPU cores.
gs_svr = GridSearchCV(
    pipeline_svr,
    param_svr,
    scoring='r2',
    cv=5,
    n_jobs=-1,
    verbose=1,
)

# Run the grid search on the training split.
gs_svr.fit(X_train, y_train)

# Take the best pipeline found by the search and predict on the held-out test split.
best_svr = gs_svr.best_estimator_
y_pred_svr = best_svr.predict(X_test)

# Test-set metrics
mae_svr = mean_absolute_error(y_test, y_pred_svr)
mse_svr = mean_squared_error(y_test, y_pred_svr)
rmse_svr = np.sqrt(mse_svr)
r2_svr = r2_score(y_test, y_pred_svr)

# Assemble the report and emit it in one go (same text as line-by-line prints).
report_lines = [
    "\n=== SVR (Support Vector Regressor) ===",
    "Mejores hiperparámetros encontrados:",
    str(gs_svr.best_params_),
    f"R² (Coef. determinación): {r2_svr:.4f}",
    f"MSE:  {mse_svr:.4f}",
    f"RMSE: {rmse_svr:.4f}",
    f"MAE:  {mae_svr:.4f}",
]
print("\n".join(report_lines))

Fitting 5 folds for each of 18 candidates, totalling 90 fits
