In [1]:
import pandas as pd
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, root_mean_squared_error
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split

In [3]:
# Read the modelling table from the project-relative CSV.
# NOTE(review): the file name suggests an already-cleaned dataset — confirm upstream.
df = pd.read_csv(filepath_or_buffer='data/df_depuracion.csv')

In [5]:
# Regression target: the 'prices_per_night' column.
y = df.loc[:, 'prices_per_night']
# Predictors: every remaining column of the frame.
X = df.drop('prices_per_night', axis=1)


In [7]:
# Hold out 20% of the rows for the final evaluation; the fixed seed keeps the
# partition identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Definimos el modelo y sus parámetros

In [9]:
# Base LightGBM regressor: only the seed is pinned here, every other
# hyper-parameter is left to the search space defined below.
# TODO: the objective could perhaps be fixed here instead of being searched over.
lgb = LGBMRegressor(
    random_state=42,
)
# Hyper-parameter search space: each key is a LightGBM parameter name and each
# value is the list of candidate settings to try.
#
# `subsample` is intentionally NOT listed: it is LightGBM's alias for
# `bagging_fraction`, and when both are supplied LightGBM keeps
# `bagging_fraction` and ignores the alias. Searching over both therefore only
# tripled the number of candidates without changing any fitted model.
#
# NOTE(review): bagging is only active when bagging_freq > 0, and 'goss' does
# its own row sampling — confirm how the installed LightGBM version treats the
# goss + bagging combinations that remain in this space.
param_grid = {
    'n_estimators': [100, 500, 700, 1000],  # more trees can improve performance
    'num_leaves': [15, 31, 63, 127],  # more flexibility (overfitting risk)
    'min_child_samples': [10, 20, 50, 100],  # guards against overfitting
    'max_depth': [3, 5, 7, 10, 12],  # complexity control
    'learning_rate': [0.01, 0.05, 0.1, 0.2, 0.25],
    'min_split_gain': [0, 0.01, 0.1],
    'reg_lambda': [0, 0.1, 1, 10],  # stabilises the model
    'reg_alpha': [0, 0.1, 1, 10],  # reduces complexity
    'feature_fraction': [0.6, 0.8, 1.0],  # lowers overfitting risk
    'bagging_fraction': [0.6, 0.8, 1.0],  # row subsampling (canonical name of `subsample`)
    'bagging_freq': [0, 1, 5],
    'boosting_type': ['gbdt', 'dart', 'goss'],
    'objective': ['regression', 'huber'],
}

## Configuramos la búsqueda de hiperparámetros

In [11]:
# Hyper-parameter search over `param_grid`, scored by (negated) MAE with 3-fold CV.
#
# An exhaustive GridSearchCV is not feasible here: the lists in `param_grid`
# multiply out to more than ten million combinations, each of which would be
# fitted once per fold. RandomizedSearchCV samples a fixed budget of `n_iter`
# candidates from the same space instead (a dict of lists is sampled uniformly),
# and exposes the same fit / best_params_ / best_estimator_ API, so the variable
# name is kept for the cells that use it.
grid_search = RandomizedSearchCV(
    estimator=lgb,
    param_distributions=param_grid,
    n_iter=100,  # search budget: 100 candidates x 3 folds; raise for a more thorough search
    scoring='neg_mean_absolute_error',
    cv=3,
    verbose=1,
    n_jobs=-1,
    random_state=42,  # makes the sampled candidates reproducible
)

## Ejecutamos la búsqueda y mostramos los resultados

In [None]:
%%time

# Run the hyper-parameter search on the training split (the slow step of the notebook).
grid_search.fit(X_train, y_train)

# Report the winning hyper-parameter combination.
best_params = grid_search.best_params_
print("Mejores hiperparámetros:", best_params)

# Predict the held-out test split with the estimator selected by the search.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

# Test-set error metrics, kept as individual variables for later cells.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = root_mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Print the metrics in a fixed order, one per line.
for label, value in (("MAE:", mae), ("MSE:", mse), ("RMSE", rmse), ("R²:", r2)):
    print(label, value)
