In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error
from xgboost import XGBRegressor

# Simulate a binary-outcome dataset from a logistic model.
# The legacy global seed fixes the draw sequence, so the sampling calls below
# must stay in this order (X1, X2, then the 0/1 outcome) to reproduce the data.
np.random.seed(42)
n = 500
X1 = np.random.normal(loc=0, scale=1, size=n)
X2 = np.random.normal(loc=2, scale=1.5, size=n)

# Linear predictor and its logistic transform: prob is P(y = 1) for each row.
logits = -1 + 0.8 * X1 + 1.2 * X2
prob = 1 / (1 + np.exp(-logits))

# Assemble predictors and the Bernoulli outcome (one trial per row) in a frame.
df = pd.DataFrame({
    'X1': X1,
    'X2': X2,
    'y': np.random.binomial(n=1, p=prob),
})

# Feature matrix and target vector consumed by the modelling steps.
X = df[['X1', 'X2']]
y = df['y']

# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# partition reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Hyperparameter grid explored for XGBRegressor: 3 * 3 * 3 * 2 = 54 candidates.
param_grid = dict(
    n_estimators=[50, 100, 200],
    max_depth=[3, 5, 7],
    learning_rate=[0.01, 0.1, 0.3],
    subsample=[0.8, 1.0],
)

# Base estimator: squared-error objective, fixed seed, verbosity turned down to 0.
# NOTE(review): the target is a 0/1 draw, so this is squared-error regression on
# a binary outcome -- confirm a regressor (rather than a classifier) is intended.
xgb = XGBRegressor(
    objective='reg:squarederror',
    random_state=42,
    verbosity=0,
)

# Exhaustive 5-fold search over the grid. Scores are negated MSE (higher is
# better); train-fold scores are requested as well and end up in cv_results_.
grid_search = GridSearchCV(
    estimator=xgb,
    param_grid=param_grid,
    cv=5,
    scoring='neg_mean_squared_error',
    return_train_score=True,
)
grid_search.fit(X_train, y_train)

# Take the winning estimator from the search and score it on the held-out rows.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

# Test-set mean squared error between observed targets and predictions.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)

# Report the selected configuration and its test error (4 decimal places).
print("Mejores hiperparámetros:", grid_search.best_params_)
print(f"Error cuadrático medio (MSE) en test: {mse:.4f}\n")

# Tabulate every grid candidate together with its cross-validated error.
cols = [
    'param_n_estimators',
    'param_max_depth',
    'param_learning_rate',
    'param_subsample',
    'mean_test_score',
    'std_test_score',
]
results = (
    pd.DataFrame(grid_search.cv_results_)[cols]
    # The search scored with negated MSE; flip the sign to get a positive MSE.
    .assign(mean_test_score=lambda frame: -frame['mean_test_score'])
    .rename(columns={'mean_test_score': 'mean_MSE', 'std_test_score': 'std_MSE'})
)

# Print the candidates from lowest to highest mean MSE.
print("Resumen de resultados (ordenados por MSE):")
print(results.sort_values(by='mean_MSE'))

Mejores hiperparámetros: {'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 200, 'subsample': 0.8}
Error cuadrático medio (MSE) en test: 0.1358

Resumen de resultados (ordenados por MSE):
   param_n_estimators param_max_depth param_learning_rate param_subsample  \
4                 200               3                0.01             0.8   
5                 200               3                0.01             1.0   
19                 50               3                 0.1             1.0   
18                 50               3                 0.1             0.8   
10                200               5                0.01             0.8   
2                 100               3                0.01             0.8   
21                100               3                 0.1             1.0   
8                 100               5                0.01             0.8   
3                 100               3                0.01             1.0   
11                200               5