In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Simulate a binary outcome driven by two Gaussian features through a logistic model.
# The legacy global seed is set once; the draw order below (X1, X2, then y) determines
# the generated values, so it must not be rearranged.
np.random.seed(42)
n = 500
X1 = np.random.normal(loc=0, scale=1, size=n)
X2 = np.random.normal(loc=2, scale=1.5, size=n)

# Linear predictor -> success probability via the logistic (sigmoid) function.
logits = -1 + 0.8 * X1 + 1.2 * X2
prob = 1 / (1 + np.exp(-logits))

# One Bernoulli draw per row, each with its own success probability.
y = np.random.binomial(n=1, p=prob)

# Collect everything in a DataFrame, then expose the feature matrix and the target.
# Note: `y` is rebound here from a NumPy array to the DataFrame column (a Series).
df = pd.DataFrame({'X1': X1, 'X2': X2, 'y': y})
feature_cols = ['X1', 'X2']
X = df[feature_cols]
y = df['y']

# Train/test split: 20% of the rows are held out, with a fixed seed for a reproducible split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Hyperparameter grid for RandomForestRegressor (3 * 4 * 2 * 2 = 48 combinations).
param_grid = dict(
    n_estimators=[50, 100, 200],
    max_depth=[None, 3, 5, 10],
    min_samples_split=[2, 5],
    min_samples_leaf=[1, 2],
)

# Exhaustive search with 5-fold cross-validation on the training split.
# The scorer is *negative* MSE (scikit-learn maximises scores), so values closer to
# zero are better. The target is a 0/1 column fitted with a regressor, so predictions
# are continuous and are compared to the labels by squared error.
rf = RandomForestRegressor(random_state=42)
grid_search = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    cv=5,
    scoring='neg_mean_squared_error',
    return_train_score=True,
)
grid_search.fit(X_train, y_train)

# Take the estimator that the grid search refit with its best parameter combination
# and predict on the held-out test split.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

# Test-set mean squared error of those predictions.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)

# Report the selected hyperparameters and the test error (the trailing "\n" leaves a
# blank line before the summary table printed afterwards).
print("Mejores hiperparámetros:", grid_search.best_params_)
print(f"Error cuadrático medio (MSE) en test: {mse:.4f}\n")

# Summarise every grid point: keep the four searched parameters plus the
# cross-validated score statistics, built as a single method chain.
summary_cols = [
    'param_n_estimators',
    'param_max_depth',
    'param_min_samples_split',
    'param_min_samples_leaf',
    'mean_test_score',
    'std_test_score',
]
results = (
    pd.DataFrame(grid_search.cv_results_)
    .loc[:, summary_cols]
    # The scorer reports negative MSE; flip the sign to get a positive MSE.
    .assign(mean_test_score=lambda frame: -frame['mean_test_score'])
    .rename(columns={'mean_test_score': 'mean_MSE', 'std_test_score': 'std_MSE'})
)

# Print the table with the lowest (best) mean MSE first; `results` itself stays unsorted.
print("Resumen de resultados (ordenados por MSE):")
print(results.sort_values(by='mean_MSE'))


Mejores hiperparámetros: {'max_depth': 3, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 200}
Error cuadrático medio (MSE) en test: 0.1299

Resumen de resultados (ordenados por MSE):
   param_n_estimators param_max_depth param_min_samples_split  \
17                200               3                       5   
29                200               5                       5   
23                200               3                       5   
14                200               3                       2   
20                200               3                       2   
16                100               3                       5   
28                100               5                       5   
35                200               5                       5   
22                100               3                       5   
13                100               3                       2   
33                 50               5                       5   
19                100