In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_squared_error

# Simular datos
np.random.seed(42)
n = 500
X1 = np.random.normal(0, 1, n)
X2 = np.random.normal(2, 1.5, n)
logits = -1 + 0.8 * X1 + 1.2 * X2
prob = 1 / (1 + np.exp(-logits))
y = np.random.binomial(1, prob)

# Crear DataFrame
df = pd.DataFrame({'X1': X1, 'X2': X2, 'y': y})
X = df[['X1', 'X2']]
y = df['y']

# División de datos
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Definir grid de hiperparámetros
param_grid = {
    'C': [0.001, 0.01, 0.1, 1, 10, 100],
    'penalty': ['l1', 'l2'],
    'solver': ['liblinear']
}

# Inicializar modelo y GridSearchCV
logreg = LogisticRegression()
grid_search = GridSearchCV(logreg, param_grid, cv=5, scoring='neg_mean_squared_error', return_train_score=True)
grid_search.fit(X_train, y_train)

# Mejor modelo y predicciones
best_model = grid_search.best_estimator_
y_prob = best_model.predict_proba(X_test)[:, 1]

# Evaluación con MSE
mse = mean_squared_error(y_test, y_prob)

# Imprimir resultados
print("Mejores hiperparámetros:", grid_search.best_params_)
print(f"Error cuadrático medio (MSE) en test: {mse:.4f}\n")

# Mostrar todos los resultados del grid
results = pd.DataFrame(grid_search.cv_results_)
results = results[['param_C', 'param_penalty', 'mean_test_score', 'std_test_score']]
results['mean_test_score'] = -results['mean_test_score']  # Convertimos a MSE positivo
results = results.rename(columns={'mean_test_score': 'mean_MSE', 'std_test_score': 'std_MSE'})

print("Resumen de resultados (ordenados por MSE):")
print(results.sort_values(by='mean_MSE'))


Mejores hiperparámetros: {'C': 1, 'penalty': 'l1', 'solver': 'liblinear'}
Error cuadrático medio (MSE) en test: 0.1336

Resumen de resultados (ordenados por MSE):
   param_C param_penalty  mean_MSE   std_MSE
6        1            l1    0.2000  0.023717
7        1            l2    0.2000  0.023717
8       10            l1    0.2000  0.023717
9       10            l2    0.2000  0.023717
10     100            l1    0.2025  0.025495
11     100            l2    0.2025  0.025495
5      0.1            l2    0.2050  0.030208
4      0.1            l1    0.2275  0.022913
3     0.01            l2    0.2550  0.024495
2     0.01            l1    0.2600  0.014577
1    0.001            l2    0.2750  0.007906
0    0.001            l1    0.6900  0.005000
