In [1]:
from sklearn.datasets import fetch_california_housing
california = fetch_california_housing()
X = california.data
y = california.target

In [2]:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Lasso, Ridge, ElasticNet
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import numpy as np

alphas = np.logspace(-4, 2, 20)
ratios = np.linspace(0, 1, 10)

**Definicion de modelos**

In [3]:
lasso = Pipeline([
    ("scaler", StandardScaler()),
    ("model", Lasso(max_iter=5000))
])

ridge = Pipeline([
    ("scaler", StandardScaler()),
    ("model", Ridge(max_iter=5000))
])

elastic = Pipeline([
    ("scaler", StandardScaler()),
    ("model", ElasticNet(max_iter=5000))
])

param_lasso = {"model__alpha": alphas}
param_ridge = {"model__alpha": alphas}
param_elastic = {
    "model__alpha": alphas,
    "model__l1_ratio": ratios
}

**Validacion**

In [4]:
from sklearn.metrics import make_scorer, mean_squared_error, mean_absolute_error

scoring = {
    "rmse": make_scorer(mean_squared_error, greater_is_better=False, squared=True),
    "mae": make_scorer(mean_absolute_error, greater_is_better=False)
}

**Lasso**

In [5]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error


grid_lasso = GridSearchCV(
    estimator=lasso,
    param_grid=param_lasso,
    scoring="neg_mean_squared_error",  
    cv=5,
    n_jobs=-1
)

grid_lasso.fit(X, y)

best_alpha_lasso = grid_lasso.best_params_["model__alpha"]
print("Mejor alpha (Lasso):", best_alpha_lasso)

y_pred = grid_lasso.predict(X)

rmse = np.sqrt(mean_squared_error(y, y_pred))
mae = mean_absolute_error(y, y_pred)

print(f"RMSE (Lasso): {rmse:.4f}")
print(f"MAE (Lasso):  {mae:.4f}")

best_lasso = grid_lasso.best_estimator_["model"]
coef = best_lasso.coef_

for name, c in zip(california.feature_names, coef):
    print(f"{name}: {c:.4f}")

num_zeros = np.sum(coef == 0)
print("Número de coeficientes eliminados (cero):", num_zeros)

Mejor alpha (Lasso): 0.0008858667904100823
RMSE (Lasso): 0.7241
MAE (Lasso):  0.5313
MedInc: 0.8251
HouseAge: 0.1196
AveRooms: -0.2535
AveBedrms: 0.2937
Population: -0.0033
AveOccup: -0.0385
Latitude: -0.8903
Longitude: -0.8604
Número de coeficientes eliminados (cero): 0


**Gridge**

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np


grid_ridge = GridSearchCV(
    estimator=ridge,
    param_grid=param_ridge,
    scoring="neg_mean_squared_error",  
    cv=5,
    n_jobs=-1
)

grid_ridge.fit(X, y)

best_alpha_ridge = grid_ridge.best_params_["model__alpha"]
print("Mejor alpha (Ridge):", best_alpha_ridge)

y_pred_ridge = grid_ridge.predict(X)
rmse_ridge = np.sqrt(mean_squared_error(y, y_pred_ridge))
mae_ridge  = mean_absolute_error(y, y_pred_ridge)

print(f"RMSE (Ridge): {rmse_ridge:.4f}")
print(f"MAE  (Ridge): {mae_ridge:.4f}")

best_ridge = grid_ridge.best_estimator_["model"]
coef_ridge = best_ridge.coef_

print("\nCoeficientes Ridge:")
for name, c in zip(california.feature_names, coef_ridge):
    print(f"{name}: {c:.6f}")

near_zero_count = np.sum(np.abs(coef_ridge) < 0.01)
print("\nNúmero de coeficientes |coef| < 0.01 (Ridge):", near_zero_count)


Mejor alpha (Ridge): 11.288378916846883
RMSE (Ridge): 0.7241
MAE  (Ridge): 0.5311

Coeficientes Ridge:
MedInc: 0.829310
HouseAge: 0.119481
AveRooms: -0.264055
AveBedrms: 0.303760
Population: -0.004246
AveOccup: -0.039365
Latitude: -0.892954
Longitude: -0.863544

Número de coeficientes |coef| < 0.01 (Ridge): 1


**Elastic Net**

In [None]:
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.metrics import mean_squared_error, mean_absolute_error, make_scorer
import numpy as np


grid_elastic = GridSearchCV(
    estimator=elastic,
    param_grid=param_elastic,
    scoring="neg_mean_squared_error",  # selecciona por MSE
    cv=5,
    n_jobs=-1
)

grid_elastic.fit(X, y)

best_params_elastic = grid_elastic.best_params_
best_alpha_elastic   = best_params_elastic["model__alpha"]
best_l1_ratio        = best_params_elastic["model__l1_ratio"]

print("Mejores hiperparámetros (Elastic Net):")
print("  alpha   =", best_alpha_elastic)
print("  l1_ratio=", best_l1_ratio)

best_elastic = grid_elastic.best_estimator_


cv_mse = cross_val_score(best_elastic, X, y, cv=5, scoring="neg_mean_squared_error", n_jobs=-1)
cv_rmse = np.sqrt(-cv_mse.mean())


cv_mae = -cross_val_score(best_elastic, X, y, cv=5, scoring="neg_mean_absolute_error", n_jobs=-1).mean()

print(f"\nElastic Net – CV (5 folds)")
print(f"RMSE: {cv_rmse:.4f}")
print(f"MAE : {cv_mae:.4f}")


Mejores hiperparámetros (Elastic Net):
  alpha   = 0.0008858667904100823
  l1_ratio= 1.0

Elastic Net – CV (5 folds)
RMSE: 0.7472
MAE : 0.5477


**Tabla Comparativa**

In [None]:
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error

# ---- Lasso ----
y_pred_lasso = grid_lasso.predict(X)
rmse_lasso = np.sqrt(mean_squared_error(y, y_pred_lasso))
mae_lasso  = mean_absolute_error(y, y_pred_lasso)

# ---- Ridge ----
y_pred_ridge = grid_ridge.predict(X)
rmse_ridge = np.sqrt(mean_squared_error(y, y_pred_ridge))
mae_ridge  = mean_absolute_error(y, y_pred_ridge)

# ---- Elastic Net ----
# Usamos valores calculados antes: cv_rmse, cv_mae
rmse_elastic = cv_rmse
mae_elastic  = cv_mae

results = pd.DataFrame({
    "Modelo": ["Lasso", "Ridge", "Elastic Net"],
    "Mejor α": [
        best_alpha_lasso,
        best_alpha_ridge,
        best_alpha_elastic
    ],
    "l1_ratio (EN)": [
        None,
        None,
        best_l1_ratio
    ],
    "RMSE": [rmse_lasso, rmse_ridge, rmse_elastic],
    "MAE":  [mae_lasso, mae_ridge, mae_elastic]
})

results


Unnamed: 0,Modelo,Mejor α,l1_ratio (EN),RMSE,MAE
0,Lasso,0.000886,,0.72413,0.531262
1,Ridge,11.288379,,0.724105,0.531131
2,Elastic Net,0.000886,1.0,0.747157,0.547679
