In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.linear_model import Ridge, Lasso, ElasticNet
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [2]:
# Fetch the California housing dataset via scikit-learn.
california = fetch_california_housing()

# Regression target exactly as provided by the dataset object.
y = california.target

# Wrap the raw feature matrix in a DataFrame so columns carry the feature names.
X = pd.DataFrame(data=california.data, columns=california.feature_names)

In [3]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Search space explored by the model-selection cell.
degrees = [1, 2, 3]            # polynomial expansion degrees
alphas = [0.1, 1.0, 10.0]      # regularisation strengths tried for every model
l1_ratios = [0.2, 0.5, 0.8]    # L1/L2 mixes, used for ElasticNet only

# One (degree, alpha, l1_ratio, rmse, mae) tuple is appended per fitted candidate.
results = list()


In [4]:
def fit_candidate(estimator, degree, l1_ratio=None):
    """Tune ``alpha`` for one (regressor, polynomial degree, l1_ratio) combination.

    Builds a PolynomialFeatures -> StandardScaler -> estimator pipeline, runs a
    5-fold grid search over the notebook-level ``alphas`` on the training split,
    then scores the refitted best pipeline on the held-out test split.

    Parameters
    ----------
    estimator : unfitted regressor exposing an ``alpha`` parameter
        Placed in the pipeline's ``'model'`` step.
    degree : int
        Degree passed to ``PolynomialFeatures``.
    l1_ratio : float or None
        When not None, pinned as the only ``model__l1_ratio`` value in the grid.

    Returns
    -------
    dict
        Keys: ``model`` (estimator class name), ``degree``, ``alpha`` (best
        alpha found), ``l1_ratio``, ``cv_rmse`` (square root of the mean
        cross-validated MSE of the best alpha), ``rmse`` and ``mae`` (test-split
        metrics), and ``estimator`` (the refitted best pipeline).
    """
    pipeline = Pipeline([
        ('poly', PolynomialFeatures(degree=degree)),
        ('scaler', StandardScaler()),
        ('model', estimator),
    ])

    param_grid = {'model__alpha': alphas}
    if l1_ratio is not None:
        param_grid['model__l1_ratio'] = [l1_ratio]

    grid_search = GridSearchCV(pipeline, param_grid, cv=5, scoring='neg_mean_squared_error')
    grid_search.fit(X_train, y_train)

    fitted = grid_search.best_estimator_
    y_pred = fitted.predict(X_test)

    return {
        'model': type(estimator).__name__,
        'degree': degree,
        'alpha': grid_search.best_params_['model__alpha'],
        'l1_ratio': l1_ratio,
        # best_score_ is the mean negated MSE across folds, so negate before the root.
        'cv_rmse': float(np.sqrt(-grid_search.best_score_)),
        'rmse': np.sqrt(mean_squared_error(y_test, y_pred)),
        'mae': mean_absolute_error(y_test, y_pred),
        'estimator': fitted,
    }


# Empty the list in place so re-running this cell does not stack duplicate rows
# on top of those from an earlier run.
results.clear()
candidate_records = []

# (estimator class, l1_ratio) pairs. ElasticNet is searched once per fixed
# l1_ratio so that every ratio gets its own row in `results`.
candidates = [(Ridge, None), (Lasso, None)]
candidates += [(ElasticNet, ratio) for ratio in l1_ratios]

for degree in degrees:
    for make_model, l1_ratio in candidates:
        record = fit_candidate(make_model(), degree, l1_ratio)
        candidate_records.append(record)
        # Positional layout: (degree, alpha, l1_ratio, rmse, mae).
        results.append((degree, record['alpha'], l1_ratio, record['rmse'], record['mae']))

# Choose the winner on cross-validated RMSE, not test RMSE: picking the
# configuration that happens to score best on the test split would make the
# reported test metrics optimistically biased.
best_index = min(range(len(candidate_records)), key=lambda i: candidate_records[i]['cv_rmse'])
best_record = candidate_records[best_index]
best_result = results[best_index]
degree, alpha, l1_ratio, best_rmse, best_mae = best_result

# Expose which estimator won and its fitted pipeline (refit on the full training split).
best_model_name = best_record['model']
best_model = best_record['estimator']

In [5]:
# Assemble the report line by line, then emit it with a single print call.
summary_lines = [
    "Best model parameters:",
    f"Degree: {degree}",
    f"Alpha: {alpha}",
]
# l1_ratio is None when the selected result carries no L1/L2 mix; skip the line then.
if l1_ratio is not None:
    summary_lines.append(f"L1 Ratio: {l1_ratio}")
summary_lines.extend([f"RMSE: {best_rmse}", f"MAE: {best_mae}"])
print("\n".join(summary_lines))

Best model parameters:
Degree: 2
Alpha: 1.0
RMSE: 0.6876651202073971
MAE: 0.48797138549474045
