In [2]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.preprocessing import OrdinalEncoder, StandardScaler
from sklearn.linear_model import LinearRegression,Ridge,Lasso,ElasticNet,RidgeCV,LassoCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score

In [3]:
# Load the diabetes regression data as a (features, target) pair.
X, y = load_diabetes(return_X_y=True)

In [4]:
# Hold out 20% of the samples for testing; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2,
)

In [5]:
# Linear Regression (unregularised baseline)
reg = LinearRegression().fit(X_train, y_train)  # fit() returns the estimator itself
y_pred = reg.predict(X_test)

# Print the three test-set metrics in a fixed order: MAE, MSE, R^2.
for label, metric in (
    ("Test MAE :", mean_absolute_error),
    ("Test MSE :", mean_squared_error),
    ("Test r2_score :", r2_score),
):
    print(label, metric(y_test, y_pred))

Test MAE : 45.213034190469024
Test MSE : 3094.4566715660626
Test r2_score : 0.4399338661568968


In [6]:
import numpy as np
import pandas as pd

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score

# --------------------------------------------------
# 1. Lambda (alpha) values
# --------------------------------------------------
# 20 candidate penalty strengths, log-spaced from 1e-4 to 1e4.
alphas = np.logspace(-4, 4, 20)

# --------------------------------------------------
# 2. Pipeline (ridge only -- no scaling step)
# --------------------------------------------------
# NOTE(review): this pipeline contains a single Ridge step; no StandardScaler
# is applied. load_diabetes() documents its features as already mean-centred
# and scaled, which is presumably why no scaler is needed -- confirm. If
# scaling is wanted, insert ("scaler", StandardScaler()) before "ridge" so it
# is re-fit inside every CV fold (the alpha grid would then need re-tuning).
pipe = Pipeline([
    ("ridge", Ridge())
])

# --------------------------------------------------
# 3. GridSearchCV (TRUE K-FOLD CV)
# --------------------------------------------------
# The "ridge__" prefix routes the value to the pipeline step named "ridge".
param_grid = {
    "ridge__alpha": alphas
}

# Scoring is the *negated* MSE, so the best candidate has the highest score.
# Train-fold scores are requested too; they are stored in gs.cv_results_ but
# not displayed below.
gs = GridSearchCV(
    pipe,
    param_grid,
    cv=5,
    scoring="neg_mean_squared_error",
    return_train_score=True
)

# --------------------------------------------------
# 4. Fit ONLY on training data
# --------------------------------------------------
gs.fit(X_train, y_train)

# --------------------------------------------------
# 5. CV RESULTS (mean validation MSE per alpha)
# --------------------------------------------------
# mean_test_score holds negated MSE (see scoring above); flip the sign back
# so the table shows ordinary, lower-is-better MSE.
cv_results = pd.DataFrame({
    "Lambda (alpha)": gs.cv_results_["param_ridge__alpha"].astype(float),
    "CV Mean Squared Error": -gs.cv_results_["mean_test_score"]
})

print("\nCross-Validation Results:")
print(cv_results)

# --------------------------------------------------
# 6. Best lambda
# --------------------------------------------------
best_lambda = gs.best_params_["ridge__alpha"]
print("\nBest lambda (α):", best_lambda)

# --------------------------------------------------
# 7. FINAL model with best lambda
# --------------------------------------------------
# No extra training happens here: with GridSearchCV's default refit=True,
# best_estimator_ is the pipeline already re-fit on the full training set
# using the best alpha.
final_model = gs.best_estimator_

# --------------------------------------------------
# 8. Test performance (USED ONLY ONCE)
# --------------------------------------------------
y_pred = final_model.predict(X_test)

print("Test MAE :",mean_absolute_error(y_test,y_pred))
print("Test MSE :",mean_squared_error(y_test,y_pred))
print("Test r2_score :",r2_score(y_test,y_pred))


Cross-Validation Results:
    Lambda (alpha)  CV Mean Squared Error
0         0.000100            3069.806890
1         0.000264            3068.691051
2         0.000695            3066.227945
3         0.001833            3061.965900
4         0.004833            3057.245903
5         0.012743            3053.673373
6         0.033598            3050.191887
7         0.088587            3055.571720
8         0.233572            3118.967514
9         0.615848            3357.053787
10        1.623777            3872.596381
11        4.281332            4602.403952
12       11.288379            5278.664740
13       29.763514            5698.561169
14       78.475997            5898.358985
15      206.913808            5981.579402
16      545.559478            6014.325161
17     1438.449888            6026.921513
18     3792.690191            6031.724748
19    10000.000000            6033.550203

Best lambda (α): 0.03359818286283781
Test MAE : 45.35434025760515
Test MSE : 3054.74390213

In [7]:
# Ridge with a hand-picked penalty strength (alpha=0.1)
reg = Ridge(alpha=0.1).fit(X_train, y_train)  # fit() returns the estimator itself
y_pred = reg.predict(X_test)

# Print the three test-set metrics in a fixed order: MAE, MSE, R^2.
for label, metric in (
    ("Test MAE :", mean_absolute_error),
    ("Test MSE :", mean_squared_error),
    ("Test r2_score :", r2_score),
):
    print(label, metric(y_test, y_pred))

Test MAE : 45.40403801924985
Test MSE : 3027.817261919799
Test r2_score : 0.45199494197195456


In [8]:
import numpy as np
import pandas as pd

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Lasso
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score

# --------------------------------------------------
# 1. Lambda (alpha) values (smaller range for LASSO)
# --------------------------------------------------
# 20 candidate penalty strengths, log-spaced from 1e-4 to 1e1.
alphas = np.logspace(-4, 1, 20)

# --------------------------------------------------
# 2. Pipeline (lasso only -- no scaling step)
# --------------------------------------------------
# NOTE(review): this pipeline contains a single Lasso step; no StandardScaler
# is applied. load_diabetes() documents its features as already mean-centred
# and scaled, which is presumably why no scaler is needed -- confirm. If
# scaling is wanted, insert ("scaler", StandardScaler()) before "lasso" so it
# is re-fit inside every CV fold (the alpha grid would then need re-tuning).
# max_iter is raised to 10000 to give the solver more iterations to converge.
pipe = Pipeline([
    ("lasso", Lasso(max_iter=10000))
])

# --------------------------------------------------
# 3. GridSearchCV (TRUE K-FOLD CV)
# --------------------------------------------------
# The "lasso__" prefix routes the value to the pipeline step named "lasso".
param_grid = {
    "lasso__alpha": alphas
}

# Scoring is the *negated* MSE, so the best candidate has the highest score.
# Train-fold scores are requested too; they are stored in gs.cv_results_ but
# not displayed below.
gs = GridSearchCV(
    pipe,
    param_grid,
    cv=5,
    scoring="neg_mean_squared_error",
    return_train_score=True
)

# --------------------------------------------------
# 4. Fit ONLY on training data
# --------------------------------------------------
gs.fit(X_train, y_train)

# --------------------------------------------------
# 5. CV RESULTS (mean validation MSE per alpha)
# --------------------------------------------------
# mean_test_score holds negated MSE (see scoring above); flip the sign back
# so the table shows ordinary, lower-is-better MSE.
cv_results = pd.DataFrame({
    "Lambda (alpha)": gs.cv_results_["param_lasso__alpha"].astype(float),
    "CV Mean Squared Error": -gs.cv_results_["mean_test_score"]
})

print("\nCross-Validation Results:")
print(cv_results)

# --------------------------------------------------
# 6. Best lambda
# --------------------------------------------------
best_lambda = gs.best_params_["lasso__alpha"]
print("\nBest lambda (α):", best_lambda)

# --------------------------------------------------
# 7. FINAL model with best lambda
# --------------------------------------------------
# No extra training happens here: with GridSearchCV's default refit=True,
# best_estimator_ is the pipeline already re-fit on the full training set
# using the best alpha.
final_model = gs.best_estimator_

# --------------------------------------------------
# 8. Test performance
# --------------------------------------------------
y_pred = final_model.predict(X_test)
print("Test MAE :",mean_absolute_error(y_test,y_pred))
print("Test MSE :",mean_squared_error(y_test,y_pred))
print("Test r2_score :",r2_score(y_test,y_pred))
# --------------------------------------------------
# 9. Coefficients (sparsity check)
# --------------------------------------------------
# Coefficients that are exactly 0 correspond to features the L1 penalty dropped.
print("\nLASSO Coefficients:")
print(final_model.named_steps["lasso"].coef_)



Cross-Validation Results:
    Lambda (alpha)  CV Mean Squared Error
0         0.000100            3070.350728
1         0.000183            3070.189909
2         0.000336            3069.902151
3         0.000616            3069.398044
4         0.001129            3067.782584
5         0.002069            3065.804203
6         0.003793            3062.540933
7         0.006952            3054.783772
8         0.012743            3043.657389
9         0.023357            3038.644167
10        0.042813            3031.231070
11        0.078476            3033.427680
12        0.143845            3070.545902
13        0.263665            3121.937117
14        0.483293            3252.083432
15        0.885867            3681.713453
16        1.623777            4909.358864
17        2.976351            6034.666577
18        5.455595            6034.666577
19       10.000000            6034.666577

Best lambda (α): 0.04281332398719392
Test MAE : 45.70866329668324
Test MSE : 3102.00595750

In [9]:
# Lasso with a hand-picked penalty strength (alpha=0.01)
reg = Lasso(alpha=0.01).fit(X_train, y_train)  # fit() returns the estimator itself
y_pred = reg.predict(X_test)

# Bare last expression: the test-set R^2 is shown as the cell's output value.
r2_score(y_test, y_pred)

0.441118559631106

In [10]:
import numpy as np
import pandas as pd

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score

# --------------------------------------------------
# 1. Hyperparameter values
# --------------------------------------------------
# 20 log-spaced alphas (1e-4 .. 1e1) x 5 L1/L2 mixing ratios = 100 candidates.
alphas = np.logspace(-4, 1, 20)
l1_ratios = [0.1, 0.3, 0.5, 0.7, 0.9]

# --------------------------------------------------
# 2. Pipeline (elastic net only -- no scaling step)
# --------------------------------------------------
# NOTE(review): this pipeline contains a single ElasticNet step; no
# StandardScaler is applied. load_diabetes() documents its features as already
# mean-centred and scaled, which is presumably why no scaler is needed --
# confirm. If scaling is wanted, insert ("scaler", StandardScaler()) before
# "elastic" so it is re-fit inside every CV fold (the alpha grid would then
# need re-tuning).
# max_iter is raised to 10000 to give the solver more iterations to converge.
pipe = Pipeline([
    ("elastic", ElasticNet(max_iter=10000))
])

# --------------------------------------------------
# 3. GridSearchCV (TRUE K-FOLD CV)
# --------------------------------------------------
# The "elastic__" prefix routes each value to the pipeline step named "elastic".
param_grid = {
    "elastic__alpha": alphas,
    "elastic__l1_ratio": l1_ratios
}

# Scoring is the *negated* MSE, so the best candidate has the highest score.
# Train-fold scores are requested too; they are stored in gs.cv_results_ but
# not displayed below.
gs = GridSearchCV(
    pipe,
    param_grid,
    cv=5,
    scoring="neg_mean_squared_error",
    return_train_score=True
)

# --------------------------------------------------
# 4. Fit ONLY on training data
# --------------------------------------------------
gs.fit(X_train, y_train)

# --------------------------------------------------
# 5. CV RESULTS (mean validation MSE per candidate)
# --------------------------------------------------
# mean_test_score holds negated MSE (see scoring above); flip the sign back
# so the table shows ordinary, lower-is-better MSE.
cv_results = pd.DataFrame({
    "Alpha": gs.cv_results_["param_elastic__alpha"].astype(float),
    "L1_ratio": gs.cv_results_["param_elastic__l1_ratio"].astype(float),
    "CV Mean Squared Error": -gs.cv_results_["mean_test_score"]
})

print("\nCross-Validation Results:")
print(cv_results)

# --------------------------------------------------
# 6. Best hyperparameters
# --------------------------------------------------
best_alpha = gs.best_params_["elastic__alpha"]
best_l1_ratio = gs.best_params_["elastic__l1_ratio"]

print("\nBest alpha:", best_alpha)
print("Best l1_ratio:", best_l1_ratio)

# --------------------------------------------------
# 7. FINAL model with best params
# --------------------------------------------------
# No extra training happens here: with GridSearchCV's default refit=True,
# best_estimator_ is the pipeline already re-fit on the full training set
# using the best (alpha, l1_ratio) pair.
final_model = gs.best_estimator_

# --------------------------------------------------
# 8. Test performance
# --------------------------------------------------
y_pred = final_model.predict(X_test)
print("Test MAE :",mean_absolute_error(y_test,y_pred))
print("Test MSE :",mean_squared_error(y_test,y_pred))
print("Test r2_score :",r2_score(y_test,y_pred))
# --------------------------------------------------
# 9. Coefficients (sparsity + grouping check)
# --------------------------------------------------
print("\nElasticNet Coefficients:")
print(final_model.named_steps["elastic"].coef_)



Cross-Validation Results:
      Alpha  L1_ratio  CV Mean Squared Error
0    0.0001       0.1            3051.081137
1    0.0001       0.3            3052.020926
2    0.0001       0.5            3053.221250
3    0.0001       0.7            3054.939565
4    0.0001       0.9            3059.614405
..      ...       ...                    ...
95  10.0000       0.1            6032.921464
96  10.0000       0.3            6034.666577
97  10.0000       0.5            6034.666577
98  10.0000       0.7            6034.666577
99  10.0000       0.9            6034.666577

[100 rows x 3 columns]

Best alpha: 0.0011288378916846883
Best l1_ratio: 0.9
Test MAE : 45.37056235461899
Test MSE : 3051.609166819767
Test r2_score : 0.44768884186833147

ElasticNet Coefficients:
[  -0.68456978 -187.87492091  511.79173763  327.98212536 -149.47930224
  -30.7405401  -156.10379317   68.77726153  548.83146361   62.37877353]


In [11]:
# ElasticNet with hand-picked hyperparameters
# scikit-learn's parameterisation: for a penalty written as
#     a * ||w||_1 + 0.5 * b * ||w||_2^2
# alpha = a + b and l1_ratio = a / (a + b), where a weights the L1 ("mod")
# term and b weights the L2 ("square") term. l1_ratio=0.9 is therefore
# mostly-L1 (lasso-like).
reg = ElasticNet(alpha=0.005,l1_ratio=0.9)
reg.fit(X_train,y_train)
y_pred = reg.predict(X_test)
print("Test MAE :",mean_absolute_error(y_test,y_pred))
print("Test MSE :",mean_squared_error(y_test,y_pred))
print("Test r2_score :",r2_score(y_test,y_pred))

Test MAE : 45.490541207407745
Test MSE : 3021.44944426422
Test r2_score : 0.4531474541554822
