In [2]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.preprocessing import OrdinalEncoder, StandardScaler
from sklearn.linear_model import LinearRegression,Ridge,Lasso,ElasticNet,RidgeCV,LassoCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score

In [3]:
X,y = load_diabetes(return_X_y=True)

In [4]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=2)

In [5]:
# Linear Regression
reg = LinearRegression()
reg.fit(X_train,y_train)
y_pred = reg.predict(X_test)
print("Test MAE :",mean_absolute_error(y_test,y_pred))
print("Test MSE :",mean_squared_error(y_test,y_pred))
print("Test r2_score :",r2_score(y_test,y_pred))

Test MAE : 45.213034190469024
Test MSE : 3094.4566715660626
Test r2_score : 0.4399338661568968


In [6]:
import numpy as np
import pandas as pd

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score

# --------------------------------------------------
# 1. Lambda (alpha) values
# --------------------------------------------------
alphas = np.logspace(-4, 4, 20)

# --------------------------------------------------
# 2. Pipeline (scaling + ridge)
# --------------------------------------------------
pipe = Pipeline([
    ("scaler", StandardScaler()),
    ("ridge", Ridge())
])

# --------------------------------------------------
# 3. GridSearchCV (TRUE K-FOLD CV)
# --------------------------------------------------
param_grid = {
    "ridge__alpha": alphas
}

gs = GridSearchCV(
    pipe,
    param_grid,
    cv=5,
    scoring="neg_mean_squared_error",
    return_train_score=True
)

# --------------------------------------------------
# 4. Fit ONLY on training data
# --------------------------------------------------
gs.fit(X_train, y_train)

# --------------------------------------------------
# 5. CV RESULTS (THIS IS WHAT YOU WANTED)
# --------------------------------------------------
cv_results = pd.DataFrame({
    "Lambda (alpha)": gs.cv_results_["param_ridge__alpha"].astype(float),
    "CV Mean Squared Error": -gs.cv_results_["mean_test_score"]
})

print("\nCross-Validation Results:")
print(cv_results)

# --------------------------------------------------
# 6. Best lambda
# --------------------------------------------------
best_lambda = gs.best_params_["ridge__alpha"]
print("\nBest lambda (α):", best_lambda)

# --------------------------------------------------
# 7. Train FINAL model with best lambda
# --------------------------------------------------
final_model = gs.best_estimator_

# --------------------------------------------------
# 8. Test performance (USED ONLY ONCE)
# --------------------------------------------------
y_pred = final_model.predict(X_test)

print("Test MAE :",mean_absolute_error(y_test,y_pred))
print("Test MSE :",mean_squared_error(y_test,y_pred))
print("Test r2_score :",r2_score(y_test,y_pred))


Cross-Validation Results:
    Lambda (alpha)  CV Mean Squared Error
0         0.000100            3070.545744
1         0.000264            3070.543101
2         0.000695            3070.536135
3         0.001833            3070.517789
4         0.004833            3070.469552
5         0.012743            3070.343315
6         0.033598            3070.016922
7         0.088587            3069.199068
8         0.233572            3067.305322
9         0.615848            3063.661801
10        1.623777            3058.799635
11        4.281332            3054.551588
12       11.288379            3050.099013
13       29.763514            3048.595900
14       78.475997            3083.454954
15      206.913808            3254.803725
16      545.559478            3686.789528
17     1438.449888            4374.658322
18     3792.690191            5098.828682
19    10000.000000            5600.396887

Best lambda (α): 29.763514416313132
Test MAE : 45.37542013786235
Test MSE : 3037.060983934

In [7]:
pipe = Pipeline([
    ("scaler", StandardScaler()),
    ("ridge", Ridge(alpha=0.1))
])

pipe.fit(X_train, y_train)
y_pred = pipe.predict(X_test)
print("Test MAE :",mean_absolute_error(y_test,y_pred))
print("Test MSE :",mean_squared_error(y_test,y_pred))
print("Test r2_score :",r2_score(y_test,y_pred))

Test MAE : 45.21678738318099
Test MSE : 3093.121407613642
Test r2_score : 0.4401755357612457


In [8]:
# Ridge 
reg = Ridge(alpha=0.1)
reg.fit(X_train,y_train)
y_pred = reg.predict(X_test)
print("Test MAE :",mean_absolute_error(y_test,y_pred))
print("Test MSE :",mean_squared_error(y_test,y_pred))
print("Test r2_score :",r2_score(y_test,y_pred))

Test MAE : 45.40403801924985
Test MSE : 3027.817261919799
Test r2_score : 0.45199494197195456


In [9]:
import numpy as np
import pandas as pd

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Lasso
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score

# --------------------------------------------------
# 1. Lambda (alpha) values (smaller range for LASSO)
# --------------------------------------------------
alphas = np.logspace(-4, 1, 20)

# --------------------------------------------------
# 2. Pipeline (scaling + lasso)
# --------------------------------------------------
pipe = Pipeline([
    ("scaler", StandardScaler()),
    ("lasso", Lasso(max_iter=10000))
])

# --------------------------------------------------
# 3. GridSearchCV (TRUE K-FOLD CV)
# --------------------------------------------------
param_grid = {
    "lasso__alpha": alphas
}

gs = GridSearchCV(
    pipe,
    param_grid,
    cv=5,
    scoring="neg_mean_squared_error",
    return_train_score=True
)

# --------------------------------------------------
# 4. Fit ONLY on training data
# --------------------------------------------------
gs.fit(X_train, y_train)

# --------------------------------------------------
# 5. CV RESULTS
# --------------------------------------------------
cv_results = pd.DataFrame({
    "Lambda (alpha)": gs.cv_results_["param_lasso__alpha"].astype(float),
    "CV Mean Squared Error": -gs.cv_results_["mean_test_score"]
})

print("\nCross-Validation Results:")
print(cv_results)

# --------------------------------------------------
# 6. Best lambda
# --------------------------------------------------
best_lambda = gs.best_params_["lasso__alpha"]
print("\nBest lambda (α):", best_lambda)

# --------------------------------------------------
# 7. Train FINAL model with best lambda
# --------------------------------------------------
final_model = gs.best_estimator_

# --------------------------------------------------
# 8. Test performance
# --------------------------------------------------
y_pred = final_model.predict(X_test)
print("Test MAE :",mean_absolute_error(y_test,y_pred))
print("Test MSE :",mean_squared_error(y_test,y_pred))
print("Test r2_score :",r2_score(y_test,y_pred))
# --------------------------------------------------
# 9. Coefficients (sparsity check)
# --------------------------------------------------
print("\nLASSO Coefficients:")
print(final_model.named_steps["lasso"].coef_)



Cross-Validation Results:
    Lambda (alpha)  CV Mean Squared Error
0         0.000100            3070.538361
1         0.000183            3070.530872
2         0.000336            3070.517164
3         0.000616            3070.492084
4         0.001129            3070.446299
5         0.002069            3070.362975
6         0.003793            3070.212119
7         0.006952            3069.942297
8         0.012743            3069.469945
9         0.023357            3067.935702
10        0.042813            3066.068200
11        0.078476            3063.087426
12        0.143845            3055.158617
13        0.263665            3043.632255
14        0.483293            3038.597681
15        0.885867            3030.885087
16        1.623777            3030.932416
17        2.976351            3068.199414
18        5.455595            3118.445133
19       10.000000            3239.950590

Best lambda (α): 0.8858667904100823
Test MAE : 45.70285894586719
Test MSE : 3101.478297349

In [10]:
pipe = Pipeline([
    ("scaler", StandardScaler()),
    ("lasso", Lasso(alpha=0.01))
])

pipe.fit(X_train, y_train)
y_pred = pipe.predict(X_test)
print("Test MAE :",mean_absolute_error(y_test,y_pred))
print("Test MSE :",mean_squared_error(y_test,y_pred))
print("Test r2_score :",r2_score(y_test,y_pred))

Test MAE : 45.2208173323363
Test MSE : 3093.4386464553195
Test r2_score : 0.44011811866014494


In [11]:
# Lasso
reg = Lasso(alpha=0.01)
reg.fit(X_train,y_train)
y_pred = reg.predict(X_test)
r2_score(y_test,y_pred)

0.441118559631106

In [12]:
import numpy as np
import pandas as pd

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score

# --------------------------------------------------
# 1. Hyperparameter values
# --------------------------------------------------
alphas = np.logspace(-4, 1, 20)
l1_ratios = [0.1, 0.3, 0.5, 0.7, 0.9]

# --------------------------------------------------
# 2. Pipeline (scaling + elastic net)
# --------------------------------------------------
pipe = Pipeline([
    ("scaler", StandardScaler()),
    ("elastic", ElasticNet(max_iter=10000))
])

# --------------------------------------------------
# 3. GridSearchCV (TRUE K-FOLD CV)
# --------------------------------------------------
param_grid = {
    "elastic__alpha": alphas,
    "elastic__l1_ratio": l1_ratios
}

gs = GridSearchCV(
    pipe,
    param_grid,
    cv=5,
    scoring="neg_mean_squared_error",
    return_train_score=True
)

# --------------------------------------------------
# 4. Fit ONLY on training data
# --------------------------------------------------
gs.fit(X_train, y_train)

# --------------------------------------------------
# 5. CV RESULTS
# --------------------------------------------------
cv_results = pd.DataFrame({
    "Alpha": gs.cv_results_["param_elastic__alpha"].astype(float),
    "L1_ratio": gs.cv_results_["param_elastic__l1_ratio"].astype(float),
    "CV Mean Squared Error": -gs.cv_results_["mean_test_score"]
})

print("\nCross-Validation Results:")
print(cv_results)

# --------------------------------------------------
# 6. Best hyperparameters
# --------------------------------------------------
best_alpha = gs.best_params_["elastic__alpha"]
best_l1_ratio = gs.best_params_["elastic__l1_ratio"]

print("\nBest alpha:", best_alpha)
print("Best l1_ratio:", best_l1_ratio)

# --------------------------------------------------
# 7. Train FINAL model with best params
# --------------------------------------------------
final_model = gs.best_estimator_

# --------------------------------------------------
# 8. Test performance
# --------------------------------------------------
y_pred = final_model.predict(X_test)
print("Test MAE :",mean_absolute_error(y_test,y_pred))
print("Test MSE :",mean_squared_error(y_test,y_pred))
print("Test r2_score :",r2_score(y_test,y_pred))
# --------------------------------------------------
# 9. Coefficients (sparsity + grouping check)
# --------------------------------------------------
print("\nElasticNet Coefficients:")
print(final_model.named_steps["elastic"].coef_)



Cross-Validation Results:
      Alpha  L1_ratio  CV Mean Squared Error
0    0.0001       0.1            3070.141521
1    0.0001       0.3            3070.228538
2    0.0001       0.5            3070.316217
3    0.0001       0.7            3070.404568
4    0.0001       0.9            3070.493593
..      ...       ...                    ...
95  10.0000       0.1            4842.876637
96  10.0000       0.3            4713.710134
97  10.0000       0.5            4530.730023
98  10.0000       0.7            4240.828832
99  10.0000       0.9            3706.414525

[100 rows x 3 columns]

Best alpha: 0.4832930238571752
Best l1_ratio: 0.9
Test MAE : 45.543582458414136
Test MSE : 3063.4954369591496
Test r2_score : 0.4455375442192324

ElasticNet Coefficients:
[ 0.         -8.10329123 24.19664419 15.19742107 -5.7927838  -0.98203522
 -8.23155092  1.30352418 26.07797595  2.56351033]


In [13]:
pipe = Pipeline([
    ("scaler", StandardScaler()),
    ("lasso", ElasticNet(alpha=0.005,l1_ratio=0.9))
])

pipe.fit(X_train, y_train)
y_pred = pipe.predict(X_test)
print("Test MAE :",mean_absolute_error(y_test,y_pred))
print("Test MSE :",mean_squared_error(y_test,y_pred))
print("Test r2_score :",r2_score(y_test,y_pred))

Test MAE : 45.2228850425305
Test MSE : 3091.7789732078227
Test r2_score : 0.44041850314693043


In [14]:
# ElasticNet
reg = ElasticNet(alpha=0.005,l1_ratio=0.9) # alpha= a/b ,l1_ratio=a/a+b,a for square,b for mod
reg.fit(X_train,y_train)
y_pred = reg.predict(X_test)
print("Test MAE :",mean_absolute_error(y_test,y_pred))
print("Test MSE :",mean_squared_error(y_test,y_pred))
print("Test r2_score :",r2_score(y_test,y_pred))

Test MAE : 45.490541207407745
Test MSE : 3021.44944426422
Test r2_score : 0.4531474541554822
