In [1]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import fetch_california_housing, load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LinearRegression, Ridge, Lasso, LogisticRegression
from sklearn.metrics import mean_squared_error, accuracy_score


In [25]:
#Task 1: Load & Split Dataset

from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

print(X_train.shape, X_test.shape)




(353, 10) (89, 10)


In [24]:
#Task 2 – Step 1: Baseline Linear Regression (No Regularization)

lr = LinearRegression()
lr.fit(X_train, y_train)

y_train_pred = lr.predict(X_train)
y_test_pred = lr.predict(X_test)

train_mse = mean_squared_error(y_train, y_train_pred)
test_mse = mean_squared_error(y_test, y_test_pred)

print("Baseline Linear Regression")
print("Training MSE:", train_mse)
print("Test MSE:", test_mse)
print("Coefficients:", lr.coef_)


Baseline Linear Regression
Training MSE: 0.051601906634910176
Test MSE: 0.0641088624702943
Coefficients: [ 1.97130218e-01 -2.79472278e-03 -2.27758664e-02 -3.28622398e-04
  4.11490191e-01  5.00171192e+00 -1.00587030e+00 -4.91570446e+00
  3.38393701e-01 -5.81425644e+00 -4.32261922e-01  1.26325368e-02
  8.24736376e-03  1.24507529e-03 -1.80785086e+01  2.20798677e+00
  4.27375913e+00 -1.81589526e+01  1.19449435e+00  3.01203668e+00
 -2.14438989e-01 -9.61718848e-03  8.71176397e-03  9.61253395e-04
 -1.32384962e-01 -7.62670138e-01 -6.15742798e-01  1.32619828e+00
 -1.02113249e+00 -1.27363832e+00]


In [15]:
#Step 2: Hyperparameter Tuning (Ridge & Lasso)

#Ridge Regression (L2)
ridge = Ridge()

param_grid_ridge = {'alpha': [0.01, 0.1, 1, 10, 100]}
grid_ridge = GridSearchCV(ridge, param_grid_ridge, cv=5, scoring='neg_mean_squared_error')
grid_ridge.fit(X_train, y_train)

best_ridge = grid_ridge.best_estimator_
print("Best Ridge Alpha:", grid_ridge.best_params_)


Best Ridge Alpha: {'alpha': 0.01}


In [23]:
#Lasso Regression (L1)

lasso = Lasso(max_iter=10000)

param_grid_lasso = {'alpha': [0.001, 0.01, 0.1, 1]}
grid_lasso = GridSearchCV(lasso, param_grid_lasso, cv=5, scoring='neg_mean_squared_error')
grid_lasso.fit(X_train, y_train)

best_lasso = grid_lasso.best_estimator_
print("Best Lasso Alpha:", grid_lasso.best_params_)


Best Lasso Alpha: {'alpha': 0.001}


In [48]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.linear_model import Ridge, Lasso

# Example for Ridge
ridge_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('poly', PolynomialFeatures(degree=2, include_bias=False)),
    ('ridge', Ridge(alpha=1.0))
])
ridge_pipeline.fit(X_train, y_train)

# Example for Lasso
lasso_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('poly', PolynomialFeatures(degree=2, include_bias=False)),
    ('lasso', Lasso(alpha=0.1))
])
lasso_pipeline.fit(X_train, y_train)

# Now evaluation is simple (use raw X_train and X_test)
ridge_train_mse = mean_squared_error(y_train, ridge_pipeline.predict(X_train))
ridge_test_mse  = mean_squared_error(y_test, ridge_pipeline.predict(X_test))

lasso_train_mse = mean_squared_error(y_train, lasso_pipeline.predict(X_train))
lasso_test_mse  = mean_squared_error(y_test, lasso_pipeline.predict(X_test))

print("Ridge Train MSE:", ridge_train_mse)
print("Ridge Test MSE:", ridge_test_mse)
print("Lasso Train MSE:", lasso_train_mse)
print("Lasso Test MSE:", lasso_test_mse)



Ridge Train MSE: 2422.7941765063774
Ridge Test MSE: 2900.373135453639
Lasso Train MSE: 2445.562933500697
Lasso Test MSE: 2779.773966197534


  model = cd_fast.enet_coordinate_descent(


In [49]:
#PART 2: CLASSIFICATION (Breast Cancer)
#Task 1: Load & Split Dataset

X, y = load_breast_cancer(return_X_y=True)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [50]:
#Task 2 – Step 1: Baseline Logistic Regression

log_reg = LogisticRegression(max_iter=10000)
log_reg.fit(X_train, y_train)

train_acc = accuracy_score(y_train, log_reg.predict(X_train))
test_acc = accuracy_score(y_test, log_reg.predict(X_test))

print("Baseline Logistic Regression")
print("Training Accuracy:", train_acc)
print("Test Accuracy:", test_acc)
print("Coefficients:", log_reg.coef_)


Baseline Logistic Regression
Training Accuracy: 0.9582417582417583
Test Accuracy: 0.956140350877193
Coefficients: [[ 1.0274368   0.22145051 -0.36213488  0.0254667  -0.15623532 -0.23771256
  -0.53255786 -0.28369224 -0.22668189 -0.03649446 -0.09710208  1.3705667
  -0.18140942 -0.08719575 -0.02245523  0.04736092 -0.04294784 -0.03240188
  -0.03473732  0.01160522  0.11165329 -0.50887722 -0.01555395 -0.016857
  -0.30773117 -0.77270908 -1.42859535 -0.51092923 -0.74689363 -0.10094404]]


In [18]:
#Step 2: Hyperparameter Tuning (L1 & L2)

param_grid = {
    'C': [0.01, 0.1, 1, 10],
    'penalty': ['l1', 'l2'],
    'solver': ['liblinear']
}

grid_log = GridSearchCV(
    LogisticRegression(max_iter=10000),
    param_grid,
    cv=5,
    scoring='accuracy'
)

grid_log.fit(X_train, y_train)
best_log = grid_log.best_estimator_

print("Best Parameters:", grid_log.best_params_)


Best Parameters: {'C': 10, 'penalty': 'l2', 'solver': 'liblinear'}


In [17]:
#Step 3: Regularization Comparison
train_acc = accuracy_score(y_train, best_log.predict(X_train))
test_acc = accuracy_score(y_test, best_log.predict(X_test))

print("Optimized Logistic Regression")
print("Training Accuracy:", train_acc)
print("Test Accuracy:", test_acc)
print("Coefficients:", best_log.coef_)


Optimized Logistic Regression
Training Accuracy: 0.9692307692307692
Test Accuracy: 0.956140350877193
Coefficients: [[ 4.48835633  0.27196032 -0.51946446 -0.0074426  -0.72144586 -0.69521094
  -1.74176288 -1.6439379  -0.89114892  0.03691798 -0.31545108  3.35632861
  -0.88727336 -0.07335493 -0.11096036  0.80035637  0.92249598 -0.11617823
  -0.02500433  0.14611177  0.51777817 -0.6428525   0.16484437 -0.02768451
  -1.40296201 -1.61471735 -2.97856699 -2.6019605  -2.99483671 -0.02661842]]
