# 3 Part 1: Regression Task (California Housing; the scikit-learn diabetes dataset is used instead because California Housing could not be loaded)

In [11]:
import numpy as np
from sklearn.datasets import load_diabetes
# California Housing data not working, so the diabetes dataset is loaded instead
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_squared_error

3.1 Task 1: Load and Split Dataset

In [12]:
# Fetch the diabetes data as a (features, target) pair, then hold out 20% of
# the rows for testing. The fixed seed keeps the split repeatable.
X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [13]:
# Fit a plain LinearRegression (no penalty is configured here) on the training
# split; it serves as the baseline for the regularized models that follow.
lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)

– Step 1: Baseline Model (No Regularization) Build a Linear Regression model without
any regularization.

In [14]:
# Show the fitted baseline parameters: one weight per feature plus the bias.
baseline_params = (
    ("Coefficients : ", lin_reg.coef_),
    ("Intercept : ", lin_reg.intercept_),
)
print("=== Baseline Linear Regression ===")
for caption, figure in baseline_params:
    print(caption, figure)

=== Baseline Linear Regression ===
Coefficients :  [  37.90402135 -241.96436231  542.42875852  347.70384391 -931.48884588
  518.06227698  163.41998299  275.31790158  736.1988589    48.67065743]
Intercept :  151.34560453985995


In [15]:
# Baseline predictions for the training split first, then the held-out split.
y_train_pred, y_test_pred = (
    lin_reg.predict(features) for features in (X_train, X_test)
)

In [16]:
# Compare training error with held-out error for the baseline model.
print(
    "Train Mean Squared Error : ",
    mean_squared_error(y_true=y_train, y_pred=y_train_pred),
)
print(
    "Test Mean Squared Error : ",
    mean_squared_error(y_true=y_test, y_pred=y_test_pred),
)

Train Mean Squared Error :  2868.549702835577
Test Mean Squared Error :  2900.193628493482


In [17]:
# Candidate regularization strengths: 13 log-spaced values from 1e-3 to 1e0
# (quarter-decade steps), used as the search grid for both Ridge and Lasso.
alpha_grid = dict(alpha=np.logspace(start=-3, stop=0, num=13))

In [18]:
# Untuned L2 (Ridge) and L1 (Lasso) estimators; alpha is chosen later by the
# grid search. Lasso gets a raised iteration cap of 10000.
ridge = Ridge(random_state=42)
lasso = Lasso(max_iter=10000, random_state=42)

In [19]:
# Both searches share the same protocol: 5-fold CV, scored by negated MSE
# (higher is better), with n_jobs=-1 for parallel execution.
search_settings = dict(cv=5, scoring="neg_mean_squared_error", n_jobs=-1)

ridge_cv = GridSearchCV(ridge, alpha_grid, **search_settings)
lasso_cv = GridSearchCV(lasso, alpha_grid, **search_settings)

In [20]:
# Run both 5-fold grid searches over alpha_grid using only the training split,
# so the held-out test rows play no part in choosing alpha.
ridge_cv.fit(X_train, y_train)
lasso_cv.fit(X_train, y_train)

– Step 2: Hyperparameter Tuning Use GridSearchCV or RandomizedSearchCV to tune
hyperparameters for Ridge and Lasso regression models.

In [21]:
# Report the selected alpha and its cross-validated MSE for each model.
# best_score_ holds the negated MSE, so the sign is flipped for display.
tuning_summary = (
    ("Best Ridge Alpha : ", ridge_cv.best_params_["alpha"]),
    ("Best Ridge CV MSE : ", -ridge_cv.best_score_),
    ("Lasso Best Alpha : ", lasso_cv.best_params_["alpha"]),
    ("Lasso Best CV MSE : ", -lasso_cv.best_score_),
)
print("\n=== Hyperparameter Tuning Results ===")
for caption, figure in tuning_summary:
    print(caption, figure)


=== Hyperparameter Tuning Results ===
Best Ridge Alpha :  0.05623413251903491
Best Ridge CV MSE :  3122.185727902014
Lasso Best Alpha :  0.05623413251903491
Lasso Best CV MSE :  3128.575592525761


In [22]:
# Pull the winning estimator out of each finished grid search.
best_ridge, best_lasso = ridge_cv.best_estimator_, lasso_cv.best_estimator_

In [23]:
# Predictions of the tuned models on the training split and the held-out split.
ridge_train_pred, ridge_test_pred = (
    best_ridge.predict(features) for features in (X_train, X_test)
)
lasso_train_pred, lasso_test_pred = (
    best_lasso.predict(features) for features in (X_train, X_test)
)


– Step 3: Regularization Experiments (L1 vs L2) Train L1 (Lasso) and L2 (Ridge) regression models using the optimal hyperparameters.

In [24]:
# Tuned Ridge model: fitted weights plus train/test error.
ridge_report = {
    "Coefficients : ": best_ridge.coef_,
    "Train MSE : ": mean_squared_error(y_train, ridge_train_pred),
    "Test MSE : ": mean_squared_error(y_test, ridge_test_pred),
}
print("\n=== Ridge(L2) with best alpha ===")
for caption, figure in ridge_report.items():
    print(caption, figure)


=== Ridge(L2) with best alpha ===
Coefficients :  [  42.33215803 -219.70224625  525.84455459  327.90895925 -155.27747239
  -65.55682386 -176.83023234  160.61559898  422.12347747   70.18920108]
Train MSE :  2899.0575672882524
Test MSE :  2864.6382255813583


In [35]:
# Tuned Lasso model: fitted weights plus train/test error.
lasso_train_mse = mean_squared_error(y_train, lasso_train_pred)
lasso_test_mse = mean_squared_error(y_test, lasso_test_pred)

print("\n === Lasso(L1) with best alpha ===")
print("Coefficients : ", best_lasso.coef_)
print("Train MSE : ", lasso_train_mse)
print("Test MSE : ", lasso_test_mse)


 === Lasso(L1) with best alpha ===
Coefficients :  [   8.6963877  -187.0826068   554.88504012  320.41189319 -148.25316407
   -0.         -209.30900722   43.99621921  470.76873329   42.28598551]
Train MSE :  2907.1621670305
Test MSE :  2814.3876739418333


# 4 Part 2: Classification Task (Breast Cancer)

4.1 Task 1: Load and Split Dataset

In [26]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, GridSearchCV

In [27]:
# Load the breast cancer data as a (features, labels) pair and hold out 20% of
# the rows for testing, with a fixed seed.
# NOTE: X, X_train and X_test are rebound here and no longer refer to the
# regression data from Part 1.
X, Y = load_breast_cancer(return_X_y=True)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

Task 2: Complete all the tasks

– Step 1: Baseline Model (No Regularization) Build a Logistic Regression model without specifying any regularization (default settings).

In [28]:
# Baseline classifier with library-default settings, apart from a raised
# iteration cap.
# NOTE(review): no penalty argument is passed, so scikit-learn's defaults apply;
# these are believed to be an L2 penalty with C=1.0 - confirm for the installed
# version before describing this model as unregularized.
baseline_model = LogisticRegression(max_iter=10000)
baseline_model.fit(X_train, Y_train)

# Accuracy on the data the model was fitted on, then on the held-out rows.
baseline_train_acc = accuracy_score(Y_train, baseline_model.predict(X_train))
baseline_test_acc = accuracy_score(Y_test, baseline_model.predict(X_test))

print("Baseline coefficients : \n", baseline_model.coef_)
print("Baseline train acc : ", baseline_train_acc)
print("Baseline test acc:", baseline_test_acc)



Baseline coefficients : 
 [[ 1.0274368   0.22145051 -0.36213488  0.0254667  -0.15623532 -0.23771256
  -0.53255786 -0.28369224 -0.22668189 -0.03649446 -0.09710208  1.3705667
  -0.18140942 -0.08719575 -0.02245523  0.04736092 -0.04294784 -0.03240188
  -0.03473732  0.01160522  0.11165329 -0.50887722 -0.01555395 -0.016857
  -0.30773117 -0.77270908 -1.42859535 -0.51092923 -0.74689363 -0.10094404]]
Baseline train acc :  0.9582417582417583
Baseline test acc: 0.956140350877193


– Step 2: Hyperparameter Tuning Use GridSearchCV or RandomizedSearchCV to tune
hyperparameters for logistic regression models with regularization.

In [29]:
# Search space: both penalty types crossed with seven log-spaced values of C
# (1e-3 to 1e3). liblinear is the only solver listed in the grid.
parameter = dict(
    penalty=["l1", "l2"],
    C=np.logspace(-3, 3, 7),
    solver=["liblinear"],
)

logistic_regression = LogisticRegression(max_iter=10000)

# 5-fold cross-validated search, ranked by accuracy, fitted on the training
# split only.
grid = GridSearchCV(
    estimator=logistic_regression,
    param_grid=parameter,
    cv=5,
    scoring="accuracy",
    n_jobs=-1,
)
grid.fit(X_train, Y_train)

# Score the winning configuration on the held-out rows.
best_model_test_acc = accuracy_score(Y_test, grid.best_estimator_.predict(X_test))

print("\nBest parameters : ", grid.best_params_)
print("Best CV acc : ", grid.best_score_)
print("Test acc (best model) : ", best_model_test_acc)



Best parameters :  {'C': np.float64(100.0), 'penalty': 'l1', 'solver': 'liblinear'}
Best CV acc :  0.9670329670329672
Test acc (best model) :  0.9824561403508771


Step 3: Regularization Experiments (L1 vs L2)

In [34]:
# best_C comes from the joint search over (penalty, C), so it is the C of the
# winning pair; the same value is reused for both penalties below.
best_C = grid.best_params_["C"]


def _report_penalty_model(tag, model):
    """Print coefficients, exact-zero count and train/test accuracy for `model`.

    `tag` is the label prefix used in every printed line (e.g. "L1").
    """
    print("\n" + tag + " coefficients:\n", model.coef_)
    print(tag + " zero coeffs:", np.sum(model.coef_ == 0))
    print(tag + " train acc:", accuracy_score(Y_train, model.predict(X_train)))
    print(tag + " test acc:", accuracy_score(Y_test, model.predict(X_test)))


# L1 model
logistic_regression_l1 = LogisticRegression(
    penalty="l1", C=best_C, solver="liblinear", max_iter=10000
)
logistic_regression_l1.fit(X_train, Y_train)
_report_penalty_model("L1", logistic_regression_l1)

# L2 model
logistic_regression_l2 = LogisticRegression(
    penalty="l2", C=best_C, solver="liblinear", max_iter=10000
)
logistic_regression_l2.fit(X_train, Y_train)
_report_penalty_model("L2", logistic_regression_l2)



L1 coefficients:
 [[ 7.24941654e-01 -1.08602885e-01  1.01240124e-01 -2.29586710e-03
   0.00000000e+00  4.71032473e+01 -1.19589063e+01 -1.36203218e+02
   1.97612169e+01  0.00000000e+00  0.00000000e+00  1.73151431e+00
   0.00000000e+00 -1.97614847e-01  0.00000000e+00  0.00000000e+00
   5.01492282e+01  0.00000000e+00  1.89702289e+01  0.00000000e+00
   2.28621153e-01 -4.38563833e-01  5.69598790e-02 -2.11959602e-02
  -2.15740604e+01  7.97124779e+00 -1.45905449e+01 -2.53329805e+01
  -2.53758072e+01  0.00000000e+00]]
L1 zero coeffs: 9
L1 train acc: 0.989010989010989
L1 test acc: 0.9824561403508771

L2 coefficients:
 [[ 5.40272741  0.26573732 -0.52651203 -0.02095479 -2.29922151 -0.2169419
  -3.56980218 -5.0110607  -2.26418385  0.36762747 -0.58036525  3.84169255
  -0.63875637 -0.10714305 -0.40591246  3.57237002  4.36577514 -0.26636925
   0.39354765  0.62630698 -0.20718582 -0.68999884  0.17661962 -0.01813087
  -4.67716843 -0.0140846  -4.46493229 -7.61271125 -6.83571871  0.57463274]]
L2 zero coe