In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
from sklearn.datasets import fetch_california_housing, load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV, cross_val_score
from sklearn.linear_model import LinearRegression, Ridge, Lasso, LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, accuracy_score, classification_report, confusion_matrix

#Part 1:

Task - 1



In [3]:
url = "https://raw.githubusercontent.com/ageron/handson-ml/master/datasets/housing/housing.csv"

columns = [
    "longitude", "latitude", "housing_median_age", "total_rooms",
    "total_bedrooms", "population", "households",
    "median_income", "median_house_value", "ocean_proximity"
]

data_california = pd.read_csv(url, names=columns, header=0)

X = data_california.drop("median_house_value", axis=1)
y = data_california["median_house_value"]

X = pd.get_dummies(X, drop_first=True)
X = X.fillna(X.mean())

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

Task-2 [Step 1]

In [4]:
lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)

y_train_pred = lin_reg.predict(X_train)
y_test_pred = lin_reg.predict(X_test)

train_mse = mean_squared_error(y_train, y_train_pred)
test_mse = mean_squared_error(y_test, y_test_pred)

print("Training MSE:", train_mse)
print("Test MSE:", test_mse)
print("Coefficients:", lin_reg.coef_)

Training MSE: 4683203783.504253
Test MSE: 4904399775.949275
Coefficients: [-53826.64801649 -54415.6961445   13889.86618856 -13094.25116219
  43068.18184187 -43403.43242732  18382.19632373  75167.77476625
 -18506.09522509   2118.43757154  -1621.70501786   1139.90559187]


Task-2 [Step 2]

In [5]:
ridge = Ridge()

param_grid = {'alpha': [0.01, 0.1, 1, 10, 100]}

ridge_cv = GridSearchCV(ridge, param_grid, cv=5, scoring='neg_mean_squared_error')
ridge_cv.fit(X_train, y_train)

best_ridge = ridge_cv.best_estimator_

ridge_test_pred = best_ridge.predict(X_test)
ridge_mse = mean_squared_error(y_test, ridge_test_pred)

print("Best alpha (Ridge):", ridge_cv.best_params_)
print("Test MSE (Ridge):", ridge_mse)


Best alpha (Ridge): {'alpha': 10}
Test MSE (Ridge): 4900527988.467714


Task 2 [Step 3]

In [6]:
lasso = Lasso(max_iter=10000)

param_grid = {'alpha': [0.001, 0.01, 0.1, 1]}

lasso_cv = GridSearchCV(lasso, param_grid, cv=5, scoring='neg_mean_squared_error')
lasso_cv.fit(X_train, y_train)

best_lasso = lasso_cv.best_estimator_

lasso_test_pred = best_lasso.predict(X_test)
lasso_mse = mean_squared_error(y_test, lasso_test_pred)

print("Best alpha (Lasso):", lasso_cv.best_params_)
print("Test MSE (Lasso):", lasso_mse)


Best alpha (Lasso): {'alpha': 1}
Test MSE (Lasso): 4904310967.020979


Comparision between L1 and L2 Regularization

In [7]:
print("Ridge Coefficients:\n", best_ridge.coef_)
print("Lasso Coefficients:\n", best_lasso.coef_)

Ridge Coefficients:
 [-52721.63130979 -53268.49748635  13895.5886638  -12744.50320299
  42198.58535107 -43266.01886403  18788.75630925  75067.52780418
 -18825.45626186   2126.29995352  -1567.66754263   1205.19132773]
Lasso Coefficients:
 [-53797.75028931 -54386.67369765  13889.19876917 -13073.22587596
  43045.41252989 -43396.11340072  18377.55734347  75162.82671862
 -18514.69246299   2117.64610678  -1618.42318951   1140.96135222]


In [8]:
ridge_train_mse = mean_squared_error(y_train, best_ridge.predict(X_train))
lasso_train_mse = mean_squared_error(y_train, best_lasso.predict(X_train))

print("Ridge Train MSE:", ridge_train_mse)
print("Ridge Test MSE:", ridge_mse)



Ridge Train MSE: 4683300834.430685
Ridge Test MSE: 4900527988.467714


In [9]:
print("Lasso Train MSE:", lasso_train_mse)
print("Lasso Test MSE:", lasso_mse)

Lasso Train MSE: 4683203898.029834
Lasso Test MSE: 4904310967.020979


How L1 and L2 help reduce overfitting:

L1 (Lasso) and L2 (Ridge) regularization help control model complexity. L2 combats overfitting by shrinking all coefficients, ensuring that no single feature dominates the model. L1 combats overfitting by eliminating less important features altogether by setting their coefficients to zero. Together, these techniques improve the model’s ability to generalize to unseen data.

How too much regularization causes underfitting:

When regularization is applied too strongly, the model becomes overly simple. Important features may be ignored or assigned very small weights, preventing the model from learning the true patterns in the data. This leads to underfitting and higher error


#Part 2:

Task 1


In [10]:
X, y = load_breast_cancer(return_X_y=True)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


Task 2 [Step 1]

In [11]:
log_reg = LogisticRegression(max_iter=20000)
log_reg.fit(X_train, y_train)

train_acc = accuracy_score(y_train, log_reg.predict(X_train))
test_acc = accuracy_score(y_test, log_reg.predict(X_test))

print("Training Accuracy:", train_acc)
print("Test Accuracy:", test_acc)
print("Coefficients:", log_reg.coef_)


Training Accuracy: 0.9582417582417583
Test Accuracy: 0.956140350877193
Coefficients: [[ 1.0274368   0.22145051 -0.36213488  0.0254667  -0.15623532 -0.23771256
  -0.53255786 -0.28369224 -0.22668189 -0.03649446 -0.09710208  1.3705667
  -0.18140942 -0.08719575 -0.02245523  0.04736092 -0.04294784 -0.03240188
  -0.03473732  0.01160522  0.11165329 -0.50887722 -0.01555395 -0.016857
  -0.30773117 -0.77270908 -1.42859535 -0.51092923 -0.74689363 -0.10094404]]


Task 2 [Step 2]


In [12]:
param_grid = {
    'C': [0.01, 0.1, 1, 10],
    'penalty': ['l1', 'l2']
}

log_cv = GridSearchCV(
    LogisticRegression(solver='liblinear', max_iter=20000),
    param_grid,
    cv=5,
    scoring='accuracy'
)

log_cv.fit(X_train, y_train)

best_log = log_cv.best_estimator_

print("Best Parameters:", log_cv.best_params_)


Best Parameters: {'C': 10, 'penalty': 'l2'}


In [13]:
test_acc = accuracy_score(y_test, best_log.predict(X_test))
print("Test Accuracy:", test_acc)


Test Accuracy: 0.956140350877193


Task 2 [Step 3]

In [14]:
log_l1 = LogisticRegression(C=best_log.C * 1.1, penalty='l1',
                            solver='liblinear', max_iter=20000)
log_l2 = LogisticRegression(C=best_log.C * 1.1, penalty='l2',
                            solver='liblinear', max_iter=20000)

log_l1.fit(X_train, y_train)
log_l2.fit(X_train, y_train)


In [15]:
print("L1 Train Accuracy:", accuracy_score(y_train, log_l1.predict(X_train)))
print("L1 Test Accuracy:", accuracy_score(y_test, log_l1.predict(X_test)))

print("L2 Train Accuracy:", accuracy_score(y_train, log_l2.predict(X_train)))
print("L2 Test Accuracy:", accuracy_score(y_test, log_l2.predict(X_test)))


L1 Train Accuracy: 0.9824175824175824
L1 Test Accuracy: 0.9736842105263158
L2 Train Accuracy: 0.9692307692307692
L2 Test Accuracy: 0.956140350877193


In [16]:
print("L1 Coefficients:\n", log_l1.coef_)
print("L2 Coefficients:\n", log_l2.coef_)


L1 Coefficients:
 [[ 1.41290430e+00  1.68839615e-01 -9.08215018e-03 -1.19970886e-02
   0.00000000e+00  0.00000000e+00  0.00000000e+00 -2.12785932e+01
   0.00000000e+00  0.00000000e+00  0.00000000e+00  3.16416156e+00
  -8.79208555e-01 -9.07947450e-02  0.00000000e+00  0.00000000e+00
   4.86047406e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
   7.95672279e-01 -5.93479518e-01  1.41000658e-01 -2.66340787e-02
   0.00000000e+00  0.00000000e+00 -2.86706265e+00 -2.94725226e+01
  -6.86876036e+00  0.00000000e+00]]
L2 Coefficients:
 [[ 4.21729002  0.29572995 -0.47933805 -0.00811959 -0.49748382 -0.80762754
  -1.51908544 -1.12097361 -0.68163894 -0.02982032 -0.29680157  3.56154387
  -0.94452809 -0.07312736 -0.0661052   0.34922705  0.35208529 -0.0933521
  -0.08114754  0.07170713  0.49715269 -0.66632842  0.17640944 -0.02758521
  -0.94540952 -2.21709329 -3.18452706 -1.87797953 -2.4140247  -0.16960667]]


How L1 and L2 help reduce overfitting:

L2 regularization reduces overfitting by shrinking the weights of all features, which leads to a smoother and more stable decision boundary. L1 regularization reduces overfitting by driving the weights of less important features to zero, effectively removing them and simplifying the model. Together, they help prevent the model from memorizing the training data and improve its ability to generalize.

How strong regularization reduces accuracy:

If regularization is too strong, the model becomes overly simplistic. Key features may be suppressed or ignored, preventing the model from properly separating classes. As a result, bias increases and performance drops on both the training and test datasets.