Part 1: Regression Task

Task 1:

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing, load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LinearRegression, Ridge, Lasso, LogisticRegression
from sklearn.metrics import mean_squared_error, accuracy_score
X, y = fetch_california_housing(return_X_y=True)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

print("Training size:", X_train.shape)
print("Test size:", X_test.shape)


Training size: (16512, 8)
Test size: (4128, 8)


Task 2:


In [2]:
#step 1
# Create model
lr = LinearRegression()

# Train
lr.fit(X_train, y_train)

# Predictions
y_train_pred = lr.predict(X_train)
y_test_pred = lr.predict(X_test)

# MSE
train_mse = mean_squared_error(y_train, y_train_pred)
test_mse = mean_squared_error(y_test, y_test_pred)

print("Training MSE:", train_mse)
print("Test MSE:", test_mse)

# Coefficients
print("Coefficients:", lr.coef_)

#step 2
#L2
ridge = Ridge()

param_grid = {'alpha': [0.01, 0.1, 1, 10, 100]}

ridge_cv = GridSearchCV(
    ridge,
    param_grid,
    cv=5,
    scoring='neg_mean_squared_error'
)

ridge_cv.fit(X_train, y_train)

print("Best alpha (Ridge):", ridge_cv.best_params_)

#L1
lasso = Lasso(max_iter=10000)

param_grid = {'alpha': [0.001, 0.01, 0.1, 1, 10]}

lasso_cv = GridSearchCV(
    lasso,
    param_grid,
    cv=5,
    scoring='neg_mean_squared_error'
)

lasso_cv.fit(X_train, y_train)

print("Best alpha (Lasso):", lasso_cv.best_params_)

#Step 3
#Ridge Model
best_ridge = ridge_cv.best_estimator_

ridge_train_mse = mean_squared_error(
    y_train, best_ridge.predict(X_train)
)
ridge_test_mse = mean_squared_error(
    y_test, best_ridge.predict(X_test)
)

print("Ridge Training MSE:", ridge_train_mse)
print("Ridge Test MSE:", ridge_test_mse)

#Lasso Model
best_lasso = lasso_cv.best_estimator_

lasso_train_mse = mean_squared_error(
    y_train, best_lasso.predict(X_train)
)
lasso_test_mse = mean_squared_error(
    y_test, best_lasso.predict(X_test)
)

print("Lasso Training MSE:", lasso_train_mse)
print("Lasso Test MSE:", lasso_test_mse)

print("Ridge coefficients:", best_ridge.coef_)
print("Lasso coefficients:", best_lasso.coef_)





Training MSE: 0.5179331255246699
Test MSE: 0.5558915986952422
Coefficients: [ 4.48674910e-01  9.72425752e-03 -1.23323343e-01  7.83144907e-01
 -2.02962058e-06 -3.52631849e-03 -4.19792487e-01 -4.33708065e-01]
Best alpha (Ridge): {'alpha': 10}
Best alpha (Lasso): {'alpha': 0.001}
Ridge Training MSE: 0.5179417369316771
Ridge Test MSE: 0.5550405537342994
Lasso Training MSE: 0.5179897619159226
Lasso Test MSE: 0.5538940157172418
Ridge coefficients: [ 4.47068597e-01  9.74130199e-03 -1.20293353e-01  7.66201258e-01
 -1.99123989e-06 -3.52184780e-03 -4.19720067e-01 -4.33421866e-01]
Lasso coefficients: [ 4.44698243e-01  9.78271615e-03 -1.15737191e-01  7.41172257e-01
 -1.85738492e-06 -3.50964130e-03 -4.18521643e-01 -4.31828076e-01]


Part 2: Classification Task

Task 1:

In [4]:
X, y = load_breast_cancer(return_X_y=True)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


Task 2:

In [5]:
#Step 1
log_reg = LogisticRegression(max_iter=10000)

log_reg.fit(X_train, y_train)

train_acc = accuracy_score(y_train, log_reg.predict(X_train))
test_acc = accuracy_score(y_test, log_reg.predict(X_test))

print("Training Accuracy:", train_acc)
print("Test Accuracy:", test_acc)

print("Coefficients:", log_reg.coef_)

#Step 2
param_grid = {
    'C': [0.01, 0.1, 1, 10, 100],
    'penalty': ['l1', 'l2'],
    'solver': ['liblinear']  # supports l1 & l2
}

log_cv = GridSearchCV(
    LogisticRegression(max_iter=10000),
    param_grid,
    cv=5,
    scoring='accuracy'
)

log_cv.fit(X_train, y_train)

print("Best parameters:", log_cv.best_params_)

#Step 3
best_log = log_cv.best_estimator_

train_acc = accuracy_score(y_train, best_log.predict(X_train))
test_acc = accuracy_score(y_test, best_log.predict(X_test))

print("Training Accuracy:", train_acc)
print("Test Accuracy:", test_acc)
print("Coefficients:", best_log.coef_)



Training Accuracy: 0.9582417582417583
Test Accuracy: 0.956140350877193
Coefficients: [[ 1.0274368   0.22145051 -0.36213488  0.0254667  -0.15623532 -0.23771256
  -0.53255786 -0.28369224 -0.22668189 -0.03649446 -0.09710208  1.3705667
  -0.18140942 -0.08719575 -0.02245523  0.04736092 -0.04294784 -0.03240188
  -0.03473732  0.01160522  0.11165329 -0.50887722 -0.01555395 -0.016857
  -0.30773117 -0.77270908 -1.42859535 -0.51092923 -0.74689363 -0.10094404]]
Best parameters: {'C': 100, 'penalty': 'l1', 'solver': 'liblinear'}
Training Accuracy: 0.989010989010989
Test Accuracy: 0.9824561403508771
Coefficients: [[ 7.17343953e-01 -1.08353896e-01  1.05930427e-01 -2.52295598e-03
   0.00000000e+00  4.71697402e+01 -1.22954477e+01 -1.35822113e+02
   1.97475914e+01  0.00000000e+00  0.00000000e+00  1.73331791e+00
  -1.97853812e-04 -1.97748571e-01  0.00000000e+00  0.00000000e+00
   5.03302688e+01  0.00000000e+00  1.84074366e+01  0.00000000e+00
   1.42322035e-01 -4.39003426e-01  5.81632224e-02 -2.04419016e-