<a href="https://colab.research.google.com/github/Nirdesh-arc/Nirdesh_Khadka_Concept_and_Tech_of_AI/blob/main/workshop7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.datasets import fetch_california_housing, load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LinearRegression, Ridge, Lasso, LogisticRegression
from sklearn.metrics import mean_squared_error, accuracy_score

In [2]:
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

# Diabetes regression data as a (feature matrix, target vector) pair.
X, y = load_diabetes(return_X_y=True)

# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable.
split_options = {"test_size": 0.2, "random_state": 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# Report the shape of each partition.
for label, features in (("Training set:", X_train), ("Test set:", X_test)):
    print(label, features.shape)

Training set: (353, 10)
Test set: (89, 10)


In [3]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Unregularised least-squares fit, used as the reference point for the
# penalised models. `fit` returns the estimator itself, so it can be chained.
lr = LinearRegression().fit(X_train, y_train)

# Predictions on the data the model saw and on the held-out data.
train_pred, test_pred = lr.predict(X_train), lr.predict(X_test)

train_mse = mean_squared_error(y_train, train_pred)
test_mse = mean_squared_error(y_test, test_pred)

print("Baseline Linear Regression")
print("Training MSE:", train_mse)
print("Test MSE:", test_mse)
print("Coefficients:", lr.coef_)

Baseline Linear Regression
Training MSE: 2868.549702835577
Test MSE: 2900.193628493482
Coefficients: [  37.90402135 -241.96436231  542.42875852  347.70384391 -931.48884588
  518.06227698  163.41998299  275.31790158  736.1988589    48.67065743]


In [4]:
from sklearn.linear_model import Ridge, Lasso
from sklearn.model_selection import GridSearchCV

# Candidate regularisation strengths for each penalised model.
ridge_params = {"alpha": [0.01, 0.1, 1, 10, 100]}
lasso_params = {"alpha": [0.001, 0.01, 0.1, 1, 10]}

# Both searches use 5-fold cross-validation and rank candidates by negated MSE.
search_options = {"cv": 5, "scoring": "neg_mean_squared_error"}
ridge_grid = GridSearchCV(Ridge(), ridge_params, **search_options)
lasso_grid = GridSearchCV(Lasso(max_iter=10000), lasso_params, **search_options)

# Run the searches on the training partition only (Ridge first, then Lasso).
for search in (ridge_grid, lasso_grid):
    search.fit(X_train, y_train)

# Keep the refitted winners for evaluation in later cells.
best_ridge, best_lasso = ridge_grid.best_estimator_, lasso_grid.best_estimator_

print("Best Ridge alpha:", ridge_grid.best_params_)
print("Best Lasso alpha:", lasso_grid.best_params_)

Best Ridge alpha: {'alpha': 0.1}
Best Lasso alpha: {'alpha': 0.1}


In [5]:
# Held-out error of the tuned Ridge model.
ridge_test_mse = mean_squared_error(y_test, best_ridge.predict(X_test))
print("Ridge Test MSE:", ridge_test_mse)

# Held-out error of the tuned Lasso model.
lasso_test_mse = mean_squared_error(y_test, best_lasso.predict(X_test))
print("Lasso Test MSE:", lasso_test_mse)

# Count the coefficients that are exactly zero in the tuned Lasso model.
lasso_zero_mask = best_lasso.coef_ == 0
print("Number of zero coefficients in Lasso:", lasso_zero_mask.sum())

Ridge Test MSE: 2856.4868876706537
Lasso Test MSE: 2798.193485169719
Number of zero coefficients in Lasso: 3


In [6]:
from sklearn.datasets import load_breast_cancer

# Breast-cancer classification data as a (feature matrix, label vector) pair.
# NOTE: this rebinds X, y and the train/test names used for the diabetes data above.
X, y = load_breast_cancer(return_X_y=True)

# Same 80/20 split and seed as used for the regression data.
partitions = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = partitions

# Report the shape of each partition.
print("Training set:", X_train.shape)
print("Test set:", X_test.shape)

Training set: (455, 30)
Test set: (114, 30)


In [7]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Logistic regression with default settings apart from a raised iteration cap.
# `fit` returns the estimator, so construction and training are chained.
log_reg = LogisticRegression(max_iter=5000).fit(X_train, y_train)

# Accuracy on the training partition and on the held-out partition.
train_labels_pred = log_reg.predict(X_train)
train_acc = accuracy_score(y_train, train_labels_pred)
test_labels_pred = log_reg.predict(X_test)
test_acc = accuracy_score(y_test, test_labels_pred)

print("Baseline Logistic Regression")
for label, accuracy in (("Training Accuracy:", train_acc), ("Test Accuracy:", test_acc)):
    print(label, accuracy)

Baseline Logistic Regression
Training Accuracy: 0.9582417582417583
Test Accuracy: 0.956140350877193


In [8]:
# Search space: inverse regularisation strength C crossed with the L1 and L2
# penalties. The solver is pinned to liblinear for every candidate.
param_grid = {
    "C": [0.01, 0.1, 1, 10, 100],
    "penalty": ["l1", "l2"],
    "solver": ["liblinear"],
}

# liblinear shuffles the data with a random number generator, so an unseeded
# estimator can give slightly different fits (and a different winner) from run
# to run. Seed it with the same value as the train/test split so the search is
# reproducible. random_state is not part of param_grid, so best_params_ keeps
# the same keys as before.
grid = GridSearchCV(
    LogisticRegression(max_iter=5000, random_state=42),
    param_grid,
    cv=5,
    scoring="accuracy",
)

# Tune on the training partition only; the test partition stays untouched.
grid.fit(X_train, y_train)

# Winning configuration, refitted on the full training partition.
best_log = grid.best_estimator_
print("Best Parameters:", grid.best_params_)

Best Parameters: {'C': 100, 'penalty': 'l1', 'solver': 'liblinear'}


In [9]:
# Compare the L1 and L2 penalties at one shared regularisation strength.
# NOTE: best_log.C was tuned jointly with the penalty by the grid search, so it
# is the best C for the winning penalty only; the other model is evaluated at
# that same C rather than at its own optimum.
#
# liblinear shuffles the data with a random number generator, so both models
# are seeded (same value as the train/test split) to make the reported
# accuracies and zero-coefficient counts reproducible.
shared_options = {
    "C": best_log.C,
    "solver": "liblinear",
    "max_iter": 5000,
    "random_state": 42,
}
log_l1 = LogisticRegression(penalty="l1", **shared_options)
log_l2 = LogisticRegression(penalty="l2", **shared_options)

log_l1.fit(X_train, y_train)
log_l2.fit(X_train, y_train)

# Held-out accuracy for each penalty.
print("L1 Test Accuracy:", accuracy_score(y_test, log_l1.predict(X_test)))
print("L2 Test Accuracy:", accuracy_score(y_test, log_l2.predict(X_test)))

# Number of coefficients that are exactly zero under each penalty.
print("Zero coefficients (L1):", (log_l1.coef_ == 0).sum())
print("Zero coefficients (L2):", (log_l2.coef_ == 0).sum())

L1 Test Accuracy: 0.9824561403508771
L2 Test Accuracy: 0.956140350877193
Zero coefficients (L1): 9
Zero coefficients (L2): 0
