<a href="https://colab.research.google.com/github/BibikaShrestha/AI-Worksheets/blob/main/AIWorksheet7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LinearRegression, Ridge, Lasso, LogisticRegression
from sklearn.metrics import mean_squared_error, accuracy_score
from sklearn.datasets import load_breast_cancer


## Part 1: Regression (Synthetic California-like Data)

In [None]:

# Seed the legacy global NumPy RNG so the feature matrix and the noise
# below come out the same on every Restart & Run All.
np.random.seed(42)
n_samples, n_features = 2000, 8

# Features: uniform draws in [0, 1), one row per sample.
X = np.random.rand(n_samples, n_features)

# Ground-truth linear weights. Two of them are exactly zero, so those
# features carry no signal.
true_coef = np.array([3, -2, 1.5, 0, 0, 2, -1, 0.5])

# Target = linear signal + Gaussian noise scaled by 0.5.
# The noise is drawn *after* X so the RNG stream order is preserved.
y = np.dot(X, true_coef) + 0.5 * np.random.randn(n_samples)

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# partition identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
print(X_train.shape, X_test.shape)


(1600, 8) (400, 8)


In [None]:

# Unregularised least-squares baseline, fitted on the training split only.
lr = LinearRegression().fit(X_train, y_train)

# Report the error on both splits so the train/test gap is visible.
for split_label, split_X, split_y in (
    ("Train MSE:", X_train, y_train),
    ("Test MSE:", X_test, y_test),
):
    print(split_label, mean_squared_error(split_y, lr.predict(split_X)))

# Learned weights, for comparison against the generating coefficients.
print("Coefficients:", lr.coef_)


Train MSE: 0.2617890150825936
Test MSE: 0.25009874251843667
Coefficients: [ 3.02566612 -2.01817422  1.44880371 -0.0328139  -0.07964999  1.97887276
 -1.0506173   0.49854085]


In [None]:

# Candidate regularisation strengths shared by both penalised models.
params = {'alpha': [0.01, 0.1, 1, 10]}

# 5-fold cross-validated search over alpha; fit() returns the fitted
# search object, so construction and fitting can be chained.
ridge = GridSearchCV(Ridge(), params, cv=5).fit(X_train, y_train)
lasso = GridSearchCV(Lasso(max_iter=5000), params, cv=5).fit(X_train, y_train)

for model_label, search in (("Best Ridge alpha:", ridge), ("Best Lasso alpha:", lasso)):
    print(model_label, search.best_params_)


Best Ridge alpha: {'alpha': 0.1}
Best Lasso alpha: {'alpha': 0.01}


In [None]:

# Score each search's refitted best model on the held-out regression split.
for model_label, search in (("Ridge Test MSE:", ridge), ("Lasso Test MSE:", lasso)):
    held_out_pred = search.best_estimator_.predict(X_test)
    print(model_label, mean_squared_error(y_test, held_out_pred))


Ridge Test MSE: 0.25004414515468476
Lasso Test MSE: 0.25144471921590733


## Part 2: Classification (Breast Cancer)

In [None]:

# Load the breast-cancer dataset and pull out the feature matrix and labels.
# NOTE: this rebinds X, y and the four split variables that Part 1 used for
# the synthetic regression data.
cancer = load_breast_cancer()
X, y = cancer.data, cancer.target

# Same 80/20 hold-out and seed as the regression part.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:

# Baseline classifier with the estimator's default settings; max_iter is
# raised to 5000 to give the solver more iterations than the default.
log = LogisticRegression(max_iter=5000).fit(X_train, y_train)

# Accuracy on both splits, to compare against the tuned model.
for split_label, split_X, split_y in (
    ("Baseline Train Acc:", X_train, y_train),
    ("Baseline Test Acc:", X_test, y_test),
):
    print(split_label, accuracy_score(split_y, log.predict(split_X)))


Baseline Train Acc: 0.9582417582417583
Baseline Test Acc: 0.956140350877193


In [None]:

# Search space: inverse regularisation strength C crossed with L1/L2
# penalties, with the solver pinned to liblinear for every candidate.
params = {
    'C': [0.01, 0.1, 1, 10],
    'penalty': ['l1', 'l2'],
    'solver': ['liblinear'],
}

# 5-fold cross-validated grid search on the training split.
grid = GridSearchCV(LogisticRegression(max_iter=5000), params, cv=5).fit(X_train, y_train)
print("Best Params:", grid.best_params_)

# Evaluate the refitted winning configuration on both splits.
best = grid.best_estimator_
for split_label, split_X, split_y in (
    ("Tuned Train Acc:", X_train, y_train),
    ("Tuned Test Acc:", X_test, y_test),
):
    print(split_label, accuracy_score(split_y, best.predict(split_X)))


Best Params: {'C': 10, 'penalty': 'l2', 'solver': 'liblinear'}
Tuned Train Acc: 0.9692307692307692
Tuned Test Acc: 0.956140350877193
