Q1. (Based on Step-by-Step Implementation of Ridge Regression using Gradient Descent Optimization)

Generate a dataset with at least seven highly correlated columns and a target variable.

Implement Ridge Regression using Gradient Descent Optimization. Take different values of the learning rate (such as 0.0001, 0.001, 0.01, 0.1, 1, 10) and the regularization parameter ($10^{-15}$, $10^{-10}$, $10^{-5}$, $10^{-3}$, 0, 1, 10, 20).

Choose the best parameters, i.e. those for which the ridge regression cost function is minimum and the R2_score is maximum.

In [5]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

# --- Q1 data: eight columns that are all noisy copies of one latent signal ---
np.random.seed(0)
n = 500
z = np.random.randn(n)  # shared latent signal every feature is derived from

# Columns 0-6: z plus very small independent noise (near-duplicates of each other).
# Column 7: a half-scaled, noisier copy of z.
# The random draws are made in exactly this order, which fixes the dataset for seed 0.
near_copies = [z + 0.01 * np.random.randn(n) for _ in range(7)]
X = np.column_stack(near_copies)
noisier_copy = 0.5 * z + 0.2 * np.random.randn(n)
X = np.column_stack([X, noisier_copy])

# Target is a fixed linear combination of the columns plus Gaussian noise.
true_w = np.array([3, -2, 1, 0, 0.5, -1, 2, 4], dtype=float)
y = X.dot(true_w) + 0.5 * np.random.randn(n)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=1
)

# Standardise with statistics from the training split only, then apply the
# same shift/scale to the test split.
m = X_train.mean(axis=0)
s = X_train.std(axis=0)
usable_scale = np.isfinite(s) & (s > 0)
s = np.where(usable_scale, s, 1.0)  # fall back to 1.0 so no column divides by 0/NaN
X_train = (X_train - m) / s
X_test = (X_test - m) / s

def ridge(X, y, lr, lmbd, epochs=2000):
    """Fit a ridge regression model with full-batch gradient descent.

    Every epoch takes one gradient step on
    ``mean((X @ w + b - y) ** 2) + lmbd * sum(w ** 2)``; the intercept ``b``
    is not penalised.

    Args:
        X: 2-D array of features; cast to float64 before use.
        y: Array of targets; cast to float64 before use.
        lr: Gradient-descent step size.
        lmbd: L2 penalty strength applied to the weights.
        epochs: Number of gradient steps to run.

    Returns:
        ``(w, b)`` with the weight vector and intercept after the final
        step, or ``None`` as soon as either becomes non-finite.
    """
    features = X.astype(np.float64)
    targets = y.astype(np.float64)
    n_samples, n_features = features.shape

    w = np.zeros(n_features, dtype=np.float64)
    b = 0.0

    for _ in range(epochs):
        residual = features.dot(w) + b - targets

        # Gradients of the penalised mean squared error.
        grad_w = (2 / n_samples) * (features.T.dot(residual)) + 2 * lmbd * w
        grad_b = (2 / n_samples) * residual.sum()

        w = w - lr * grad_w
        b = b - lr * grad_b

        # Give up once the parameters overflow or turn into NaN.
        if not np.isfinite(w).all() or not np.isfinite(b):
            return None

    return w, b

# Grid search: train one model per (learning rate, lambda) pair and keep the
# pair with the highest R^2 on the held-out test split.
# NOTE(review): the task statement also lists learning rates 1 and 10 and
# lambdas 1e-15, 1e-10, 1e-5 and 20; they are not part of this grid.
lrs=[0.0001,0.001,0.01,0.1]
lmbds=[0,0.001,0.01,0.1,1,10]
# R^2 has no lower bound, so start from -inf rather than -1; otherwise a grid
# whose every model scores <= -1 would end up reporting no winner at all.
best_r2=float("-inf")
best=None
for lr in lrs:
    for lmbd in lmbds:
        res=ridge(X_train,y_train,lr,lmbd)
        if res is None:
            # Training diverged (non-finite parameters); skip this pair.
            continue
        w,b=res
        p=X_test.dot(w)+b
        if not np.isfinite(p).all():
            continue
        r2=r2_score(y_test,p)
        if r2>best_r2:
            best_r2=r2
            best=(lr,lmbd,r2)
# `best` stays None only if every configuration diverged.
print("Best LR, Lambda, R2 =",best)

Best LR, Lambda, R2 = (0.1, 0, 0.9920703853817209)


Q2. Load the Hitters dataset from the following link: https://drive.google.com/file/d/1qzCKF6JKKMB0p7ul_lLy8tdmRk3vE_bG/view?usp=sharing

(a) Pre-process the data (null values, noise, categorical to numerical encoding)

(b) Separate input and output features and perform scaling

(c) Fit a Linear, Ridge (use regularization parameter as 0.5748), and LASSO (use regularization parameter as 0.5748) regression function on the dataset.

(d) Evaluate the performance of each trained model on the test set. Which model performs best, and why?

In [6]:
import pandas as pd
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# (a) Pre-process: drop rows without a target, fill remaining numeric gaps
#     with the column median, and integer-encode the three categorical columns.
df = pd.read_csv("Hitters.csv")
df = df.dropna(subset=["Salary"])
numeric_medians = df.median(numeric_only=True)
df = df.fillna(numeric_medians)
for col in ["League", "Division", "NewLeague"]:
    df[col] = df[col].astype("category").cat.codes

# (b) Separate inputs/target, split, then scale using training statistics only
X = df.drop("Salary", axis=1)
y = df["Salary"]
Xtr, Xte, ytr, yte = train_test_split(X, y, test_size=0.25, random_state=0)
s = StandardScaler()
Xtr = s.fit_transform(Xtr)
Xte = s.transform(Xte)

# (c) Fit Linear, Ridge and Lasso (both penalised models use alpha=0.5748)
lr = LinearRegression()
lr.fit(Xtr, ytr)
ridge = Ridge(alpha=0.5748)
ridge.fit(Xtr, ytr)
lasso = Lasso(alpha=0.5748, max_iter=5000)
lasso.fit(Xtr, ytr)

# (d) Evaluate: print name, test MSE and test R^2 for each fitted model
fitted_models = {"Linear": lr, "Ridge": ridge, "Lasso": lasso}
for label, estimator in fitted_models.items():
    preds = estimator.predict(Xte)
    mse = mean_squared_error(yte, preds)
    r2 = r2_score(yte, preds)
    print(label, mse, r2)

Linear 131898.53472017133 0.5531784057871477
Ridge 128967.77296468553 0.5631067013714954
Lasso 128572.34347396728 0.5644462646639639


Q3. Cross Validation for Ridge and Lasso Regression: Explore the Ridge Cross Validation (RidgeCV) and Lasso Cross Validation (LassoCV) functions of Python. Implement both on the Boston House Prediction Dataset (load_boston dataset from sklearn.datasets).

In [3]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import RidgeCV, LassoCV
from sklearn.metrics import r2_score
import numpy as np

# Load the Boston data from OpenML as plain arrays; the target is cast to float.
boston = fetch_openml("boston", version=1, as_frame=False)
X = boston.data
y = boston.target.astype(float)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=1
)

# Fit the scaler on the training split only and reuse it for the test split.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Candidate penalties: 13 values spaced evenly on a log scale from 1e-6 to 1e6.
alphas = np.logspace(-6, 6, 13)

# Both estimators choose their alpha by 5-fold cross-validation on the training split.
ridge = RidgeCV(alphas=alphas, cv=5)
ridge.fit(X_train, y_train)
lasso = LassoCV(alphas=alphas, cv=5, max_iter=5000)
lasso.fit(X_train, y_train)

# Score each selected model on the held-out test split.
ridge_r2 = r2_score(y_test, ridge.predict(X_test))
lasso_r2 = r2_score(y_test, lasso.predict(X_test))

print("Ridge best alpha:", ridge.alpha_)
print("Ridge R2:", ridge_r2)
print("Lasso best alpha:", lasso.alpha_)
print("Lasso R2:", lasso_r2)

Ridge best alpha: 10.0
Ridge R2: 0.7775108393295397
Lasso best alpha: 0.01
Lasso R2: 0.7787621490259894


Q4. Multiclass Logistic Regression: Implement Multiclass Logistic Regression (step-by-step) on the Iris dataset using the one-vs-rest strategy.



In [4]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import accuracy_score, classification_report

# NOTE(review): the task asks for a step-by-step implementation; this cell
# delegates the one-vs-rest scheme to scikit-learn's OneVsRestClassifier.
iris = load_iris()
X, y = iris.data, iris.target

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)

# Standardise features using statistics from the training split only.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# One binary logistic regression per class, wrapped in a one-vs-rest classifier.
base_estimator = LogisticRegression(max_iter=2000)
model = OneVsRestClassifier(base_estimator)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

test_accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print("Accuracy:", test_accuracy)
print("\nClassification Report:\n", report)

Accuracy: 0.9111111111111111

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        16
           1       0.94      0.83      0.88        18
           2       0.77      0.91      0.83        11

    accuracy                           0.91        45
   macro avg       0.90      0.91      0.91        45
weighted avg       0.92      0.91      0.91        45

