# **Assignment 5**

Q1. (Based on Step-by-Step Implementation of Ridge Regression using Gradient
Descent Optimization)
Generate a dataset with atleast seven highly correlated columns and a target variable.
Implement Ridge Regression using Gradient Descent Optimization. Take different values of learning rate (such as 0.0001,0.001,0.01,0.1,1,10) and regularization parameter (10^-15,10^-10,10^-5 ,10^-3
,0,1,10,20). Choose the best parameters for which ridge
regression cost function is minimum and R2_score is maximum.

In [2]:
import numpy as np
import pandas as pd
from sklearn.metrics import r2_score

# Generate synthetic data
np.random.seed(42)
X = np.random.rand(100, 7)
y = 3*X[:,0] + 2*X[:,1] + X[:,2] + np.random.randn(100) * 0.1
X = np.hstack([X, X[:, [0]] * 0.9])  # add correlation

# Ridge Regression via Gradient Descent
def ridge_gradient_descent(X, y, lr, lam, iterations=1000):
    n, m = X.shape
    X = np.c_[np.ones((n, 1)), X]
    w = np.zeros(m+1)
    for _ in range(iterations):
        y_pred = X.dot(w)
        grad = (-2/n)*X.T.dot(y - y_pred) + 2*lam*w
        grad[0] -= 2*lam*w[0]  # no regularization for bias
        w -= lr * grad

        # stop if weights blow up
        if np.any(np.isnan(w)) or np.any(np.isinf(w)):
            return w, np.inf
    cost = np.mean((y - y_pred)**2) + lam*np.sum(w**2)
    return w, cost

learning_rates = [0.0001, 0.001, 0.01, 0.1, 1]
lambdas = [1e-15, 1e-10, 1e-5, 1e-3, 0, 1, 10, 20]
best = None

for lr in learning_rates:
    for lam in lambdas:
        w, cost = ridge_gradient_descent(X, y, lr, lam)
        if np.isinf(cost):  # skip broken combos
            continue
        y_pred = np.c_[np.ones((X.shape[0],1)), X].dot(w)
        if np.any(np.isnan(y_pred)) or np.any(np.isinf(y_pred)):
            continue
        r2 = r2_score(y, y_pred)
        if not best or r2 > best['r2']:
            best = {'lr': lr, 'lam': lam, 'r2': r2, 'cost': cost}

print("Best Parameters:", best)


Best Parameters: {'lr': 0.1, 'lam': 1e-15, 'r2': 0.9926028747850965, 'cost': np.float64(0.008990246761538433)}


  cost = np.mean((y - y_pred)**2) + lam*np.sum(w**2)


Q2. Load the Hitters dataset from the following link
https://drive.google.com/file/d/1qzCKF6JKKMB0p7ul_lLy8tdmRk3vE_bG/view?usp=sharing

(a) Pre-process the data (null values, noise, categorical to numerical encoding)

(b) Separate input and output features and perform scaling

(c) Fit a Linear, Ridge (use regularization parameter as 0.5748), and LASSO (use regularization parameter as 0.5748) regression function on the dataset.

(d) Evaluate the performance of each trained model on test set. Which model
performs the best and Why?

In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import r2_score

# Load data
data = pd.read_csv('Hitters.csv')
data = data.dropna()

# Encode categorical features
data = pd.get_dummies(data, drop_first=True)

# Split
X = data.drop("Salary", axis=1)
y = data["Salary"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Models
models = {
    "Linear": LinearRegression(),
    "Ridge": Ridge(alpha=0.5748),
    "Lasso": Lasso(alpha=0.5748)
}

for name, model in models.items():
    model.fit(X_train, y_train)
    pred = model.predict(X_test)
    print(f"{name} R2 Score:", r2_score(y_test, pred))

Linear R2 Score: 0.16769360190025295
Ridge R2 Score: 0.169752366183749
Lasso R2 Score: 0.14682108115827563


Q3. Cross Validation for Ridge and Lasso Regression
Explore Ridge Cross Validation (RidgeCV) and Lasso Cross Validation (LassoCV)function of Python. Implement both on Boston House Prediction Dataset (load_boston dataset from sklearn.datasets).

In [9]:
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import RidgeCV, LassoCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import warnings
warnings.filterwarnings("ignore")

# Load California housing dataset instead of Boston
housing = fetch_california_housing()
X, y = housing.data, housing.target

# Split data
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Ridge and Lasso with cross-validation
ridge_cv = RidgeCV(alphas=[0.1, 1, 10])
ridge_cv.fit(X_train, y_train)

lasso_cv = LassoCV(alphas=[0.1, 1, 10])
lasso_cv.fit(X_train, y_train)

# Results
print("Best Ridge alpha:", ridge_cv.alpha_)
print("Ridge R2:", r2_score(y_test, ridge_cv.predict(X_test)))

print("Best Lasso alpha:", lasso_cv.alpha_)
print("Lasso R2:", r2_score(y_test, lasso_cv.predict(X_test)))


Best Ridge alpha: 10.0
Ridge R2: 0.5764371557026509
Best Lasso alpha: 0.1
Lasso R2: 0.5318167610318159


Q4. Multiclass Logistic Regression: Implement Multiclass Logistic Regression (step-by step) on Iris dataset using one vs. rest strategy?

In [7]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def train_one_vs_rest(X, y, cls, lr=0.01, iters=1000):
    y_bin = np.where(y == cls, 1, 0)
    w = np.zeros(X.shape[1])
    b = 0
    for _ in range(iters):
        z = np.dot(X, w) + b
        y_pred = sigmoid(z)
        dw = np.dot(X.T, (y_pred - y_bin)) / len(y)
        db = np.mean(y_pred - y_bin)
        w -= lr * dw
        b -= lr * db
    return w, b

weights, biases = [], []
for cls in np.unique(y_train):
    w, b = train_one_vs_rest(X_train, y_train, cls)
    weights.append(w)
    biases.append(b)

def predict(X):
    probs = [sigmoid(X.dot(w) + b) for w, b in zip(weights, biases)]
    return np.argmax(probs, axis=0)

y_pred = predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))

Accuracy: 0.9
