## Q1: Ridge Regression with Gradient Descent Optimization

In [None]:
# Generate synthetic dataset with highly correlated features
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
# Fix the global NumPy seed so every run draws the same synthetic data.
np.random.seed(42)
n = 200

# Start from 7 independent uniform(0, 1) features ...
X = np.random.rand(n, 7)
# ... then overwrite columns 1 and 2 with column 0 plus/minus tiny Gaussian
# jitter (sd 0.01), making the first three columns almost collinear.
np.add(X[:, 0], np.random.normal(0, 0.01, n), out=X[:, 1])
np.subtract(X[:, 0], np.random.normal(0, 0.01, n), out=X[:, 2])

# Response depends only on the three correlated columns, plus noise (sd 0.1).
y = (
    3 * X[:, 0]
    + 2 * X[:, 1]
    - X[:, 2]
    + np.random.normal(0, 0.1, n)
)

# Standardise each column to zero mean and unit variance.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

In [None]:
# Implement Ridge Regression with Gradient Descent
def ridge_gd(X, y, lr, reg, n_iter=1000):
    """Fit ridge-regression coefficients with batch gradient descent.

    Minimises ``mean((y - X @ beta) ** 2) + reg * sum(beta ** 2)`` starting
    from ``beta = 0``. No intercept term is fitted, so callers should centre
    ``y`` (or add a constant column to ``X``) when the response has a
    non-zero mean.

    Parameters
    ----------
    X : array-like of shape (m, n)
        Design matrix; converted to a float ndarray.
    y : array-like of shape (m,) or (m, 1)
        Response values; flattened to one dimension.
    lr : float
        Gradient-descent step size.
    reg : float
        L2 penalty strength.
    n_iter : int, default 1000
        Maximum number of gradient steps.

    Returns
    -------
    numpy.ndarray of shape (n,)
        The coefficient vector. It contains non-finite entries (inf/nan) when
        the step size was too large and the iteration diverged.
    """
    X = np.asarray(X, dtype=float)
    # Flatten so a column vector does not broadcast (m, 1) - (m,) into (m, m).
    y = np.asarray(y, dtype=float).ravel()
    m, n = X.shape
    beta = np.zeros(n)
    # A diverging run overflows to inf/nan; that outcome is reported through
    # the return value, so silence the floating-point warnings it triggers.
    with np.errstate(over="ignore", invalid="ignore"):
        for _ in range(n_iter):
            residual = y - X @ beta
            grad = -2 * X.T @ residual / m + 2 * reg * beta
            beta -= lr * grad
            if not np.all(np.isfinite(beta)):
                # Once non-finite, further steps cannot recover.
                break
    return beta
# Try different learning rates and regularization parameters
lrs = [0.0001, 0.001, 0.01, 0.1, 1, 10]
regs = [1e-15, 1e-10, 1e-5, 1e-3, 0.1, 1, 10, 20]
from sklearn.metrics import r2_score

# X_scaled has zero-mean columns and ridge_gd fits no intercept, so the mean
# of y must be removed before fitting and added back to the predictions;
# otherwise R^2 is dominated by the unmodelled offset.
y_mean = y.mean()
y_centered = y - y_mean

best_r2 = -np.inf
best_params = None  # stays None if every (lr, reg) combination diverges
for lr in lrs:
    for reg in regs:
        # Step sizes that are too large make gradient descent overflow to
        # inf/nan; silence the resulting NumPy warnings and skip those runs.
        with np.errstate(over="ignore", invalid="ignore"):
            beta = ridge_gd(X_scaled, y_centered, lr, reg)
            y_pred = X_scaled @ beta + y_mean
        if not np.all(np.isfinite(y_pred)):
            # r2_score rejects non-finite input with a ValueError, which
            # would abort the whole search.
            continue
        # NOTE: R^2 is measured on the same data used for fitting.
        r2 = r2_score(y, y_pred)
        if r2 > best_r2:
            best_r2 = r2
            best_params = (lr, reg)
best_params, best_r2

## Q2: Hitters Dataset - Linear, Ridge, and LASSO Regression

In [None]:
# Load and preprocess Hitters dataset
# Read the data, discard incomplete rows, and one-hot encode the categorical
# columns (dropping the first level of each to avoid redundant dummies).
df = pd.get_dummies(
    pd.read_csv('Hitters.csv').dropna(),  # Update with actual path
    drop_first=True,
)

# Target is the Salary column; every remaining column is a predictor.
y = df['Salary']
X = df.drop(columns='Salary')

# Standardise the predictors to zero mean and unit variance.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

In [None]:
# Fit Linear, Ridge, and LASSO regression
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.model_selection import train_test_split
# Split the raw predictors first and fit the scaler on the training rows
# only. Scaling the full dataset before splitting would let test-set means
# and variances leak into the training features.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
split_scaler = StandardScaler().fit(X_train_raw)
X_train = split_scaler.transform(X_train_raw)
X_test = split_scaler.transform(X_test_raw)

# Fit the three models on the training split with the same penalty strength
# for Ridge and LASSO.
lin = LinearRegression().fit(X_train, y_train)
ridge = Ridge(alpha=0.5748).fit(X_train, y_train)
lasso = Lasso(alpha=0.5748).fit(X_train, y_train)

# Predict on the held-out split.
y_pred_lin = lin.predict(X_test)
y_pred_ridge = ridge.predict(X_test)
y_pred_lasso = lasso.predict(X_test)

# Compare out-of-sample R^2 across the three models.
r2_lin = r2_score(y_test, y_pred_lin)
r2_ridge = r2_score(y_test, y_pred_ridge)
r2_lasso = r2_score(y_test, y_pred_lasso)
r2_lin, r2_ridge, r2_lasso

## Q3: Cross Validation for Ridge and Lasso Regression

In [None]:
# RidgeCV and LassoCV on Boston dataset
from sklearn.linear_model import RidgeCV, LassoCV
try:
    from sklearn.datasets import load_boston
except ImportError:
    # load_boston was removed in scikit-learn 1.2.
    load_boston = None

if load_boston is not None:
    boston = load_boston()
    X, y = boston.data, boston.target
else:
    # Fall back to the original StatLib copy, parsed as recommended in the
    # scikit-learn removal notice: each record spans two physical lines, so
    # even rows hold the first 11 features and odd rows hold the last two
    # features followed by the target.
    raw_df = pd.read_csv(
        "http://lib.stat.cmu.edu/datasets/boston",
        sep=r"\s+",
        skiprows=22,
        header=None,
    )
    X = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
    y = raw_df.values[1::2, 2]

# Standardise the features so one penalty strength applies evenly to all.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Pick the penalty strength by 5-fold cross-validation over the same grid.
ridgecv = RidgeCV(alphas=[0.1, 1, 10, 100], cv=5).fit(X_scaled, y)
lassocv = LassoCV(alphas=[0.1, 1, 10, 100], cv=5, max_iter=10000).fit(X_scaled, y)
ridgecv.alpha_, lassocv.alpha_

## Q4: Multiclass Logistic Regression (Iris Dataset)

In [None]:
# One-vs-rest Multiclass Logistic Regression on Iris
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier

iris = load_iris()
X, y = iris.data, iris.target

# LogisticRegression's multi_class='ovr' option is deprecated in recent
# scikit-learn releases; wrapping the estimator in OneVsRestClassifier is the
# documented way to keep fitting one binary classifier per class.
model = OneVsRestClassifier(LogisticRegression(max_iter=200))
model.fit(X, y)

# NOTE: this is accuracy on the training data, not a held-out estimate.
score = model.score(X, y)
score