<a href="https://colab.research.google.com/github/Tanishq7642/Machine-Learning-UML501-/blob/main/Assignment_5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Assignment 5: Regression Analysis


### Question 1: Ridge Regression with Gradient Descent

**Objective:**
1. Generate a dataset with at least seven highly correlated columns.
2. Implement Ridge Regression using Gradient Descent Optimization.
3. Test different learning rates (0.0001, 0.001, 0.01, 0.1, 1, 10) with regularization parameter $10^{-5}$.
4. Choose the best parameters for minimum cost and maximum R2 score.

In [8]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_regression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

# 1. Generate Data with high correlation (effective_rank < n_features creates correlation)
X, y = make_regression(n_samples=500, n_features=7, n_informative=7, effective_rank=2, noise=10, random_state=42)

print("Generated Data Shape:", X.shape)

# Preprocessing
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Add intercept column (bias)
X = np.c_[np.ones(X.shape[0]), X]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Gradient Descent Implementation
def ridge_gradient_descent(X, y, learning_rate, lambda_param, iterations=1000):
    m, n = X.shape
    weights = np.zeros(n)
    cost_history = []

    for i in range(iterations):
        # Prediction
        y_pred = X.dot(weights)

        # Error
        error = y_pred - y

        # Gradient
        gradient = (2/m) * X.T.dot(error) + (2 * lambda_param * weights)

        # Update weights
        weights = weights - learning_rate * gradient

        # Cost (MSE + Penalty)
        cost = np.mean(error**2) + lambda_param * np.sum(weights**2)
        cost_history.append(cost)

    return weights, cost_history

# Testing Learning Rates
learning_rates = [0.0001, 0.001, 0.01, 0.1, 1, 10]
lambda_val = 1e-5

best_r2 = -float('inf')
best_lr = None
best_weights = None

print("\nTesting Learning Rates:")
for lr in learning_rates:
    weights, costs = ridge_gradient_descent(X_train, y_train, lr, lambda_val)

    # Evaluate
    y_pred_test = X_test.dot(weights)

    print(f"LR: {lr}, Final Cost: {costs[-1]:.4f}")


print(f"\nBest Learning Rate: {best_lr} with R2 Score: {best_r2:.4f}")

Generated Data Shape: (500, 7)

Testing Learning Rates:
LR: 0.0001, Final Cost: 95.0818
LR: 0.001, Final Cost: 93.9456
LR: 0.01, Final Cost: 93.7913
LR: 0.1, Final Cost: 93.7913
LR: 1, Final Cost: nan
LR: 10, Final Cost: nan

Best Learning Rate: None with R2 Score: -inf


  ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)
  cost = np.mean(error**2) + lambda_param * np.sum(weights**2)


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### Question 2: Hitters Dataset Analysis

**Objective:** Pre-process the Hitters data, perform scaling, and fit Linear, Ridge, and LASSO models.

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso, RidgeCV, LassoCV
from sklearn.metrics import r2_score

# ---------------------------
# DATASET PREVIEW (NUMPY STYLE)
# ---------------------------
data_hitters = pd.read_csv('/content/drive/MyDrive/Hitters (1).csv')
print("Hitters Dataset (First 5 rows as numpy array):\n")
print(data_hitters.head().values)

# (a) Pre-process the data
# Remove rows with missing values
data_hitters = data_hitters.dropna()

# Convert categorical columns to numerical using One-Hot Encoding (drop_first to avoid dummy variable trap)
data_hitters = pd.get_dummies(data_hitters, drop_first=True)

# (b) Separate input (X) and output (y) and perform scaling
X_h = data_hitters.drop('Salary', axis=1)
y_h = data_hitters['Salary']

# Split into training and testing sets (70% train, 30% test)
X_train_h, X_test_h, y_train_h, y_test_h = train_test_split(X_h, y_h, test_size=0.3, random_state=42)

# Standard Scaling
scaler_h = StandardScaler()
X_train_h_scaled = scaler_h.fit_transform(X_train_h)
X_test_h_scaled = scaler_h.transform(X_test_h)

# (c) Fit Linear, Ridge, and LASSO models
# Linear Regression
lin_reg_h = LinearRegression()
lin_reg_h.fit(X_train_h_scaled, y_train_h)

# Ridge Regression (alpha = 0.5748)
ridge_reg_h = Ridge(alpha=0.5748)
ridge_reg_h.fit(X_train_h_scaled, y_train_h)

# LASSO Regression (alpha = 0.5748)
lasso_reg_h = Lasso(alpha=0.5748)
lasso_reg_h.fit(X_train_h_scaled, y_train_h)

# (d) Evaluate performance
y_pred_lin_h = lin_reg_h.predict(X_test_h_scaled)
y_pred_ridge_h = ridge_reg_h.predict(X_test_h_scaled)
y_pred_lasso_h = lasso_reg_h.predict(X_test_h_scaled)

score_lin_h = r2_score(y_test_h, y_pred_lin_h)
score_ridge_h = r2_score(y_test_h, y_pred_ridge_h)
score_lasso_h = r2_score(y_test_h, y_pred_lasso_h)

print("\n--- Question 2 Results ---")
print("Linear Regression R2:", score_lin_h)
print("Ridge Regression R2:", score_ridge_h)
print("LASSO Regression R2:", score_lasso_h)

print("\nBest Model for Hitters:")
if score_ridge_h > score_lin_h and score_ridge_h > score_lasso_h:
    print("Ridge performed best.")
elif score_lasso_h > score_lin_h and score_lasso_h > score_ridge_h:
    print("LASSO performed best.")
else:
    print("Linear Regression performed best.")

Hitters Dataset (First 5 rows as numpy array):

[[293 66 1 30 29 14 1 293 66 1 30 29 14 'A' 'E' 446 33 20 nan 'A']
 [315 81 7 24 38 39 14 3449 835 69 321 414 375 'N' 'W' 632 43 10 475.0
  'N']
 [479 130 18 66 72 76 3 1624 457 63 224 266 263 'A' 'W' 880 82 14 480.0
  'A']
 [496 141 20 65 78 37 11 5628 1575 225 828 838 354 'N' 'E' 200 11 3 500.0
  'N']
 [321 87 10 39 42 30 2 396 101 12 48 46 33 'N' 'E' 805 40 4 91.5 'N']]

--- Question 2 Results ---
Linear Regression R2: 0.38062339666128975
Ridge Regression R2: 0.401939851567225
LASSO Regression R2: 0.394986120390298

Best Model for Hitters:
Ridge performed best.


  model = cd_fast.enet_coordinate_descent(


### Question 3: Cross Validation

**Objective:** Explore Ridge Cross Validation (RidgeCV) and Lasso Cross Validation (LassoCV) to find the optimal alpha for the Hitters dataset and compare it with the alpha used in Q2 (0.5748).

In [5]:
# RidgeCV: Built-in Cross Validation for Ridge
# We test a range of alphas
alphas_to_test = [0.001, 0.01, 0.1, 0.5748, 1, 10, 100]

ridge_cv = RidgeCV(alphas=alphas_to_test, scoring='r2')
ridge_cv.fit(X_train_h_scaled, y_train_h)

# LassoCV: Built-in Cross Validation for Lasso
lasso_cv = LassoCV(alphas=alphas_to_test, cv=5, random_state=42)
lasso_cv.fit(X_train_h_scaled, y_train_h)

print("--- Question 3 Results ---")
print("Optimal Alpha found by RidgeCV:", ridge_cv.alpha_)
print("Optimal Alpha found by LassoCV:", lasso_cv.alpha_)
print("Alpha used in Q2: 0.5748")

print("\nComparison:")
if ridge_cv.alpha_ == 0.5748:
    print("RidgeCV chose the same alpha as Q2.")
else:
    print("RidgeCV chose a different alpha, suggesting 0.5748 might not be optimal.")


--- Question 3 Results ---
Optimal Alpha found by RidgeCV: 100.0
Optimal Alpha found by LassoCV: 10.0
Alpha used in Q2: 0.5748

Comparison:
RidgeCV chose a different alpha, suggesting 0.5748 might not be optimal.


  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(


### Question 4: Boston Housing Dataset Analysis

**Objective:** Perform a similar regression analysis on the Boston Housing dataset.

In [7]:
# Load Boston Housing Data
data_boston = pd.read_csv('/content/drive/MyDrive/Boston_Housing.csv')

print("Boston Housing Dataset (First 5 rows as numpy array):\n")
print(data_boston.head().values)

# Pre-processing
# Check for nulls (usually none, but good practice)
data_boston = data_boston.dropna()

# Separate Input (X) and Output (y)
# 'MEDV' is the median value of owner-occupied homes (Target)
X_b = data_boston.drop('MEDV', axis=1)
y_b = data_boston['MEDV']

# Split Data
X_train_b, X_test_b, y_train_b, y_test_b = train_test_split(X_b, y_b, test_size=0.3, random_state=42)

# Scale Data
scaler_b = StandardScaler()
X_train_b_scaled = scaler_b.fit_transform(X_train_b)
X_test_b_scaled = scaler_b.transform(X_test_b)

# Fit Models
# Linear
lin_reg_b = LinearRegression()
lin_reg_b.fit(X_train_b_scaled, y_train_b)

# Ridge (Using Q2 alpha for consistency)
ridge_reg_b = Ridge(alpha=0.5748)
ridge_reg_b.fit(X_train_b_scaled, y_train_b)

# Lasso (Using Q2 alpha for consistency)
lasso_reg_b = Lasso(alpha=0.5748)
lasso_reg_b.fit(X_train_b_scaled, y_train_b)

# Evaluate
y_pred_lin_b = lin_reg_b.predict(X_test_b_scaled)
y_pred_ridge_b = ridge_reg_b.predict(X_test_b_scaled)
y_pred_lasso_b = lasso_reg_b.predict(X_test_b_scaled)

score_lin_b = r2_score(y_test_b, y_pred_lin_b)
score_ridge_b = r2_score(y_test_b, y_pred_ridge_b)
score_lasso_b = r2_score(y_test_b, y_pred_lasso_b)

print("\n--- Question 4 Results (Boston Housing) ---")
print("Linear Regression R2:", score_lin_b)
print("Ridge Regression R2:", score_ridge_b)
print("LASSO Regression R2:", score_lasso_b)

# Determine Best
print("\nBest Model for Boston Housing:")
if score_ridge_b > score_lin_b and score_ridge_b > score_lasso_b:
    print("Ridge performed best.")
elif score_lasso_b > score_lin_b and score_lasso_b > score_ridge_b:
    print("LASSO performed best.")
else:
    print("Linear Regression performed best.")

Boston Housing Dataset (First 5 rows as numpy array):

[[6.3200e-03 1.8000e+01 2.3100e+00 0.0000e+00 5.3800e-01 6.5750e+00
  6.5200e+01 4.0900e+00 1.0000e+00 2.9600e+02 1.5300e+01 3.9690e+02
  4.9800e+00 2.4000e+01]
 [2.7310e-02 0.0000e+00 7.0700e+00 0.0000e+00 4.6900e-01 6.4210e+00
  7.8900e+01 4.9671e+00 2.0000e+00 2.4200e+02 1.7800e+01 3.9690e+02
  9.1400e+00 2.1600e+01]
 [2.7290e-02 0.0000e+00 7.0700e+00 0.0000e+00 4.6900e-01 7.1850e+00
  6.1100e+01 4.9671e+00 2.0000e+00 2.4200e+02 1.7800e+01 3.9283e+02
  4.0300e+00 3.4700e+01]
 [3.2370e-02 0.0000e+00 2.1800e+00 0.0000e+00 4.5800e-01 6.9980e+00
  4.5800e+01 6.0622e+00 3.0000e+00 2.2200e+02 1.8700e+01 3.9463e+02
  2.9400e+00 3.3400e+01]
 [6.9050e-02 0.0000e+00 2.1800e+00 0.0000e+00 4.5800e-01 7.1470e+00
  5.4200e+01 6.0622e+00 3.0000e+00 2.2200e+02 1.8700e+01 3.9690e+02
  5.3300e+00 3.6200e+01]]

--- Question 4 Results (Boston Housing) ---
Linear Regression R2: 0.7112260057484932
Ridge Regression R2: 0.7109850548737549
LASSO Regress