# Assignment 5

### Importing Libraries

In [31]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, accuracy_score
import itertools
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.linear_model import RidgeCV, LassoCV

# Q1.

### Set a random seed for reproducibility & Defining parameters

In [2]:
np.random.seed(42)
n_samples = 500      # number of data points
n_features = 7       # number of features

### Create a covariance matrix to make features correlated

In [3]:
base_corr = 0.9
cov = np.full((n_features, n_features), base_corr)
np.fill_diagonal(cov, 1.0)  # diagonal = 1 (self-correlation)

### Generate multivariate normal data (highly correlated)

In [4]:
mean = np.zeros(n_features)
X = np.random.multivariate_normal(mean, cov, size=n_samples)

### Define true coefficients and bias

In [5]:
true_weights = np.array([2.5, -1.8, 1.2, 0.8, 0.5, 1.5, -0.7])
bias = 3.0

### Generate target variable with some noise They define how the target variable (y) is generated from your features (X) using a linear relationship + randomness

In [6]:
noise = np.random.normal(0, 1.5, size=n_samples)
y = X.dot(true_weights) + bias + noise

### Create DataFrame

In [7]:
columns = [f'Feature_{i+1}' for i in range(n_features)]
df = pd.DataFrame(X, columns=columns)
df['Target'] = y

### Display correlation matrix

In [8]:
print("Feature Correlation Matrix:")
print(df.corr())

Feature Correlation Matrix:
           Feature_1  Feature_2  Feature_3  Feature_4  Feature_5  Feature_6  \
Feature_1   1.000000   0.897925   0.899279   0.895184   0.894056   0.894905   
Feature_2   0.897925   1.000000   0.904170   0.914528   0.902848   0.916318   
Feature_3   0.899279   0.904170   1.000000   0.906929   0.890779   0.892586   
Feature_4   0.895184   0.914528   0.906929   1.000000   0.902122   0.895103   
Feature_5   0.894056   0.902848   0.890779   0.902122   1.000000   0.889743   
Feature_6   0.894905   0.916318   0.892586   0.895103   0.889743   1.000000   
Feature_7   0.903157   0.907611   0.905746   0.902690   0.903508   0.895431   
Target      0.903654   0.807414   0.869086   0.854551   0.850811   0.871616   

           Feature_7    Target  
Feature_1   0.903157  0.903654  
Feature_2   0.907611  0.807414  
Feature_3   0.905746  0.869086  
Feature_4   0.902690  0.854551  
Feature_5   0.903508  0.850811  
Feature_6   0.895431  0.871616  
Feature_7   1.000000  0.82968

### Save dataset

In [9]:
df.to_csv("highly_correlated_dataset.csv", index=False)
print("\nDataset saved as 'highly_correlated_dataset.csv'")


Dataset saved as 'highly_correlated_dataset.csv'


### Implement Ridge Regression using Gradient Descent

In [10]:
def ridge_regression_gd(X, y, lr=0.01, reg_lambda=1.0, epochs=1000):
    m, n = X.shape
    X_b = np.c_[np.ones((m, 1)), X]  # Add bias term
    w = np.zeros(n + 1)              # Initialize weights (including bias)

    for epoch in range(epochs):
        y_pred = X_b.dot(w)
        error = y_pred - y
        # Gradient includes regularization for all weights except bias
        gradient = (1/m) * X_b.T.dot(error) + (reg_lambda/m) * np.r_[0, w[1:]]
        w -= lr * gradient
    return w

### Define cost function

In [11]:
def ridge_cost(X, y, w, reg_lambda):
    m = len(y)
    X_b = np.c_[np.ones((m, 1)), X]
    y_pred = X_b.dot(w)
    mse = np.mean((y - y_pred)**2)
    reg_term = (reg_lambda / (2*m)) * np.sum(w[1:]**2)
    return mse + reg_term

### Trying different hyperparameters

In [12]:
learning_rates = [0.0001, 0.001, 0.01, 0.1]  # remove very large values
reg_params = [1e-10, 1e-5, 1e-3, 0.1, 1, 10]

best_r2 = -np.inf
best_params = None
best_cost = np.inf
best_w = None

for lr, reg_lambda in itertools.product(learning_rates, reg_params):
    try:
        w = ridge_regression_gd(X, y, lr=lr, reg_lambda=reg_lambda, epochs=500)
        cost = ridge_cost(X, y, w, reg_lambda)
        X_b = np.c_[np.ones((len(X), 1)), X]
        y_pred = X_b.dot(w)

        # Skip if invalid numbers appear
        if np.any(np.isnan(y_pred)) or np.any(np.isinf(y_pred)):
            print(f"⚠️ Skipping (lr={lr}, λ={reg_lambda}) due to NaN/Inf values.")
            continue

        r2 = r2_score(y, y_pred)

        if r2 > best_r2 or (r2 == best_r2 and cost < best_cost):
            best_r2 = r2
            best_cost = cost
            best_params = (lr, reg_lambda)
            best_w = w

    except Exception as e:
        print(f"⚠️ Skipping (lr={lr}, λ={reg_lambda}) due to error: {e}")
        continue

### Display best results

In [13]:
print("\n✅ Best Parameters:")
print(f"Learning Rate: {best_params[0]}")
print(f"Regularization (Lambda): {best_params[1]}")
print(f"Best R² Score: {best_r2:.5f}")
print(f"Minimum Cost: {best_cost:.5f}")
print("\nFinal Weights (including bias):")
print(best_w)


✅ Best Parameters:
Learning Rate: 0.1
Regularization (Lambda): 1e-10
Best R² Score: 0.87080
Minimum Cost: 2.30901

Final Weights (including bias):
[ 2.97682643  2.57040649 -1.98002997  1.17518879  0.70797883  0.63974798
  1.643919   -0.69436831]


### Saving the results

In [14]:
results = {
    "Learning Rate": best_params[0],
    "Lambda": best_params[1],
    "Best R2": best_r2,
    "Best Cost": best_cost
}
pd.DataFrame([results]).to_csv("ridge_best_results.csv", index=False)
print("\nResults saved to 'ridge_best_results.csv'")


Results saved to 'ridge_best_results.csv'


# Q2.

### Loading dataset

In [15]:
hitters = pd.read_csv('/content/Hitters (1).csv')

### Preprocessing

In [16]:
hitters.dropna(inplace=True)

### Label encoding

In [17]:
for col in hitters.select_dtypes(include=['object']).columns:
    hitters[col] = LabelEncoder().fit_transform(hitters[col])

### Target and data division

In [18]:
X = hitters.drop('Salary', axis=1)
y = hitters['Salary']

### Training and Testing data

In [19]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### Standardizing

In [20]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

### Learning Rate

In [21]:
ridge_alpha = lasso_alpha = 0.5748

### Model training

In [22]:
models = {
    "Linear": LinearRegression(),
    "Ridge": Ridge(alpha=ridge_alpha),
    "Lasso": Lasso(alpha=lasso_alpha)
}

In [23]:
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(f"{name} Regression R2 Score: {r2_score(y_test, y_pred):.4f}")

Linear Regression R2 Score: 0.2907
Ridge Regression R2 Score: 0.3000
Lasso Regression R2 Score: 0.2993


  model = cd_fast.enet_coordinate_descent(


# Q3.

### Dataset Loading

In [40]:
data_url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
target = raw_df.values[1::2, 2]

  raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)


### Data division

In [25]:
X,y = data,target

### Training & Testing data & Standardization

In [26]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

### Ridge & Lasso

In [27]:
ridge_cv = RidgeCV(alphas=[0.1, 1, 10], store_cv_values=True)
lasso_cv = LassoCV(alphas=[0.1, 1, 10], cv=5, random_state=42)

In [28]:
ridge_cv.fit(X_train, y_train)
lasso_cv.fit(X_train, y_train)

ridge_pred = ridge_cv.predict(X_test)
lasso_pred = lasso_cv.predict(X_test)



### Result

In [29]:
print(f"Best Ridge Alpha: {ridge_cv.alpha_}")
print(f"Ridge R2: {r2_score(y_test, ridge_pred):.4f}")
print(f"Best Lasso Alpha: {lasso_cv.alpha_}")
print(f"Lasso R2: {r2_score(y_test, lasso_pred):.4f}")

Best Ridge Alpha: 10.0
Ridge R2: 0.6660
Best Lasso Alpha: 0.1
Lasso R2: 0.6501


# Q4.

### Loading Dataset

In [32]:
from sklearn.datasets import load_iris

In [33]:
iris = load_iris()
X, y = iris.data, iris.target

### Standardization

In [34]:
X = StandardScaler().fit_transform(X)

### Sigmoid Function

In [35]:
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

### Training logistic regression model

In [36]:
def train_logistic_ovr(X, y, lr=0.1, epochs=1000):
    m, n = X.shape
    classes = np.unique(y)
    W = np.zeros((len(classes), n))
    b = np.zeros(len(classes))
    for idx, c in enumerate(classes):
        y_c = (y == c).astype(int)
        for _ in range(epochs):
            z = np.dot(X, W[idx]) + b[idx]
            h = sigmoid(z)
            grad_w = (1/m) * np.dot(X.T, (h - y_c))
            grad_b = (1/m) * np.sum(h - y_c)
            W[idx] -= lr * grad_w
            b[idx] -= lr * grad_b
    return W, b

### Predicting Function

In [37]:
def predict_ovr(X, W, b):
    z = np.dot(X, W.T) + b
    probs = sigmoid(z)
    return np.argmax(probs, axis=1)

In [38]:
W, b = train_logistic_ovr(X, y, lr=0.1, epochs=2000)
y_pred = predict_ovr(X, W, b)

### Result

In [39]:
print(f"Training Accuracy: {accuracy_score(y, y_pred):.4f}")

Training Accuracy: 0.9467
