<a href="https://colab.research.google.com/github/PrishaAggarwal/Machine_Learning_Assignments/blob/main/ML_5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import r2_score

# 1. Build a synthetic dataset whose features are all noisy rescalings of one signal

np.random.seed(42)
n_samples = 500

x_base = np.random.randn(n_samples)

# Column 0 is the raw signal; every other column is scale * signal plus a small
# Gaussian perturbation, which makes all seven features strongly correlated.
# The noise vectors are drawn one after another in list order, so the random
# stream is consumed in a fixed sequence.
feature_scales = [0.9, 1.1, 0.8, 1.05, 0.95, 1.2]
feature_columns = [x_base]
for scale in feature_scales:
    feature_columns.append(x_base * scale + np.random.randn(n_samples) * 0.1)
X = np.column_stack(feature_columns)

# Targets: a fixed linear combination of the features plus Gaussian noise
true_w = np.array([4, -3, 2.5, 5, -1, 3, 2])
y = np.dot(X, true_w) + np.random.randn(n_samples) * 0.5

# Prepend a column of ones so the first weight plays the role of the bias
X = np.hstack([np.ones((n_samples, 1)), X])

# 2. Ridge Regression using Gradient Descent (Stable Version)
def ridge_gradient_descent(X, y, lr, lam, iterations=1000):
    """Fit ridge regression with batch gradient descent.

    The first column of ``X`` is treated as the bias column: its weight is
    excluded from the L2 term of the gradient and from the penalty in the cost.

    Args:
        X: 2-D design matrix, one row per sample.
        y: Target values, one per row of ``X``.
        lr: Step size applied to each (clipped) gradient.
        lam: L2 penalty strength.
        iterations: Number of update steps to run.

    Returns:
        A ``(w, cost)`` tuple with the learned weights and the final penalised
        cost. If any weight turns NaN or infinite during training, the loop
        stops early and the cost is returned as ``np.nan``.
    """
    m, n = X.shape
    w = np.zeros(n)
    inv_m = 1/m
    penalty_scale = lam/m

    for _step in range(iterations):
        residual = X.dot(w) - y

        # Data-fit gradient, plus the ridge term on every weight except the bias
        grad = inv_m * X.T.dot(residual)
        grad[1:] += penalty_scale * w[1:]

        # Bound each component so a single step cannot blow the weights up
        grad = np.clip(grad, -1e5, 1e5)

        w -= lr * grad

        # Abort as soon as the weights stop being finite numbers
        if not np.isfinite(w).all():
            return w, np.nan

    # Penalised cost at the final weights (bias excluded from the penalty)
    data_term = (1/(2*m)) * np.sum((X.dot(w) - y)**2)
    penalty_term = (lam/(2*m)) * np.sum(w[1:]**2)
    return w, data_term + penalty_term

# 3. Hyperparameter Search

learning_rates = [0.0001, 0.001, 0.01, 0.1, 1, 10]
lambdas = [10**-15, 10**-10, 10**-5, 10**-3, 0, 1, 10, 20]

results = []

# Try every (learning rate, lambda) pair. A run is recorded only when both its
# final cost and its predictions are free of NaN values.
for lr in learning_rates:
    for lam in lambdas:
        w, final_cost = ridge_gradient_descent(X, y, lr, lam)
        if not np.isnan(final_cost):
            y_pred = X.dot(w)
            if not np.isnan(y_pred).any():
                r2 = r2_score(y, y_pred)
                results.append([lr, lam, final_cost, r2])


# Tabulate the runs that survived the NaN checks
result_columns = ["Learning Rate", "Lambda", "Cost", "R2 Score"]
df = pd.DataFrame(results, columns=result_columns)
print("\n=== All Results ===\n")
print(df)

# 4. Best Parameters

# Pick the grid row with the highest R2 score
best_index = df["R2 Score"].idxmax()
best = df.loc[best_index]
best_lr, best_lam = best["Learning Rate"], best["Lambda"]

print("\n=== Best Hyperparameters ===")
print(best)

# Re-run gradient descent with the winning settings and score it on the same data
final_w, final_cost = ridge_gradient_descent(X, y, best_lr, best_lam)
final_predictions = X.dot(final_w)
final_r2 = r2_score(y, final_predictions)

print("\n=== Final Model Performance ===")
final_summary = (
    ("Best Learning Rate   :", best_lr),
    ("Best Lambda          :", best_lam),
    ("Final Cost Function  :", final_cost),
    ("Final R2 Score       :", final_r2),
    ("Final Weights        :", final_w),
)
for label, value in final_summary:
    print(label, value)


=== All Results ===

    Learning Rate        Lambda          Cost      R2 Score
0          0.0001  1.000000e-15  1.890233e+01  7.415456e-01
1          0.0001  1.000000e-10  1.890233e+01  7.415456e-01
2          0.0001  1.000000e-05  1.890233e+01  7.415456e-01
3          0.0001  1.000000e-03  1.890234e+01  7.415456e-01
4          0.0001  0.000000e+00  1.890233e+01  7.415456e-01
5          0.0001  1.000000e+00  1.891078e+01  7.415015e-01
6          0.0001  1.000000e+01  1.898674e+01  7.411045e-01
7          0.0001  2.000000e+01  1.907096e+01  7.406635e-01
8          0.0010  1.000000e-15  3.377093e-01  9.953825e-01
9          0.0010  1.000000e-10  3.377093e-01  9.953825e-01
10         0.0010  1.000000e-05  3.377095e-01  9.953825e-01
11         0.0010  1.000000e-03  3.377306e-01  9.953825e-01
12         0.0010  0.000000e+00  3.377093e-01  9.953825e-01
13         0.0010  1.000000e+00  3.590258e-01  9.953817e-01
14         0.0010  1.000000e+01  5.503142e-01  9.953675e-01
15         0.0010 

In [None]:
# Q2
# Using another dataset (scikit-learn's diabetes data) instead of the Hitters dataset, since that file isn't opening
# (a) Load & Preprocess Dataset
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_squared_error, r2_score

# Fetch the scikit-learn diabetes dataset
data = load_diabetes()

# Wrap the features and the target in pandas containers
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target)

print("Shape of X:", X.shape)
print("Missing values:\n", X.isna().sum())

# (b) Fit Linear, Ridge, LASSO Regression

# Hold out 20% of the rows for testing (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# (c) Feature Scaling
# The scaler learns its statistics from the training rows only and is then
# applied unchanged to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# One regularization strength shared by Ridge and LASSO
alpha_val = 0.5748

# Build the three estimators
lin_model = LinearRegression()
ridge_model = Ridge(alpha=alpha_val)
lasso_model = Lasso(alpha=alpha_val, max_iter=10000)

# Train each estimator on the scaled training split
for model in (lin_model, ridge_model, lasso_model):
    model.fit(X_train_scaled, y_train)

# (d) Model Evaluation
def evaluate(model, X_test, y_test):
    """Score a fitted model on held-out data.

    Args:
        model: Object exposing ``predict(X)``.
        X_test: Feature matrix passed to ``model.predict``.
        y_test: True target values compared against the predictions.

    Returns:
        ``(mse, r2)``: mean squared error and R2 score of the predictions.
    """
    predictions = model.predict(X_test)
    return (
        mean_squared_error(y_test, predictions),
        r2_score(y_test, predictions),
    )

# Score the three fitted models, in order, on the scaled test split
(lin_mse, lin_r2), (ridge_mse, ridge_r2), (lasso_mse, lasso_r2) = (
    evaluate(fitted, X_test_scaled, y_test)
    for fitted in (lin_model, ridge_model, lasso_model)
)

# Report MSE and R2 for each model
print("\n Model Performance")
print(f"Linear Regression -> MSE: {lin_mse:.4f}, R2: {lin_r2:.4f}")
print(f"Ridge Regression  -> MSE: {ridge_mse:.4f}, R2: {ridge_r2:.4f}")
print(f"LASSO Regression  -> MSE: {lasso_mse:.4f}, R2: {lasso_r2:.4f}")

# The winner is the (name, R2) pair with the largest R2 on the test split
r2_by_model = [("Linear", lin_r2), ("Ridge", ridge_r2), ("LASSO", lasso_r2)]
best = max(r2_by_model, key=lambda entry: entry[1])
print(f"\nBest model: {best[0]} (highest R2 score)")


Shape of X: (442, 10)
Missing values:
 age    0
sex    0
bmi    0
bp     0
s1     0
s2     0
s3     0
s4     0
s5     0
s6     0
dtype: int64

 Model Performance
Linear Regression -> MSE: 2900.1936, R2: 0.4526
Ridge Regression  -> MSE: 2894.7666, R2: 0.4536
LASSO Regression  -> MSE: 2851.9489, R2: 0.4617

Best model: LASSO (highest R2 score)


In [None]:
#Q3
# RidgeCV & LassoCV on California housing since boston dataset is removed from latest colab version

import warnings
# Install a blanket "ignore" filter: warnings raised after this point are not shown.
# NOTE(review): this also hides any convergence warnings the estimators below
# might emit — consider narrowing the filter if those matter.
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import RidgeCV, LassoCV
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Load the California housing dataset (used here in place of Boston)
data = fetch_california_housing()

# Wrap the features and the target in pandas containers
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target)

print("Dataset Shape:", X.shape)
print("Missing Values:\n", X.isna().sum())

# Hold out 20% of the rows for testing (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Standardize features using statistics learned from the training rows only
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Candidate regularization strengths: 50 log-spaced values from 1e-3 to 1e3
alphas = np.logspace(-3, 3, 50)

# --- RidgeCV: alpha selected by cross-validation on negative MSE ---
ridge_cv = RidgeCV(alphas=alphas, scoring='neg_mean_squared_error').fit(
    X_train_scaled, y_train
)
ridge_pred = ridge_cv.predict(X_test_scaled)
ridge_mse, ridge_r2 = (
    mean_squared_error(y_test, ridge_pred),
    r2_score(y_test, ridge_pred),
)

print("\n----- RidgeCV Results -----")
print("Best α:", ridge_cv.alpha_)
print("MSE:", ridge_mse)
print("R2 Score:", ridge_r2)

# --- LassoCV: same alpha grid, 5-fold cross-validation ---
lasso_cv = LassoCV(alphas=alphas, max_iter=5000, cv=5).fit(
    X_train_scaled, y_train
)
lasso_pred = lasso_cv.predict(X_test_scaled)
lasso_mse, lasso_r2 = (
    mean_squared_error(y_test, lasso_pred),
    r2_score(y_test, lasso_pred),
)

print("\n----- LassoCV Results -----")
print("Best α:", lasso_cv.alpha_)
print("MSE:", lasso_mse)
print("R2 Score:", lasso_r2)

# Side-by-side summary of the two cross-validated models
print("\n----- Final Comparison -----")
print(f"RidgeCV -> MSE: {ridge_mse:.4f}, R2: {ridge_r2:.4f}")
print(f"LassoCV -> MSE: {lasso_mse:.4f}, R2: {lasso_r2:.4f}")

# RidgeCV wins only on a strictly higher test R2; ties go to LassoCV
print("\nBest Model: RidgeCV" if ridge_r2 > lasso_r2 else "\nBest Model: LassoCV")


Dataset Shape: (20640, 8)
Missing Values:
 MedInc        0
HouseAge      0
AveRooms      0
AveBedrms     0
Population    0
AveOccup      0
Latitude      0
Longitude     0
dtype: int64

----- RidgeCV Results -----
Best α: 2.023589647725158
MSE: 0.5558175016320879
R2 Score: 0.5758442510228978

----- LassoCV Results -----
Best α: 0.001
MSE: 0.5544913600832686
R2 Score: 0.5768562568705682

----- Final Comparison -----
RidgeCV -> MSE: 0.5558, R2: 0.5758
LassoCV -> MSE: 0.5545, R2: 0.5769

Best Model: LassoCV


In [None]:
#Q4
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# 1. Load Iris Dataset
iris = load_iris()
X, y = iris.data, iris.target   # 4 features; 3 classes -> 0,1,2

# Hold out 20% of the samples for testing (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Standardize features using statistics learned from the training split only
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# 2️. Sigmoid Function
def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

    The value is computed from ``exp(-|z|)``, which always lies in ``(0, 1]``,
    so the exponential cannot overflow for large-magnitude negative inputs
    (the naive form evaluates ``exp(+large)`` there and overflows).

    Args:
        z: Scalar or array-like of real numbers.

    Returns:
        A NumPy float scalar for scalar input, otherwise an array with the
        same shape as ``z``. NaN inputs yield NaN.
    """
    z = np.asarray(z, dtype=float)
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) — the same function,
    # written so that only the non-overflowing exponential is used.
    out = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Hand scalars back as scalars rather than 0-d arrays
    return out[()] if out.ndim == 0 else out

# 3️. Train Logistic Regression for ONE class (binary)
def train_binary_logistic(X, y, lr=0.1, epochs=2000):
    """Train a binary logistic-regression model with batch gradient descent.

    Args:
        X: 2-D feature matrix, one row per sample.
        y: Binary labels (0 or 1), one per row of ``X``.
        lr: Step size for each parameter update.
        epochs: Number of full-batch update steps.

    Returns:
        ``(W, b)``: the learned weight vector and bias term.
    """
    m, n = X.shape
    W = np.zeros(n)
    b = 0
    inv_m = 1/m

    for _ in range(epochs):
        # Difference between predicted probability and label, per sample
        error = sigmoid(X.dot(W) + b) - y

        # Both steps use the error computed before either parameter changes
        W -= lr * (inv_m * np.dot(X.T, error))
        b -= lr * (inv_m * np.sum(error))

    return W, b

# 4️. ONE VS REST TRAINING
def train_ovr(X, y):
    """Train one binary logistic classifier per distinct label (one-vs-rest).

    Args:
        X: 2-D feature matrix, one row per sample.
        y: Array of class labels, one per row of ``X``.

    Returns:
        ``(weights, biases)``: two dicts keyed by class label, holding each
        classifier's weight vector and bias respectively.
    """
    weights, biases = {}, {}

    for cls in np.unique(y):
        print(f"Training classifier for class {cls} vs rest...")

        # 1 where the sample belongs to this class, 0 for every other class
        is_cls = (y == cls).astype(int)
        weights[cls], biases[cls] = train_binary_logistic(X, is_cls)

    return weights, biases

# 5️. Making Predictions
def predict_ovr(X, weights, biases):
    """Predict class labels with a set of one-vs-rest linear classifiers.

    Each class is scored by its linear output ``X . W + b`` and the label with
    the highest score wins. Because the sigmoid is strictly increasing, ranking
    the raw linear outputs picks the same class as ranking the probabilities,
    and it still tells classes apart when the probabilities would round to the
    same value.

    Args:
        X: Feature matrix (one row per sample) or a single 1-D sample.
        weights: Dict mapping class label -> weight vector.
        biases: Dict mapping class label -> bias; must contain every key of
            ``weights``.

    Returns:
        Array of predicted class labels taken from the keys of ``weights``
        (a single label when ``X`` is one 1-D sample).

    Raises:
        ValueError: If ``weights`` contains no classifiers.
    """
    labels = list(weights)
    if not labels:
        raise ValueError("predict_ovr needs at least one trained classifier")

    # Rows follow the order of `labels` -> shape: (num_classes, num_samples)
    logits = np.array([np.dot(X, weights[cls]) + biases[cls] for cls in labels])

    # argmax yields row positions; translate them back into the actual labels
    # so the result is right even when the labels are not 0..K-1.
    winners = np.argmax(logits, axis=0)
    return np.asarray(labels)[winners]

# 6️. TRAIN OVR MODEL
weights, biases = train_ovr(X_train, y_train)

# 7. Predict on the held-out split and measure the fraction predicted correctly
y_pred = predict_ovr(X_test, weights, biases)
acc = accuracy_score(y_true=y_test, y_pred=y_pred)

evaluation_report = (
    ("\nPredictions:", y_pred),
    ("True labels:", y_test),
    ("\nFINAL ACCURACY:", acc),
)
for label, value in evaluation_report:
    print(label, value)


Training classifier for class 0 vs rest...
Training classifier for class 1 vs rest...
Training classifier for class 2 vs rest...

Predictions: [1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 2 2 1 1 2 0 2 0 2 2 2 2 2 0 0]
True labels: [1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0]

FINAL ACCURACY: 0.9666666666666667
