<a href="https://colab.research.google.com/github/Simran085/ML_LAB_ASS_102216115/blob/main/A4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:

# Q1.
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

# Step 1: Build a synthetic dataset whose seven features are noisy copies of one base column
np.random.seed(42)
n_samples = 1000
X_base = 10 * np.random.rand(n_samples, 1)  # Shared base column, uniform on [0, 10)

# Column k (1-based) is the base column plus N(0, 0.1) noise scaled by k, so every
# feature is strongly correlated with the base and with each other.
X = np.concatenate(
    [
        X_base + np.random.normal(0, 0.1, (n_samples, 1)) * scale
        for scale in range(1, 8)
    ],
    axis=1,
)
# Target depends only on the base column, plus unit-variance Gaussian noise
y = X_base[:, 0] * 3 + np.random.normal(0, 1, n_samples)

# Tabular view of the same data: Feature_1..Feature_7 followed by Target
df = pd.DataFrame(
    X,
    columns=[f'Feature_{i+1}' for i in range(X.shape[1])],
).assign(Target=y)

# Step 2: Hold out 20% of the rows for testing (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 3: Implement Ridge Regression with Gradient Descent
def ridge_regression_gradient_descent(X, y, alpha, learning_rate, n_iterations, max_grad=1e3):
    """Fit a ridge (L2-regularised) linear model with batch gradient descent.

    The objective being minimised is

        J(w, b) = (1 / n) * sum((y - (X @ w + b)) ** 2) + alpha * sum(w ** 2)

    The bias ``b`` is not penalised. Weights and bias both start at zero.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature matrix.
    y : array-like of shape (n_samples,)
        Target values.
    alpha : float
        L2 regularisation strength applied to the weights.
    learning_rate : float
        Step size used for every parameter update.
    n_iterations : int
        Maximum number of gradient steps.
    max_grad : float, optional
        Each gradient component is clipped to ``[-max_grad, max_grad]`` before
        the update. Defaults to ``1e3``.

    Returns
    -------
    weights : numpy.ndarray of shape (n_features,)
        Weights after the last update.
    bias : float
        Bias after the last update.
    cost_history : list of float
        ``cost_history[i]`` is the objective evaluated at the parameters used
        at the start of iteration ``i`` (i.e. before that iteration's update),
        so residual term and penalty term always refer to the same weights.

    Notes
    -----
    If the predictions become NaN or infinite, a warning is printed and the
    loop stops early; the parameters returned in that case are the ones that
    produced the non-finite predictions.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)

    n_samples, n_features = X.shape
    weights = np.zeros(n_features)
    bias = 0.0
    cost_history = []

    for i in range(n_iterations):
        # Prediction with the current parameters
        y_pred = np.dot(X, weights) + bias

        # Stop as soon as the iterate has blown up
        if not np.all(np.isfinite(y_pred)):
            print(f"Warning: Numerical issues at iteration {i}. Check learning rate or alpha.")
            break

        residuals = y - y_pred

        # Record the cost BEFORE updating, so the data term and the penalty
        # term are evaluated at the same weights.
        cost = (1 / n_samples) * np.sum(residuals ** 2) + alpha * np.sum(weights ** 2)
        cost_history.append(cost)

        # Gradients of the objective w.r.t. weights and bias
        dw = (-2 / n_samples) * np.dot(X.T, residuals) + 2 * alpha * weights
        db = (-2 / n_samples) * np.sum(residuals)

        # Clip gradients to limit the size of a single step
        dw = np.clip(dw, -max_grad, max_grad)
        db = np.clip(db, -max_grad, max_grad)

        # Gradient-descent update
        weights -= learning_rate * dw
        bias -= learning_rate * db

    return weights, bias, cost_history

# Rerun the training loop with reasonable learning rates and alpha values

# Step 4: Test different values of learning rate and regularization parameter
# NOTE(review): the best combination is chosen by R2 on the test split, so the
# reported "best" score is an optimistic estimate; a separate validation split
# would be needed for an unbiased figure.
learning_rates = [0.0001, 0.001, 0.01, 0.1, 1, 10]
regularization_params = [1e-15, 1e-10, 1e-5, 1e-3, 0, 1, 10, 20]

best_r2_score = -np.inf
best_params = (None, None)
best_weights, best_bias = None, None

for lr in learning_rates:
    for alpha in regularization_params:
        weights, bias, cost_history = ridge_regression_gradient_descent(X_train, y_train, alpha, lr, n_iterations=1000)
        # Train-set predictions are not read in this cell; kept in case a later
        # cell inspects them — TODO confirm and drop if unused.
        y_pred_train = np.dot(X_train, weights) + bias
        y_pred_test = np.dot(X_test, weights) + bias

        # A diverged run can leave NaN/inf in the parameters; r2_score rejects
        # non-finite input, so report the run and move on instead of aborting
        # the whole sweep.
        if not np.all(np.isfinite(y_pred_test)):
            print(f'Learning Rate: {lr}, Alpha: {alpha}, R2 Score: undefined (non-finite predictions)')
            continue

        r2 = r2_score(y_test, y_pred_test)

        # Keep the combination with the highest test R2 seen so far
        if r2 > best_r2_score:
            best_r2_score = r2
            best_params = (lr, alpha)
            best_weights, best_bias = weights, bias

        print(f'Learning Rate: {lr}, Alpha: {alpha}, R2 Score: {r2:.4f}')

# Step 5: Print the best parameters
print("\nBest Parameters:")
print(f'Learning Rate: {best_params[0]}, Regularization Parameter (Alpha): {best_params[1]}')
print(f'Best R2 Score: {best_r2_score:.4f}')


Learning Rate: 0.0001, Alpha: 1e-15, R2 Score: 0.9847
Learning Rate: 0.0001, Alpha: 1e-10, R2 Score: 0.9847
Learning Rate: 0.0001, Alpha: 1e-05, R2 Score: 0.9847
Learning Rate: 0.0001, Alpha: 0.001, R2 Score: 0.9847
Learning Rate: 0.0001, Alpha: 0, R2 Score: 0.9847
Learning Rate: 0.0001, Alpha: 1, R2 Score: 0.9847
Learning Rate: 0.0001, Alpha: 10, R2 Score: 0.9786
Learning Rate: 0.0001, Alpha: 20, R2 Score: 0.9627
Learning Rate: 0.001, Alpha: 1e-15, R2 Score: 0.9856
Learning Rate: 0.001, Alpha: 1e-10, R2 Score: 0.9856
Learning Rate: 0.001, Alpha: 1e-05, R2 Score: 0.9856
Learning Rate: 0.001, Alpha: 0.001, R2 Score: 0.9856
Learning Rate: 0.001, Alpha: 0, R2 Score: 0.9856
Learning Rate: 0.001, Alpha: 1, R2 Score: 0.9849
Learning Rate: 0.001, Alpha: 10, R2 Score: 0.9753
Learning Rate: 0.001, Alpha: 20, R2 Score: 0.9546
Learning Rate: 0.01, Alpha: 1e-15, R2 Score: -520.6752
Learning Rate: 0.01, Alpha: 1e-10, R2 Score: -520.6752
Learning Rate: 0.01, Alpha: 1e-05, R2 Score: -520.6752
Learnin

In [4]:
# Q3.
# Step 1: Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import RidgeCV, LassoCV
from sklearn.metrics import mean_squared_error, r2_score

# Step 2: Load the California housing data as a feature frame and a target array
housing = fetch_california_housing()
X, y = pd.DataFrame(housing.data, columns=housing.feature_names), housing.target

# Step 3: Hold out 20% of the rows for testing (fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 4: Standardize the features; the scaler is fitted on the training split
# only and then applied unchanged to the test split
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 5: Cross-validated Ridge and Lasso over a shared grid of 13
# log-spaced alphas from 1e-6 to 1e6
alphas = np.logspace(-6, 6, 13)

# RidgeCV: 5-fold CV, alpha selected by R2
ridge_cv = RidgeCV(alphas=alphas, scoring='r2', cv=5).fit(X_train_scaled, y_train)
y_pred_ridge = ridge_cv.predict(X_test_scaled)

# LassoCV: 5-fold CV with a raised iteration cap
lasso_cv = LassoCV(alphas=alphas, cv=5, max_iter=10000).fit(X_train_scaled, y_train)
y_pred_lasso = lasso_cv.predict(X_test_scaled)

# Step 6: Report the selected alpha and the test-set metrics for each model
for heading, fitted_model, predictions in (
    ("Ridge Regression", ridge_cv, y_pred_ridge),
    ("\nLasso Regression", lasso_cv, y_pred_lasso),
):
    print(heading)
    print(f"Best Alpha: {fitted_model.alpha_}")
    print(f"R2 Score: {r2_score(y_test, predictions):.4f}")
    print(f"Mean Squared Error: {mean_squared_error(y_test, predictions):.4f}")


Ridge Regression
Best Alpha: 1e-06
R2 Score: 0.5758
Mean Squared Error: 0.5559

Lasso Regression
Best Alpha: 0.001
R2 Score: 0.5769
Mean Squared Error: 0.5545
