In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

import warnings     # filter warning messages
warnings.simplefilter(action="ignore")

def calc_metrics(y_true, y_pred):
    """
    Calculate Mean Squared Error (MSE) and R-squared (R^2) score.

    Parameters
    ----------
    y_true : array-like
        Observed target values (list, NumPy array or pandas Series).
    y_pred : array-like
        Predicted values, given in the same order as ``y_true``.

    Returns
    -------
    tuple
        ``(mse, r2)``: the mean of the squared residuals, and one minus the
        ratio of the residual sum of squares to the total sum of squares
        around the mean of ``y_true``.

    Notes
    -----
    Both inputs are converted to plain float arrays, so values are compared
    by position. This lets plain Python lists be passed and keeps two pandas
    Series with different indexes from being aligned by label.

    When ``y_true`` is constant the total sum of squares is zero and ``r2``
    follows NumPy division semantics (``nan`` or ``-inf``).
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    # Squared residuals are needed by both metrics; compute them once.
    squared_errors = (y_true - y_pred) ** 2
    total_sum_squares = np.sum((y_true - np.mean(y_true)) ** 2)

    mse = np.mean(squared_errors)
    r2 = 1 - np.sum(squared_errors) / total_sum_squares
    return mse, r2

def ridge_gradient_descent(X, y, alpha, lambda_, num_iterations):
    """
    Perform gradient descent with Ridge Regression.

    Minimises ``mean((y - X_b @ theta) ** 2) + lambda_ * sum(theta[1:] ** 2)``
    where ``X_b`` is ``X`` with a leading column of ones.

    Parameters
    ----------
    X : array-like of shape (num_samples, num_features)
        Feature matrix, without an intercept column.
    y : array-like of shape (num_samples,)
        Target values.
    alpha : float
        Learning rate (step size of each update).
    lambda_ : float
        Ridge (L2) regularisation strength.
    num_iterations : int
        Number of gradient steps; ``0`` returns the all-zero start vector.

    Returns
    -------
    numpy.ndarray of shape (num_features + 1,)
        Fitted parameters; ``theta[0]`` is the intercept, ``theta[1:]`` are
        the feature coefficients.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)

    num_samples, num_features = X.shape
    theta = np.zeros(num_features + 1)  # Initialize model parameters to zero, including intercept

    # Insert a column of ones at the beginning of X (intercept term)
    X_with_intercept = np.hstack([np.ones((num_samples, 1)), X])

    # The intercept is excluded from the penalty: shrinking it would pull
    # every prediction towards zero rather than only damping the feature
    # coefficients.
    penalty_mask = np.ones(num_features + 1)
    penalty_mask[0] = 0.0

    for _ in range(num_iterations):
        # Residuals under the current parameters
        residuals = y - X_with_intercept @ theta

        # Gradient of the mean squared error plus the Ridge penalty
        gradients = (
            -(2 / num_samples) * (X_with_intercept.T @ residuals)
            + 2 * lambda_ * penalty_mask * theta
        )

        # Update parameters
        theta -= alpha * gradients

    return theta

# Read and preprocess the data
house_data_train = pd.read_csv('train.csv').drop(columns=['Unnamed: 0', 'zipcode'])
house_data_test = pd.read_csv('test.csv').drop(columns=['Unnamed: 0', 'id', 'date', 'zipcode'])

# Work on float copies of the original data. Casting up front means the
# scaled values written back below never rely on silent dtype upcasting of
# integer columns, which pandas deprecates (the warning is hidden by the
# global warnings filter above).
house_data_train_nz = house_data_train.astype(float)
house_data_test_nz = house_data_test.astype(float)

# Normalize all columns except the first one; the scaler is fitted on the
# training data only and then reused for the test data.
scaler = StandardScaler()
house_data_train_nz.iloc[:, 1:] = scaler.fit_transform(house_data_train_nz.iloc[:, 1:])
house_data_test_nz.iloc[:, 1:] = scaler.transform(house_data_test_nz.iloc[:, 1:])

# Divide values of the first column (used as the response below) by 1000
house_data_train_nz.iloc[:, 0] /= 1000
house_data_test_nz.iloc[:, 0] /= 1000

# Assign response variable (y) and features (X) for training and test data
y_train = house_data_train_nz.iloc[:, 0]
X_train = house_data_train_nz.iloc[:, 1:]

y_test = house_data_test_nz.iloc[:, 0]
X_test = house_data_test_nz.iloc[:, 1:]

# Plain arrays and design matrices (leading column of ones for the
# intercept) do not depend on the hyper-parameters, so build them once.
X_train_values = X_train.values
y_train_values = y_train.values
y_test_values = y_test.values
X_train_design = np.hstack([np.ones((len(X_train), 1)), X_train_values])
X_test_design = np.hstack([np.ones((len(X_test), 1)), X_test.values])

# Define learning rates, lambda (regularization parameter), and number of iterations
learning_rates = [0.01, 0.1, 0.5]
lambda_values = [0.1, 0.5, 1.0]
num_iterations_list = [10, 50, 100]

# Initialize results dictionary
results_ridge = {'Learning Rate': [], 'Lambda': [], 'Num Iterations': [],
           'MSE Train': [], 'R2 Train': [],
           'MSE Test': [], 'R2 Test': []}

# Iterate over learning rates, lambda values, and number of iterations
for alpha in learning_rates:
    for lambda_ in lambda_values:
        for num_iterations in num_iterations_list:
            # Perform gradient descent with Ridge Regression
            theta = ridge_gradient_descent(X_train_values, y_train_values, alpha, lambda_, num_iterations)

            # Predict on the training set and evaluate
            y_train_pred = X_train_design @ theta
            mse_train, r2_train = calc_metrics(y_train_values, y_train_pred)

            # Predict on the test set and evaluate
            y_test_pred = X_test_design @ theta
            mse_test, r2_test = calc_metrics(y_test_values, y_test_pred)

            # Append one row of results to the dictionary
            row = {'Learning Rate': alpha, 'Lambda': lambda_, 'Num Iterations': num_iterations,
                   'MSE Train': mse_train, 'R2 Train': r2_train,
                   'MSE Test': mse_test, 'R2 Test': r2_test}
            for column, value in row.items():
                results_ridge[column].append(value)

# Create a DataFrame from the results dictionary
results_ridge_df = pd.DataFrame(results_ridge)

# Display the results
print(results_ridge_df)

    Learning Rate  Lambda  Num Iterations      MSE Train       R2 Train  \
0            0.01     0.1              10   2.367367e+05  -1.056128e+00   
1            0.01     0.1              50   7.538151e+04   3.452895e-01   
2            0.01     0.1             100   4.207005e+04   6.346094e-01   
3            0.01     0.5              10   2.407107e+05  -1.090643e+00   
4            0.01     0.5              50   9.847663e+04   1.447017e-01   
5            0.01     0.5             100   7.081337e+04   3.849652e-01   
6            0.01     1.0              10   2.455374e+05  -1.132564e+00   
7            0.01     1.0              50   1.261911e+05  -9.600672e-02   
8            0.01     1.0             100   1.086283e+05   5.653107e-02   
9            0.10     0.1              10   3.978950e+04   6.544166e-01   
10           0.10     0.1              50   3.397708e+04   7.048991e-01   
11           0.10     0.1             100   3.396428e+04   7.050103e-01   
12           0.10     0.5