In [1]:
import numpy as np
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

In [2]:
# Read the train and test splits from their text files (one np.loadtxt call
# per path, in train/test order).
training_data, testing_data = map(
    np.loadtxt,
    ("data/ridgetrain.txt", "data/ridgetest.txt"),
)

In [3]:
# RBF bandwidth coefficient shared by both regression parts below.
gamma = 0.1

# Column 0 is kept as an (n, 1) column of inputs; column 1 is the 1-D target vector.
x_train = training_data[:, :1]
y_train = training_data[:, 1]
x_test = testing_data[:, :1]
y_test = testing_data[:, 1]

# Sample counts, passed explicitly to the regression routines.
number_train = x_train.shape[0]
number_test = x_test.shape[0]

In [4]:
# Part-1: Kernel Ridge Regression.
def rbf_kernel(x1, x2, gamma):
    """Evaluate the RBF kernel ``exp(-gamma * ||x1 - x2||**2)`` for one pair.

    Args:
        x1: First point (array-like supporting subtraction with ``x2``).
        x2: Second point.
        gamma: Coefficient multiplying the squared distance in the exponent.

    Returns:
        The kernel value; 1.0 when the two points coincide.
    """
    distance = np.linalg.norm(x1 - x2)
    exponent = -gamma * distance ** 2
    return np.exp(exponent)

def kernel_ridge_regression(x_train, y_train, x_test, number_train, number_test, hyperparameter, gamma):
    """Fit RBF kernel ridge regression and predict on the test inputs.

    Solves ``(K + hyperparameter * I) alpha = y_train`` with
    ``K[i, j] = exp(-gamma * ||x_train[i] - x_train[j]||**2)`` and returns
    ``K_test.dot(alpha)``, where
    ``K_test[i, j] = exp(-gamma * ||x_test[i] - x_train[j]||**2)``.

    Args:
        x_train: Training inputs, one sample per row. A 1-D array is treated
            as one scalar feature per sample.
        y_train: Training targets, one per training sample.
        x_test: Test inputs, laid out like ``x_train``.
        number_train: Number of training samples (leading rows of ``x_train``
            that are used).
        number_test: Number of test samples (leading rows of ``x_test`` that
            are predicted).
        hyperparameter: Ridge strength (lambda) added to the kernel diagonal.
        gamma: RBF coefficient multiplying the squared distance.

    Returns:
        1-D array with one prediction per test sample.
    """
    train = np.asarray(x_train, dtype=float)[:number_train]
    test = np.asarray(x_test, dtype=float)[:number_test]
    # Promote scalar samples to single-feature rows so the broadcasting below
    # sees (samples, features) in both cases.
    if train.ndim == 1:
        train = train[:, np.newaxis]
    if test.ndim == 1:
        test = test[:, np.newaxis]

    # Pairwise squared Euclidean distances via broadcasting; this replaces the
    # per-entry Python loops with two array expressions.
    train_sq_dists = np.sum((train[:, np.newaxis, :] - train[np.newaxis, :, :]) ** 2, axis=-1)
    test_sq_dists = np.sum((test[:, np.newaxis, :] - train[np.newaxis, :, :]) ** 2, axis=-1)
    kernel_train = np.exp(-gamma * train_sq_dists)
    kernel_test = np.exp(-gamma * test_sq_dists)

    # Solve the regularised system directly instead of forming an explicit
    # inverse: fewer operations and better numerical behaviour.
    regularised = kernel_train + hyperparameter * np.eye(number_train)
    alpha = np.linalg.solve(regularised, y_train)
    return kernel_test.dot(alpha)

# Fit kernel ridge regression for each regularisation strength and save one
# scatter plot (true vs. predicted test outputs) per value.
hyperparam = [0.1, 1, 10, 100]
for hyperparameter in hyperparam:
    y_hat = kernel_ridge_regression(x_train, y_train, x_test, number_train, number_test, hyperparameter, gamma)
    rmse = np.sqrt(mean_squared_error(y_test, y_hat))

    # Explicit figure/axes keep each iteration's plot self-contained.
    fig, ax = plt.subplots()
    ax.scatter(x_test, y_test, color="blue", marker="x", label="True")
    ax.scatter(x_test, y_hat, color="red", marker="x", label="Predicted")
    ax.set_title(f"Kernel Ridge Regression with lambda = {hyperparameter} and RMSE = {rmse}")
    ax.set_xlabel("Inputs")
    ax.set_ylabel("Predicted/True Outputs")
    # Legend tells the reader which colour is the ground truth.
    ax.legend()
    path = "Kernel_Ridge_regression_" + str(hyperparameter) + ".png"
    fig.savefig(path)
    # Close the figure so repeated iterations do not accumulate open figures.
    plt.close(fig)

In [7]:
# Part-2: Landmark Ridge Regression.
def rbf_kernel(x1, x2, gamma):
    """Return the RBF kernel value ``exp(-gamma * ||x1 - x2||**2)``.

    NOTE(review): this repeats the ``rbf_kernel`` definition from the Part-1
    cell and shadows it when both cells run; consider keeping a single copy.

    Args:
        x1: First point (array-like supporting subtraction with ``x2``).
        x2: Second point.
        gamma: Coefficient multiplying the squared distance in the exponent.

    Returns:
        The kernel value for the pair.
    """
    squared_distance = np.linalg.norm(x1 - x2) ** 2
    return np.exp(-gamma * squared_distance)

def landmark_ridge_regression(x_train, y_train, x_test, number_train, number_test, hyperparameter, L, gamma, random_state=None):
    """Ridge regression on RBF features computed against random landmarks.

    ``L`` training points are drawn without replacement as landmarks. Each
    input is mapped to its vector of RBF kernel values against the landmarks,
    ridge regression weights are fitted on those features, and the test
    inputs are predicted.

    Args:
        x_train: Training inputs, one sample per row. A 1-D array is treated
            as one scalar feature per sample.
        y_train: Training targets, one per training sample.
        x_test: Test inputs, laid out like ``x_train``.
        number_train: Number of training samples; landmarks are drawn from
            indices ``0 .. number_train - 1``.
        number_test: Number of test samples to predict for.
        hyperparameter: Ridge strength (lambda) added to the L x L Gram matrix.
        L: Number of landmarks to draw.
        gamma: RBF coefficient multiplying the squared distance.
        random_state: Optional seed for the landmark draw. ``None`` (default)
            draws from NumPy's global RNG, so ``np.random.seed`` still governs
            the result; any other value seeds a private generator, making the
            call reproducible on its own.

    Returns:
        1-D array with one prediction per test sample.

    Raises:
        ValueError: If ``L`` exceeds ``number_train`` (sampling is without
            replacement).
    """
    train = np.asarray(x_train, dtype=float)[:number_train]
    test = np.asarray(x_test, dtype=float)[:number_test]
    # Promote scalar samples to single-feature rows so the broadcasting below
    # sees (samples, features) in both cases.
    if train.ndim == 1:
        train = train[:, np.newaxis]
    if test.ndim == 1:
        test = test[:, np.newaxis]

    sampler = np.random if random_state is None else np.random.default_rng(random_state)
    landmarks = train[sampler.choice(number_train, L, replace=False)]

    # Kernel values of every landmark against every train/test point, computed
    # by broadcasting instead of per-entry Python loops.
    train_sq_dists = np.sum((landmarks[:, np.newaxis, :] - train[np.newaxis, :, :]) ** 2, axis=-1)
    test_sq_dists = np.sum((landmarks[:, np.newaxis, :] - test[np.newaxis, :, :]) ** 2, axis=-1)
    kernel_train = np.exp(-gamma * train_sq_dists)  # shape (L, number_train)
    kernel_test = np.exp(-gamma * test_sq_dists)    # shape (L, number_test)

    # Normal equations in landmark-feature space, solved directly rather than
    # through an explicit matrix inverse.
    gram = kernel_train.dot(kernel_train.T) + hyperparameter * np.eye(L)
    W = np.linalg.solve(gram, kernel_train.dot(y_train))
    return kernel_test.T.dot(W)

# Fit landmark ridge regression for each landmark count and save one scatter
# plot (true vs. predicted test outputs) per value.
hyperparameter = 0.1
L_values = [2, 5, 20, 50, 100]

# Landmarks are sampled through NumPy's global RNG; seed it so the RMSEs and
# saved figures are the same on every run.
np.random.seed(0)

for L in L_values:
    y_hat = landmark_ridge_regression(x_train, y_train, x_test, number_train, number_test, hyperparameter, L, gamma)
    rmse = np.sqrt(mean_squared_error(y_test, y_hat))

    # Explicit figure/axes keep each iteration's plot self-contained.
    fig, ax = plt.subplots()
    ax.scatter(x_test, y_test, color="blue", marker="x", label="True")
    ax.scatter(x_test, y_hat, color="red", marker="x", label="Predicted")
    ax.set_title(f"Landmark Ridge Regression with lambda = {hyperparameter}, L = {L} and RMSE = {rmse}")
    ax.set_xlabel("Inputs")
    ax.set_ylabel("Predicted/True Outputs")
    # Legend tells the reader which colour is the ground truth.
    ax.legend()
    path = "Landmark_Ridge_regression_" + str(L) + ".png"
    fig.savefig(path)
    # Close the figure so repeated iterations do not accumulate open figures.
    plt.close(fig)