In [2]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.linear_model import Ridge
from scipy.spatial.distance import cdist
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import matplotlib as mpl
mpl.rcParams['figure.dpi'] = 120

# Kernel functions encapsulated in a class
class Kernel:
    @staticmethod
    def Gaussian(x):
        return np.where(np.abs(x) > 4, 0, 1 / np.sqrt(2 * np.pi) * np.exp(-0.5 * x**2))

    @staticmethod
    def Tricubic(x):
        return np.where(np.abs(x) > 1, 0, (1 - np.abs(x)**3)**3)

    @staticmethod
    def Epanechnikov(x):
        return np.where(np.abs(x) > 1, 0, 3 / 4 * (1 - np.abs(x)**2))

    @staticmethod
    def Quartic(x):
        return np.where(np.abs(x) > 1, 0, 15 / 16 * (1 - np.abs(x)**2)**2)

# Utility function to calculate pairwise distances
def calculate_distances(X):
    return cdist(X, X, metric='euclidean')

# Function to calculate kernel-based weights
def kernel_function(distances, kernel, tau):
    return kernel(distances / (2 * tau))

# Ridge Regression Model with Kernel Weighting
class KernelWeightedRidgeModel:
    def __init__(self, kernel=Kernel.Gaussian, tau=0.05, alpha=0.001, max_iter=5000):
        self.kernel = kernel
        self.tau = tau
        self.alpha = alpha
        self.max_iter = max_iter
        self.model = Ridge(alpha=self.alpha, max_iter=self.max_iter)
    
    def fit(self, X, y, distances):
        # Calculate weights for each data point
        self.weights = kernel_function(distances, self.kernel, self.tau)
        
        # Train the model with weighted data
        W = np.diag(self.weights[:, 0])  # Use weights for the first data point (example)
        self.model.fit(W @ X, W @ y)
    
    def predict(self, X_new):
        return self.model.predict(X_new.reshape(1, -1))

    def evaluate(self, X_test, y_test):
        y_pred = self.model.predict(X_test)
        return mean_squared_error(y_test, y_pred)

data = pd.read_csv('https://github.com/dvasiliu/AAML/blob/main/Data%20Sets/mtcars.csv?raw=True')
X = data.drop(columns=['model', 'mpg']).values
y = data['mpg'].values

# Scale the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Main workflow to train and evaluate the model
def main():
    # Calculate distances
    distances = calculate_distances(X_scaled)
    
    # Initialize model
    model = KernelWeightedRidgeModel(kernel=Kernel.Gaussian, tau=0.05, alpha=0.001, max_iter=5000)
    
    # Fit the model
    model.fit(X_scaled, y, distances)
    
    # Make a prediction for the first test point
    y_pred = model.predict(X_scaled[0])
    print(f"Predicted value: {y_pred[0]}, Actual value: {y[0]}")
    
    # Evaluate the model
    mse = model.evaluate(X_scaled, y)
    print(f"Mean Squared Error: {mse}")

# Run the main function
if __name__ == "__main__":
    main()

Predicted value: 20.97295036473242, Actual value: 21.0
Mean Squared Error: 554.421766688445
