In [5]:
import torch
import torch.nn as nn
import torch.optim as optim

import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# datasets
from sklearn.datasets import fetch_california_housing, load_diabetes
from ucimlrepo import fetch_ucirepo 
abalone = fetch_ucirepo(id=1) 

  
# data (as pandas dataframes)
from pathlib import Path
import importlib.util

module_path = Path.cwd().resolve() / ".." /"src" / "nnknn" / "nnknn.py"
spec = importlib.util.spec_from_file_location("nnknn", str(module_path))
mod = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mod)
NNKNN = mod.NNKNN



In [6]:
def load_california_dataset():
    data = fetch_california_housing()
    X = data.data.astype(np.float32)
    y = data.target.reshape(-1, 1).astype(np.float32)
    return X, y

# Common preprocessing function
def preprocess_dataset(X, y, test_size=0.2):
    scaler_X = StandardScaler()
    scaler_y = StandardScaler()

    X = scaler_X.fit_transform(X)
    y = scaler_y.fit_transform(y)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=42
    )

    return X_train, X_test, y_train, y_test, scaler_X, scaler_y


In [None]:
X,y = load_california_dataset()
X_train, X_test, y_train, y_test, scaler_X, scaler_y = preprocess_dataset(X, y)

# Convert to torch
X_train_t = torch.tensor(X_train, dtype=torch.float32)
y_train_t = torch.tensor(y_train, dtype=torch.float32)
X_test_t  = torch.tensor(X_test, dtype=torch.float32)
y_test_t  = torch.tensor(y_test, dtype=torch.float32)


In [None]:
##############################################
# Train NN-kNN model
##############################################

# shared weights = false takes longer to train but generally does better performance wise
model = NNKNN(
    num_features=X_train_t.shape[1],
    num_cases=X_train_t.shape[0],
    # shared_weights=True
)


print("Training NN-kNN Regression Model...\n")

batch_size = 128
num_epochs = 100

model.train()
optimizer = optim.Adam(model.parameters(), lr=1e-2)
criterion = nn.MSELoss()

for epoch in range(num_epochs):
    for i in range(0, X_train_t.size(0), batch_size):
        optimizer.zero_grad()

        # Forward pass
        queries = X_train_t[i:i+batch_size]
        y_hat, activations, delta = model(queries, X_train_t, y_train_t)

        # Loss on corresponding batch targets
        loss = criterion(y_hat, y_train_t[i:i+batch_size])

        # Backpropagation
        loss.backward()
        optimizer.step()

    # Print once per epoch
    if epoch % 10 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item():.4f}")

    
    

Training NN-kNN Regression Model...

Epoch 0, Loss: 0.3393
Epoch 10, Loss: 0.2148
Epoch 20, Loss: 0.2033


In [None]:
with torch.no_grad():
    y_pred, activations, delta = model(
        queries=X_test_t,   
        cases=X_train_t,   
        targets=y_train_t  
    )

# Inverse transform
preds_np = scaler_y.inverse_transform(y_pred.numpy())
ytrue = scaler_y.inverse_transform(y_test)

nnknn_mse = mean_squared_error(ytrue, preds_np)
nnknn_mae = mean_absolute_error(ytrue, preds_np)
nnknn_r2 = r2_score(ytrue, preds_np)

In [None]:
# ---------------------------------------------------------
# Baseline: Linear Regression
# ---------------------------------------------------------

lr = LinearRegression()
lr.fit(X_train, y_train)

preds_lr = scaler_y.inverse_transform(lr.predict(X_test))

lr_mse = np.sqrt(mean_squared_error(ytrue, preds_lr))
lr_mae = mean_absolute_error(ytrue, preds_lr)
lr_r2 = r2_score(ytrue, preds_lr)


# ---------------------------------------------------------
# Baseline: KNN Regressor
# ---------------------------------------------------------

knn = KNeighborsRegressor(n_neighbors=8)
knn.fit(X_train, y_train)

preds_knn = scaler_y.inverse_transform(knn.predict(X_test))

knn_mse = np.sqrt(mean_squared_error(ytrue, preds_knn))
knn_mae = mean_absolute_error(ytrue, preds_knn)
knn_r2 = r2_score(ytrue, preds_knn)


# ---------------------------------------------------------
# 8. Print comparison table
# ---------------------------------------------------------

print("================== CALIFORNIA RESULTS ======================")
print(f"Linear Regression:  MSE={lr_mse:.4f}, MAE={lr_mae:.4f}, R²={lr_r2:.4f}")
print(f"KNN (k=8):          MSE={knn_mse:.4f}, MAE={knn_mae:.4f}, R²={knn_r2:.4f}")
print(f"NN-kNN Regression:  MSE={nnknn_mse:.4f}, MAE={nnknn_mae:.4f}, R²={nnknn_r2:.4f}")
print("============================================================")


Linear Regression:  MSE=0.7456, MAE=0.5332, R²=0.5758
KNN (k=8):          MSE=0.6485, MAE=0.4385, R²=0.6791
NN-kNN Regression:  MSE=0.4740, MAE=0.4666, R²=0.6383
