In [None]:
import torch

import numpy as np
import pandas as pd 

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
  
from sklearn.metrics import mean_squared_error
from joblib import Parallel, delayed

from pathlib import Path
import importlib.util

# Load the NNKNN implementation straight from its source file instead of
# importing an installed package. The path is built relative to the current
# working directory, so the notebook has to be started from a directory whose
# parent contains src/nnknn/nnknn.py.
module_path = Path.cwd().resolve() / ".." /"src" / "nnknn" / "nnknn.py"
# Build an import spec named "nnknn" for that file and create an empty module from it.
spec = importlib.util.spec_from_file_location("nnknn", str(module_path))
mod = importlib.util.module_from_spec(spec)
# Execute the file's code inside the new module object. Note that the module
# is not added to sys.modules here, so a later `import nnknn` will not find it.
spec.loader.exec_module(mod)
# Expose the model class under a short name for the cells below.
NNKNN = mod.NNKNN


In [3]:
# Read the two daily CSV splits; both paths are relative to the
# notebook's working directory.
daily_train = pd.read_csv(
    '../src/data/m4_forecasting/Daily-train.csv'
)
daily_test = pd.read_csv(
    '../src/data/m4_forecasting/Daily-test.csv'
)

In [5]:

def rmse(actual, predicted):
    """Return the root-mean-squared error between two sets of values.

    Both arguments are forwarded unchanged to ``mean_squared_error``; the
    square root of its result is returned.
    """
    mse = mean_squared_error(actual, predicted)
    return np.sqrt(mse)

In [6]:
def clean_daily_series(row):
    """Turn one wide-format row into a float series with a daily index.

    The first entry (the ID in column V1) is discarded, every NaN entry is
    dropped and the remaining values are cast to float. The result is then
    re-indexed with consecutive calendar days starting at 2000-01-01; these
    dates are placeholders, not real observation dates.
    """
    # Skip the ID cell, then strip the NaN padding left by uneven lengths.
    values = row.iloc[1:].dropna().astype(float)

    # Synthetic (fake but consistent) daily timestamps.
    values.index = pd.date_range(start="2000-01-01", periods=len(values), freq="D")
    return values


In [7]:
def build_windows(ts, L, H):
    """Slice a series into sliding (input, target) window pairs.

    Window ``i`` pairs the ``L`` values ``ts[i:i+L]`` with the ``H`` values
    that follow them, ``ts[i+L:i+L+H]``. Consecutive windows are shifted by
    one step.

    Args:
        ts: Sequence of numbers (list, NumPy array, pandas Series, ...).
        L: Length of each input window.
        H: Length of each target window (forecast horizon).

    Returns:
        Tuple ``(X, Y)`` of float32 tensors with shapes ``(n, L)`` and
        ``(n, H)`` where ``n = max(len(ts) - L - H + 1, 0)``. A series that
        is too short therefore yields empty tensors that still carry the
        window widths.
    """
    # Convert once up front: handing torch.tensor a Python list of NumPy
    # slices is slow and triggers a PyTorch warning.
    series = np.asarray(ts, dtype=np.float32)
    n_windows = max(series.shape[0] - L - H + 1, 0)

    # Row i of `offsets` holds the positions i .. i+L+H-1, so one fancy-index
    # lookup extracts every window (input part followed by target part).
    offsets = np.arange(n_windows)[:, None] + np.arange(L + H)[None, :]
    spans = series[offsets]

    X = torch.tensor(spans[:, :L], dtype=torch.float32)
    Y = torch.tensor(spans[:, L:], dtype=torch.float32)
    return X, Y

In [None]:
def forecast_nnknn(train_row, L=30, H=7, shared_weights=False):
    """Forecast the next ``H`` values of one series with an NNKNN model.

    The series is cut into sliding windows that serve as the case base, and
    the final ``L`` observations are used as the query. A new ``NNKNN``
    instance is constructed on every call and run once under
    ``torch.no_grad()``; no training step happens in this function.
    NOTE(review): if NNKNN initialises its parameters randomly, forecasts
    will differ between runs unless a seed is set -- confirm.

    Args:
        train_row: pandas Series of observations; NaN entries are dropped.
        L: Window length, passed to the model as ``num_features``.
        H: Forecast horizon.
        shared_weights: Forwarded unchanged to the ``NNKNN`` constructor.

    Returns:
        NumPy array with the forecast (expected shape ``[H]``), or ``None``
        when the series has fewer than ``L + H`` observations. The result
        always has at least one dimension, so ``result[:H]`` is valid even
        for ``H == 1``.
    """
    # clean series
    ts = train_row.dropna().astype(float).values
    if len(ts) < L + H:
        return None  # skip too short series

    # build sliding windows (case base) and the query from the last L points
    X_cases, Y_targets = build_windows(ts, L=L, H=H)
    query = torch.tensor(ts[-L:], dtype=torch.float32).unsqueeze(0)

    # build model
    model = NNKNN(num_features=L, num_cases=X_cases.shape[0], shared_weights=shared_weights)

    # forward pass; only the first of the three returned values is used
    with torch.no_grad():
        y_hat, _, _ = model(query, X_cases, Y_targets)

    # squeeze() collapses a single-step forecast to a 0-d array, which
    # cannot be sliced; atleast_1d restores a 1-d result in that case only.
    return np.atleast_1d(y_hat.squeeze().numpy())



In [None]:
def evaluate_nnknn(train_row, test_row, L=30, H=7):
    """Score an NNKNN forecast for one series against its held-out values.

    Both rows are cleaned with ``clean_daily_series``, a forecast is produced
    from the training part with ``forecast_nnknn``, and the RMSE over the
    first ``H`` test observations is returned.

    Args:
        train_row: Raw training row (ID followed by observations).
        test_row: Raw test row for the same series.
        L: Window length passed to ``forecast_nnknn``.
        H: Forecast horizon.

    Returns:
        RMSE over the compared steps, or ``nan`` when no forecast could be
        produced (``forecast_nnknn`` returns ``None`` for a series shorter
        than ``L + H``) or when there is nothing to compare against.
    """
    train_ts = clean_daily_series(train_row)
    test_ts = clean_daily_series(test_row)

    y_pred = forecast_nnknn(train_ts, L=L, H=H)
    if y_pred is None:
        # Series was skipped; report a missing score instead of crashing on
        # `None[:H]` and taking the whole batch of evaluations down with it.
        return float("nan")

    # atleast_1d guards against a 0-d forecast (single-step horizon).
    y_pred = np.atleast_1d(y_pred)[:H]
    y_true = test_ts.to_numpy()[:H]

    # Compare only the steps present on both sides so that a test split
    # shorter than H does not raise a length-mismatch error.
    n_common = min(len(y_true), len(y_pred))
    if n_common == 0:
        return float("nan")

    return rmse(y_true[:n_common], y_pred[:n_common])


In [11]:
L = 30  # size of window
HORIZON = 7  # forecast horizon
NUM_SERIES = 50

daily_train_copy = daily_train.copy()
daily_test_copy = daily_test.copy()

# Never index past the last row available in either split.
n_series = min(NUM_SERIES, len(daily_train_copy), len(daily_test_copy))

# Evaluate each series in its own worker process; row i of the train frame
# is paired with row i of the test frame.
results = Parallel(n_jobs=-1, backend="loky", verbose=10)(
    delayed(evaluate_nnknn)(daily_train_copy.iloc[i], daily_test_copy.iloc[i], L=L, H=HORIZON)
    for i in range(n_series)
)

# Aggregate with NaN-aware reducers so that a missing (None / NaN) score for
# one series cannot turn the summary statistics into NaN or raise.
scores = np.array([np.nan if r is None else r for r in results], dtype=float)

print("RMSE for each series:", results)
print(f"mean rmse: {np.nanmean(scores)}")
print(f"median rmse: {np.nanmedian(scores)}")

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.7s
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:    1.0s
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:    1.6s
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    2.2s
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    3.3s
[Parallel(n_jobs=-1)]: Done  41 out of  50 | elapsed:    3.7s remaining:    0.8s


RMSE for each series: [601.1320931627347, 40.35282427376542, 60.72543192779937, 116.13786475019234, 1318.8129616249585, 2048.5042731775616, 5536.853060413103, 344.09175284276705, 254.889277481614, 355.62664000040303, 684.3298478665554, 528.398797222369, 1799.1196554881706, 2810.9828722829584, 6117.238345271734, 3030.3245524613494, 537.2297630884768, 592.0129529968294, 1446.727099584787, 1548.1520229908144, 1607.3209256377443, 650.0449715383229, 183.4211419013946, 549.7089745410957, 148.47828077598797, 529.0173548560582, 115.59139762928145, 591.1597386633957, 2846.6766706760654, 540.7047081702618, 836.4473566782743, 1098.0656600480293, 144.8521394772646, 1172.277993121258, 920.5012185672076, 187.20998556790732, 75.97848289682447, 329.2822951273858, 225.11492387472245, 193.8749966847366, 2849.8709236410573, 2454.9822294138785, 51.27881728046375, 375.11047705888006, 57.616953678225265, 124.88030573460036, 43.3677215914698, 7813.4229456916, 7317.402879291807, 8715.267122332318]
mean rmse: 

[Parallel(n_jobs=-1)]: Done  47 out of  50 | elapsed:    3.9s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:    4.1s finished


## Potential Improvements:
