In [None]:
import torch
from torch import nn
import numpy as np
import pandas as pd 

from sklearn.preprocessing import StandardScaler
from joblib import Parallel, delayed

from utils import clean_daily_series, build_windows, rmse, mae, smape, mase, directional_accuracy, print_evaluation_table
from pathlib import Path
import importlib.util

# Load the NNKNN class straight from its source file instead of importing a package.
# The path is built from the current working directory, so the notebook has to be
# started from a directory whose parent contains src/nnknn/nnknn.py.
module_path = Path.cwd().resolve() / ".." /"src" / "nnknn" / "nnknn.py"
# Build an import spec named "nnknn" for that file, create the module object and
# execute the file's code inside it.  The module is not added to sys.modules here.
spec = importlib.util.spec_from_file_location("nnknn", str(module_path))
mod = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mod)
# Expose the model class under a short global name for the cells below.
NNKNN = mod.NNKNN


In [2]:
# Read the M4 daily train and test tables.  Both paths are relative to the current
# working directory.  The evaluation cell further down pairs the two frames by row
# position (.iloc[i]), so row i of each file is assumed to describe the same series
# -- TODO confirm the files share the same row order.
daily_train = pd.read_csv('../src/data/m4_forecasting/Daily-train.csv')
daily_test = pd.read_csv('../src/data/m4_forecasting/Daily-test.csv')

In [None]:
def train_nnknn(X_cases, Y_targets, L, epochs=100, lr=0.01, shared_weights=False):
    """Fit an NNKNN model with full-batch Adam on a mean-squared-error objective.

    Every epoch performs one forward pass in which the stored cases are also
    used as the queries, followed by a single optimizer step.

    Args:
        X_cases: Tensor of case windows; its first dimension is used as the
            number of cases (presumably shaped [num_cases, L] -- confirm
            against build_windows).
        Y_targets: Tensor of targets handed to the model alongside the cases
            and used as the regression target of the loss.
        L: Passed to NNKNN as ``num_features``.
        epochs: Number of optimisation steps.
        lr: Learning rate of the Adam optimizer.
        shared_weights: Forwarded unchanged to the NNKNN constructor.

    Returns:
        The trained NNKNN instance.
    """
    net = NNKNN(
        num_features=L,
        num_cases=X_cases.shape[0],
        shared_weights=shared_weights,
    )
    adam = torch.optim.Adam(net.parameters(), lr=lr)

    def run_epoch():
        # One full-batch update: clear gradients, predict, back-propagate, step.
        adam.zero_grad()
        # The case base doubles as the query set during training.
        # NOTE(review): every query is therefore also present among the cases;
        # whether NNKNN excludes that self-match is not visible here -- confirm.
        predictions, _, _ = net(X_cases, X_cases, Y_targets)
        nn.functional.mse_loss(predictions, Y_targets).backward()
        adam.step()

    for _ in range(epochs):
        run_epoch()

    return net

def forecast_nnknn(train_row, L=30, H=7, epochs=100, shared_weights=False):
    """Train an NNKNN on windows of one series and forecast from its last window.

    The series is cut into (window, target) pairs by ``build_windows``, a model
    is fitted on them with ``train_nnknn``, and the final ``L`` observations are
    then used as the single query against the full case base.

    Args:
        train_row: The training series. A ``pd.Series``, list, or array of any
            shape is accepted; it is flattened to one dimension.
        L: Length of the input window (number of model features).
        H: Forecast horizon handed to ``build_windows``.
        epochs: Number of training steps, forwarded to ``train_nnknn``.
        shared_weights: Forwarded to ``train_nnknn``.

    Returns:
        The model output for the query as a NumPy array with the leading batch
        dimension removed (expected shape [H] -- depends on NNKNN's output).

    Raises:
        ValueError: If the series holds fewer than ``L`` observations, or if
            ``build_windows`` yields no training windows.
    """
    # Normalise every supported input to a flat NumPy array so that both the
    # window builder and the [-L:] query slice below see a 1-D sequence.
    if isinstance(train_row, pd.Series):
        train_row = train_row.to_numpy().flatten()
    else:
        train_row = np.asarray(train_row).ravel()

    # The query is the last L observations; a shorter series would give a query
    # with fewer features than the model was built for.
    if train_row.size < L:
        raise ValueError(
            f"train_row has {train_row.size} observations but the query window needs L={L}"
        )

    X_cases, Y_targets = build_windows(train_row, L=L, H=H)

    X_cases = torch.tensor(X_cases, dtype=torch.float32)
    Y_targets = torch.tensor(Y_targets, dtype=torch.float32)

    # An empty case base cannot be trained on or queried; fail with a clear message.
    if X_cases.numel() == 0:
        raise ValueError(
            f"no training windows could be built from {train_row.size} observations "
            f"with L={L}, H={H}"
        )

    # train model
    model = train_nnknn(X_cases, Y_targets, L=L, epochs=epochs, shared_weights=shared_weights)

    # last window as query, with a leading batch dimension of 1
    query = torch.tensor(train_row[-L:], dtype=torch.float32).unsqueeze(0)

    # Inference only: no gradients are needed for the forecast.
    with torch.no_grad():
        y_hat, _, _ = model(query, X_cases, Y_targets)

    return y_hat.squeeze(0).cpu().numpy()


In [16]:
def evaluate_forecast(train_row, test_row, L=30, H=14, m=7, epochs=100, shared_weights=False):
    """Forecast one series with NNKNN and score the forecast against its test row.

    The training series is standardised with a scaler fitted on the training
    data only; the forecast is mapped back to the original scale before the
    metrics are computed, so all scores are in the units of the raw series.

    Args:
        train_row: Raw training row, cleaned with ``clean_daily_series``.
        test_row: Raw test row, cleaned with ``clean_daily_series``.
        L: Input window length passed to ``forecast_nnknn``.
        H: Requested horizon; capped at the length of the cleaned test series.
        m: Seasonal period passed to ``mase``. The default of 7 keeps the
            previous behaviour. NOTE(review): the official M4 evaluation uses
            m=1 for daily data -- pass ``m=1`` to compare against published
            M4 scores.
        epochs: Training steps, forwarded to ``forecast_nnknn``.
        shared_weights: Forwarded to ``forecast_nnknn``.

    Returns:
        Tuple ``(rmse, mae, smape, mase, directional_accuracy)`` computed over
        the first ``min(H, len(test))`` steps.
    """
    train_ts = clean_daily_series(train_row)
    test_ts = clean_daily_series(test_row)

    # Enforce M4 horizon limit (daily test series have length of 14)
    H_eval = min(H, len(test_ts))

    # Fit the scaler on the training series only so no test information leaks in.
    scaler = StandardScaler()
    train_ts_scaled = scaler.fit_transform(
        train_ts.to_numpy().reshape(-1, 1)
    ).flatten()

    y_pred = forecast_nnknn(
        train_ts_scaled, L=L, H=H_eval, epochs=epochs, shared_weights=shared_weights
    )
    # Guard against a 0-d result so the reshape below always works.
    y_pred = np.atleast_1d(y_pred)

    # Back to the original scale, then align both arrays to the evaluated horizon once.
    y_pred = scaler.inverse_transform(
        y_pred.reshape(-1, 1)
    ).flatten()[:H_eval]
    y_true = np.atleast_1d(test_ts)[:H_eval]

    return (
        rmse(y_true, y_pred),
        mae(y_true, y_pred),
        smape(y_true, y_pred),
        mase(y_true, y_pred, train_ts, m=m),
        directional_accuracy(y_true, y_pred)
    )


In [None]:
# Experiment configuration: forecast horizons to score, input window length,
# and how many series (taken from the top of the files) to evaluate.
horizons = [1, 7, 14]
L = 30
NUM_SERIES = 50

# Work on copies so the frames loaded above stay untouched.
daily_train_copy = daily_train.copy()
daily_test_copy = daily_test.copy()

# Rows are paired by position, so never index past the end of either frame.
n_series = min(NUM_SERIES, len(daily_train_copy), len(daily_test_copy))
if n_series < NUM_SERIES:
    print(f"Only {n_series} of the requested {NUM_SERIES} series are available")

# Maps horizon -> list of per-series metric tuples returned by evaluate_forecast.
all_results = {}

for H in horizons:
    print(f"Evaluating horizon H={H}")
    # Each series is trained and scored independently, one joblib task per series.
    results = Parallel(n_jobs=-1, backend="loky", verbose=10)(
        delayed(evaluate_forecast)(daily_train_copy.iloc[i], daily_test_copy.iloc[i], L=L, H=H)
        for i in range(n_series)
    )
    all_results[H] = results


Evaluating horizon H=1


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    8.8s
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:  4.8min
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:  7.4min
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:  8.8min
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:  9.9min
[Parallel(n_jobs=-1)]: Done  41 out of  50 | elapsed: 11.5min remaining:  2.5min
[Parallel(n_jobs=-1)]: Done  47 out of  50 | elapsed: 11.7min remaining:   44.8s


Evaluating horizon H=7


[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed: 11.8min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
python(92720) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(92721) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(92722) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(92723) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(92724) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    2.1s
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:  2.1min
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:  4.0min
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:  4.3min
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:  5.6min
[Parallel(n_jobs=-1)]: Done  41 out of  50 | elapsed:

Evaluating horizon H=14


[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:  6.6min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
python(94261) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(94262) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(94263) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(94264) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    1.2s
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:  1.9min
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:  3.5min
python(95098) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(95135) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:  3.9min
[Parallel(n_jobs=

In [18]:
# Print one summary table per horizon from the results gathered above.
for H, results in all_results.items():
    # results: list of tuples per series
    # Transpose into one tuple per metric; the five-way unpack matches the
    # five values returned by evaluate_forecast.
    rmses, maes, smapes, mases, das = zip(*results)

    print(f"\nHorizon: {H}")
    # NOTE(review): in the recorded output the DA row is NaN for H=1 --
    # presumably direction is undefined for a single-step forecast; confirm in utils.
    print_evaluation_table(rmses, maes, smapes, mases, das)



Horizon: 1
      Metric      Mean    Median
0       RMSE  329.6796  187.0578
1        MAE  329.6796  187.0578
2  sMAPE (%)   10.1814    5.2177
3       MASE    4.7882    1.6645
4         DA       NaN       NaN

Horizon: 7
      Metric      Mean    Median
0       RMSE  418.5363  229.9372
1        MAE  406.3746  224.1643
2  sMAPE (%)   11.8964    6.8392
3       MASE    5.3511    2.2247
4         DA    0.5433    0.5000

Horizon: 14
      Metric      Mean    Median
0       RMSE  423.0747  213.3350
1        MAE  399.0892  205.7850
2  sMAPE (%)   11.6774    7.6299
3       MASE    5.4933    2.3858
4         DA    0.5215    0.5000


  np.nanmean(das)
  np.nanmedian(das)
