In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from joblib import Parallel, delayed
from sklearn.preprocessing import StandardScaler
from utils import clean_daily_series, build_windows, rmse, mae, smape, mase, directional_accuracy, print_evaluation_table

In [2]:
# Load the M4 daily train/test splits in one pass; paths are relative to the
# notebook's working directory.
daily_train, daily_test = map(
    pd.read_csv,
    (
        '../src/data/m4_forecasting/Daily-train.csv',
        '../src/data/m4_forecasting/Daily-test.csv',
    ),
)

In [3]:

def forecast_linear_regression(train_row, L=30, H=7):
    """Forecast the next ``H`` values of a series with direct linear regression.

    The series is turned into a supervised dataset by ``build_windows`` and a
    linear model maps each lag window to the values that follow it. The
    forecast is the model's prediction for the final ``L`` observations of
    ``train_row``.

    Parameters
    ----------
    train_row : array-like
        One-dimensional numeric series used for training.
    L : int, default 30
        Number of trailing observations fed to the model as features.
    H : int, default 7
        Number of future steps to forecast.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(H,)`` with one prediction per horizon step.

    Raises
    ------
    ValueError
        If ``build_windows`` produces no training samples.
    """
    # Build supervised dataset
    X, Y = build_windows(train_row, L=L, H=H)
    if len(X) == 0:
        # NOTE(review): presumably happens when the series is shorter than
        # L + H -- confirm against build_windows.
        raise ValueError(
            f"No training windows could be built for L={L}, H={H}; "
            "the series is too short."
        )

    # LinearRegression accepts a 2-D target and solves every column as an
    # independent least-squares problem, so a single multi-output fit gives the
    # same coefficients as one model per horizon step (up to floating-point
    # rounding) without refitting on the same X H times.
    lr = LinearRegression()
    lr.fit(X, Y)

    # The most recent L observations are the features for the forecast.
    last_window = np.asarray(train_row)[-L:].reshape(1, -1)
    y_pred = lr.predict(last_window)
    return np.asarray(y_pred, dtype=float).reshape(H)


In [13]:
def evaluate_forecast(train_row, test_row, L=30, H=14):
    """Score a linear-regression forecast for one series against its test data.

    Both rows are passed through ``clean_daily_series``; the cleaned training
    series is standardised, forecast in the scaled space, and the forecast is
    mapped back to the original scale before the metrics are computed.

    Parameters
    ----------
    train_row, test_row
        Raw training / test rows for a single series. The cleaned training
        series must expose ``.to_numpy()``.
    L : int, default 30
        Lag-window length forwarded to ``forecast_linear_regression``.
    H : int, default 14
        Requested horizon; capped at the length of the cleaned test series.

    Returns
    -------
    tuple
        ``(rmse, mae, smape, mase, directional_accuracy)`` over the evaluated
        horizon.
    """
    history = clean_daily_series(train_row)
    actual = clean_daily_series(test_row)

    # Never evaluate past the available test data (M4 daily test series have
    # a length of 14).
    n_steps = min(H, len(actual))

    # The scaler is fitted on the training history only and reused to undo
    # the scaling of the forecast.
    standardizer = StandardScaler()
    history_column = history.to_numpy().reshape(-1, 1)
    history_scaled = standardizer.fit_transform(history_column).flatten()

    forecast_scaled = np.atleast_1d(
        forecast_linear_regression(history_scaled, L=L, H=n_steps)
    )
    forecast = standardizer.inverse_transform(
        forecast_scaled.reshape(-1, 1)
    ).flatten()

    # Trim both sides to the evaluated horizon once, then score.
    target = np.atleast_1d(actual)[:n_steps]
    forecast = forecast[:n_steps]

    return (
        rmse(target, forecast),
        mae(target, forecast),
        smape(target, forecast),
        # MASE gets the unscaled training history; m=7 is presumably a weekly
        # seasonal period -- confirm against the mase helper.
        mase(target, forecast, history, m=7),
        directional_accuracy(target, forecast),
    )


In [14]:
# Experiment configuration: forecast horizons to compare, lag-window length,
# and how many series (taken from the top of the files) to evaluate.
horizons = [1, 7, 14]
L = 30
NUM_SERIES = 50

# Work on copies so the loaded frames stay untouched.
daily_train_copy, daily_test_copy = daily_train.copy(), daily_test.copy()

# Maps each horizon to a list with one metric tuple per evaluated series.
all_results = {}

for H in horizons:
    print(f"Evaluating horizon H={H}")
    pool = Parallel(n_jobs=-1, backend="loky", verbose=10)
    # Train and test rows are paired by position; the generator is consumed
    # lazily by the worker pool.
    jobs = (
        delayed(evaluate_forecast)(daily_train_copy.iloc[i], daily_test_copy.iloc[i], L=L, H=H)
        for i in range(NUM_SERIES)
    )
    results = pool(jobs)
    all_results[H] = results


Evaluating horizon H=1


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Batch computation too fast (0.08987641334533691s.) Setting batch_size=2.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:    0.8s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Batch computation too fast (0.0366671085357666s.) Setting batch_size=2.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:    0.2s


Evaluating horizon H=7


[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:    0.7s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Batch computation too fast (0.046411752700805664s.) Setting batch_size=2.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.1s


Evaluating horizon H=14


[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:    0.7s finished


In [15]:
# Report one summary table per horizon.
for H in all_results:
    results = all_results[H]
    # `results` holds one (rmse, mae, smape, mase, da) tuple per series;
    # transpose it into one sequence per metric.
    rmses, maes, smapes, mases, das = zip(*results)

    print(f"\nHorizon: {H}")
    # NOTE(review): DA prints as NaN for H=1 in the recorded output --
    # presumably a single-step forecast has no direction to compare; confirm
    # in directional_accuracy.
    print_evaluation_table(rmses, maes, smapes, mases, das)



Horizon: 1
      Metric     Mean   Median
0       RMSE  53.7175  17.1297
1        MAE  53.7175  17.1297
2  sMAPE (%)   1.2360   0.5717
3       MASE   0.4522   0.2948
4         DA      NaN      NaN

Horizon: 7
      Metric      Mean   Median
0       RMSE  145.1147  63.9750
1        MAE  128.2930  54.5485
2  sMAPE (%)    3.1352   1.6792
3       MASE    1.0399   0.7315
4         DA    0.4933   0.5000

Horizon: 14
      Metric      Mean   Median
0       RMSE  194.0205  95.6222
1        MAE  165.9755  72.3974
2  sMAPE (%)    4.1642   1.9397
3       MASE    1.5469   0.8377
4         DA    0.5385   0.5769


  np.nanmean(das)
  np.nanmedian(das)
