In [8]:
pip install scikit-learn

You should consider upgrading via the '/home/../multiTS/NFT/bin/python -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


In [7]:
import pandas as pd
from statsmodels.tsa.api import VAR
from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np
import sys
sys.path.append('/home/../multiTS/NFT/models/NFT')
from NFT import NFT

import torch.nn.functional as F

sys.path.append('NFT/')
from dicts import data_to_num_vars_dict, data_to_label_len, data_to_num_nft_blocks, data_to_num_of_series, data_to_steps, single_data_to_series_list, noaa_series_to_years
from lists import ECG

sys.path.append('NFT/')
from models.training_functions import get_data, evaluate_model, save_model, get_model_name, add_results_to_excel, get_path


def reshape_test_y(test_y):
    """
    Flatten test targets into a 2D table so they can be compared with forecasts.

    All leading axes are merged into a single row axis; the last axis
    (one column per variable) is kept as-is.

    Args:
        test_y (array-like): Test data, typically with shape
            (num_data_points, horizon, num_vars). NumPy arrays, nested
            lists and pandas DataFrames are accepted (a DataFrame is
            already 2D and is returned with its values unchanged).

    Returns:
        pd.DataFrame: Reshaped data with shape (num_data_points * horizon, num_vars).
    """
    # pd.DataFrame has no .reshape(), so coerce to an ndarray first; this is
    # a no-op for inputs that are already NumPy arrays.
    values = np.asarray(test_y)
    # Merge every leading axis into the row axis, keep the variable axis.
    flattened = values.reshape(-1, values.shape[-1])
    return pd.DataFrame(flattened)

def reshape_data_for_var(data):
    """
    Stack a 3D batch of windows into one 2D table for VAR fitting.

    Args:
        data: 3D array exposing ``.shape`` and ``.reshape`` with shape
            (num_data_points, lookback, num_vars).

    Returns:
        pd.DataFrame: Rows of every window laid end to end, with shape
            (num_data_points * lookback, num_vars).

    Raises:
        ValueError: If ``data.shape`` does not have exactly three entries.
    """
    # NOTE(review): if the windows overlap (sliding window with stride < lookback),
    # the stacked table repeats observations -- confirm against get_data.
    n_windows, window_len, n_vars = data.shape
    total_rows = n_windows * window_len
    stacked = data.reshape(total_rows, n_vars)
    return pd.DataFrame(stacked)

def forecast_with_var(train, horizon):
    """
    Fit a VAR model on ``train`` and forecast ``horizon`` steps ahead.

    Args:
        train (pd.DataFrame): Training observations, one column per variable.
        horizon (int): Number of steps to forecast.

    Returns:
        pd.DataFrame: Forecasted values with the same columns as ``train``.
    """
    # Fit with the library's default settings.
    var_results = VAR(train).fit()

    # The forecast is seeded with the last k_ar rows of the training data,
    # where k_ar is the lag order of the fitted model.
    seed_rows = train.values[-var_results.k_ar:]
    predictions = var_results.forecast(y=seed_rows, steps=horizon)

    return pd.DataFrame(predictions, columns=train.columns)

def calculate_mse(forecast_df, test):
    """
    Compute MSE and MAE between forecasted and actual values.

    If the two inputs have a different number of rows, the longer one is
    truncated to the length of the shorter one before scoring.

    Args:
        forecast_df (pd.DataFrame): Forecasted values with shape (horizon, num_vars).
        test (pd.DataFrame): Actual values with shape (horizon, num_vars).

    Returns:
        tuple: (mse, mae) computed over all flattened values.
    """
    # Score only the rows both inputs have in common.
    common_rows = min(len(forecast_df), len(test))
    predicted = forecast_df.iloc[:common_rows].values.flatten()
    actual = test.iloc[:common_rows].values.flatten()

    mse = mean_squared_error(actual, predicted)
    mae = mean_absolute_error(actual, predicted)
    return mse, mae

def calculate_var_mse(data, lookback, horizon, series):
    """
    Fit a VAR baseline on one dataset and score its forecast on the test split.

    Args:
        data (str): Dataset key; used to look up the number of series in
            ``data_to_num_of_series`` (default 1) and passed to ``get_data``.
        lookback (int): Lookback length passed to ``get_data``.
        horizon (int): Horizon length passed to ``get_data``. The number of
            forecast steps is then taken from ``test_y.shape[1]``.
        series: Series identifier passed to ``get_data`` (``None`` for
            datasets that are not split per series).

    Returns:
        tuple: (mse, mae) as returned by ``calculate_mse``.
    """
    n_series = data_to_num_of_series.get(data, 1)

    # Only the training inputs and the test targets are used below.
    train_X, train_y, val_X, val_y, test_X, test_y = get_data(
        data=data,
        lookback=lookback,
        horizon=horizon,
        n_series=n_series,
        print_stats=False,
        series=series,
    )

    # Reshape train_X into a 2D table for VAR
    train_data = reshape_data_for_var(train_X)

    # Forecast as many steps as the test targets contain per sample
    horizon = test_y.shape[1]
    forecasted_values = forecast_with_var(train_data, horizon)

    # Flatten test_y to 2D so it lines up with the forecast columns
    test_y_reshaped = reshape_test_y(test_y)

    # NOTE(review): the forecast has `horizon` rows while the flattened test
    # targets have num_data_points * horizon rows; calculate_mse truncates to
    # the shorter input, so only the first `horizon` rows of the test targets
    # are scored -- confirm this is the intended evaluation.
    mse, mae = calculate_mse(forecasted_values, test_y_reshaped)

    return mse, mae
    
    
# Run the VAR baseline for each selected dataset and print MSE/MAE per horizon.
for data in ['noaa']:
    print(data)
    # These datasets are evaluated per series and the scores are averaged.
    per_series_eval = data in ['eeg_single', 'ecg_single', 'noaa']
    for lookback, horizon in data_to_steps[data]:
        if not per_series_eval:
            mse, mae = calculate_var_mse(data, lookback, horizon, series=None)
        else:
            series_scores = [
                calculate_var_mse(data, lookback, horizon, series)
                for series in single_data_to_series_list[data]
            ]
            # Plain arithmetic mean over the series, MSE and MAE separately.
            mse = sum(score[0] for score in series_scores) / len(series_scores)
            mae = sum(score[1] for score in series_scores) / len(series_scores)

        # Print results
        print(f"horizon={horizon} (MSE, MAE):", mse, mae)


ModuleNotFoundError: No module named 'torch'

In [None]:
chorales:
    horizon=1 (MSE): 4.558118131312799
horizon=2 (MSE): 4.437389239940906
horizon=3 (MSE): 4.086383812827138
horizon=4 (MSE): 4.126719253199453


traffic: 
    horizon=1 (MSE): 3.7043708654670446
horizon=16 (MSE): 0.8017831806666413
horizon=32 (MSE): 4.110379750081922
horizon=48 (MSE): 2.4100715856088906

air: horizon=5 (MSE): 1.9876120151389391
horizon=10 (MSE): 2.129416452929108
horizon=15 (MSE): 1.6726539743318432
horizon=25 (MSE): 1.5275069208852725

noaa
horizon=15 (MSE): 1.2748951189278355
horizon=30 (MSE): 1.7581442898917465
horizon=60 (MSE): 2.228448580569516
horizon=90 (MSE): 2.125046624927465

electricity
horizon=1 (MSE): 2.220926929158964
horizon=16 (MSE): 1.7718174092451542
horizon=32 (MSE): 0.6443933882876132
exchange
horizon=96 (MSE): 1.2140950859160469
horizon=192 (MSE): 1.1333677058216158
horizon=336 (MSE): 2.635548336343764
horizon=720 (MSE): 2.76881348627615
ettm1
horizon=48 (MSE): 2.085844992420397
horizon=96 (MSE): 2.176134426072294
horizon=192 (MSE): 1.9188860400942036
horizon=336 (MSE): 1.5137154715356693

ecg_single
horizon=1 (MSE): 0.9665788007098376
horizon=10 (MSE): 1.4244523380103224
horizon=25 (MSE): 1.9487501930273452
horizon=100 (MSE): 0.9742798693001177

eeg_single
horizon=1 (MSE): 1.3128353059270872
horizon=10 (MSE): 1.0445649753201618
horizon=25 (MSE): 1.3738529271041866

noaa 174
horizon=96 (MSE, MAE): 1.0011750647840578 0.7907018520813343
horizon=192 (MSE, MAE): 1.2856816289511335 0.9549892477975964
horizon=336 (MSE, MAE): 1.1554943603341599 0.8828054171710957
horizon=720 (MSE, MAE): 1.1243328121104323 0.8485567138925357

SyntaxError: invalid syntax (1761768909.py, line 1)