In [None]:
import os
import time
import argparse
import pandas as pd
import numpy as np

from neuralforecast.core import NeuralForecast
from neuralforecast.losses.numpy import mae, mape, mase, rmse, smape
from config_timenet import MODEL_LIST, load_model

In [None]:
def read_data(partitions_dataset, frequency=None):
    """Load and concatenate all parquet partitions of one (sub)dataset.

    Parameters
    ----------
    partitions_dataset : pd.DataFrame
        Partition table; its 'url' column lists the parquet files to read.
    frequency : str, optional
        Frequency label of the data. For '30Minutely', '10Minutely' and
        'Minutely' only the most recent observations of every series are
        kept; any other value (including the default None) keeps the full
        history.

    Returns
    -------
    pd.DataFrame
        Concatenated frame with a fresh RangeIndex and a timezone-naive
        'ds' column.
    """
    # Most-recent observations kept per series for high-frequency data,
    # written as (observations per day) * (days).
    tail_sizes = {
        '30Minutely': 48 * 120,
        '10Minutely': 144 * 90,
        'Minutely': 60 * 24 * 30,
    }

    frames = [pd.read_parquet(url) for url in partitions_dataset['url'].values]
    Y_df = pd.concat(frames, axis=0).reset_index(drop=True)
    # Drop timezone information so timestamps are naive
    Y_df['ds'] = pd.to_datetime(Y_df['ds']).dt.tz_localize(None)

    n_keep = tail_sizes.get(frequency)
    if n_keep is not None:
        Y_df = Y_df.groupby('unique_id').tail(n_keep).reset_index(drop=True)
    return Y_df

def run_inference(nf, Y_df, horizon):
    """Forecast the last window of every series with an already-fitted model.

    Calls ``nf.cross_validation`` on ``Y_df`` with a single window and no
    re-fitting, keeps the last ``horizon`` rows of each ``unique_id`` and
    prints how many of the kept rows have a missing ``y``.

    Returns the kept rows as a DataFrame.
    """
    cv_kwargs = dict(df=Y_df,
                     n_windows=1,
                     fit_models=False,
                     use_init_models=False)
    forecasts = nf.cross_validation(**cv_kwargs)
    forecasts = forecasts.reset_index()

    last_window = forecasts.groupby('unique_id').tail(horizon)
    missing_targets = last_window['y'].isnull().sum()
    print('nulls:', missing_targets)
    return last_window

def compute_losses(Y_hat_df, y_hat_col, dataset, subdataset, frequency, df_results=None):
    """Compute aggregate MAE/MAPE/RMSE/sMAPE and append them to a results table.

    Parameters
    ----------
    Y_hat_df : pd.DataFrame
        Frame holding the actuals in column 'y' and the forecasts in
        column ``y_hat_col``.
    y_hat_col : str
        Name of the forecast column to evaluate.
    dataset, subdataset, frequency
        Labels stored alongside the metrics.
    df_results : pd.DataFrame, optional
        Table to append the new row to. When omitted, a module-level
        variable named ``df_results`` is used if it exists, so existing
        callers that rely on the notebook-level accumulator keep working.

    Returns
    -------
    pd.DataFrame
        ``df_results`` with one extra row, or just that row when no
        previous table is available.
    """
    if df_results is None:
        # The original body read `df_results` before assigning it, which makes
        # the name local and raises UnboundLocalError. Look the accumulator up
        # explicitly in the module namespace instead.
        df_results = globals().get('df_results')

    y = Y_hat_df['y']
    y_hat = Y_hat_df[y_hat_col]

    row = pd.DataFrame({
        'dataset': [dataset],
        'subdataset': [subdataset],
        'frequency': [frequency],
        'mae': [mae(y=y, y_hat=y_hat)],
        'mape': [mape(y=y, y_hat=y_hat)],
        'rmse': [rmse(y=y, y_hat=y_hat)],
        'smape': [smape(y=y, y_hat=y_hat)],
    })

    if df_results is None:
        return row
    return pd.concat([df_results, row], ignore_index=True)

def compute_losses_by_ts(Y_hat_df, y_hat_col, model_name, dataset, subdataset, frequency):
    """Compute MAE/MAPE/RMSE/sMAPE separately for every series.

    Parameters
    ----------
    Y_hat_df : pd.DataFrame
        Frame with columns 'unique_id', 'y' and ``y_hat_col``.
    y_hat_col : str
        Name of the forecast column to evaluate.
    model_name : str
        Name of the output column that holds the metric values.
    dataset, subdataset, frequency
        Labels repeated on every output row.

    Returns
    -------
    pd.DataFrame
        Long-format frame with columns
        ['unique_id', 'dataset', 'subdataset', 'metric', 'frequency', model_name],
        one row per (series, metric).
    """
    # Insertion order fixes the order of the metric blocks in the output.
    metric_fns = {'mae': mae, 'mape': mape, 'rmse': rmse, 'smape': smape}
    columns = ['unique_id', 'dataset', 'subdataset', 'metric', 'frequency', model_name]

    # Select only the columns the metrics need before applying per group.
    grouped = Y_hat_df.groupby('unique_id')[['y', y_hat_col]]

    frames = []
    for metric_name, metric_fn in metric_fns.items():
        # Bind metric_fn as a default argument so each lambda keeps its own metric.
        per_series = grouped.apply(lambda g, fn=metric_fn: fn(y=g['y'], y_hat=g[y_hat_col]))
        frames.append(pd.DataFrame({
            'unique_id': per_series.index,
            'dataset': dataset,
            'subdataset': subdataset,
            'metric': metric_name,
            'frequency': frequency,
            # Use model_name here; this column used to be hard-coded as 'NHITS',
            # which produced a stray column for any other model name.
            model_name: per_series.values,
        }))

    return pd.concat(frames, ignore_index=True)[columns]

## Yearly

In [None]:
# Parameters
frequency = 'Yearly'
source_dataset = 'timenet'
model = 'nhits_30_1024_yearly'
experiment_id = '20230626'
horizon = 1

# pandas offset alias used for each subdataset
# ('Y' is pandas' year-end alias, 'AS' its year-start alias).
freq_by_subdataset = {'M3': 'Y', 'M4': 'AS'}

# Load the stored model for this frequency
nf = NeuralForecast.load(path=
        f'./results/stored_models/{source_dataset}/{model}/{experiment_id}/')

# Keep only the partitions of this frequency
parts_df = pd.read_csv('partitions_df.csv')
parts_df = parts_df[parts_df['frequency'] == frequency]

df_results = pd.DataFrame(columns = ['dataset', 'subdataset', 'frequency', 'mae', 'mape', 'rmse', 'smape'])
datasets = parts_df['dataset'].unique()
print('Datasets', datasets)
for dataset in datasets:
    parts_dataset = parts_df[parts_df['dataset'] == dataset]
    subdatasets = parts_dataset['subdataset'].unique()
    print('Subdatasets', subdatasets)
    for subdataset in subdatasets:

        # Fail loudly on an unmapped subdataset instead of raising a NameError
        # or silently reusing the frequency left over from the previous iteration.
        if subdataset not in freq_by_subdataset:
            raise ValueError(f'No pandas frequency configured for subdataset {subdataset!r}')
        freq = freq_by_subdataset[subdataset]
        nf.freq = pd.tseries.frequencies.to_offset(freq)

        subparts_dataset = parts_dataset[parts_dataset['subdataset'] == subdataset]

        # Read Data (read_data uses the frequency to decide how much history to keep)
        Y_df = read_data(partitions_dataset=subparts_dataset, frequency=frequency)

        # Run inference
        Y_hat_df = run_inference(nf=nf, Y_df=Y_df, horizon=horizon)

        # Compute metrics
        df_results = compute_losses(Y_hat_df=Y_hat_df, y_hat_col='NHITS-median', dataset=dataset, subdataset=subdataset, frequency=frequency)
        df_metric_by_id = compute_losses_by_ts(Y_hat_df=Y_hat_df, y_hat_col='NHITS-median', model_name='NHITS',
                                               dataset=dataset, subdataset=subdataset, frequency=frequency)

        # Persist after every subdataset so partial results survive an interruption
        df_results.to_csv('results_yearly.csv', index=False)
        df_metric_by_id.to_parquet(f'./results/final/{dataset}_{subdataset}_{frequency}.parquet')

## Quarterly

In [None]:
# Parameters
frequency = 'Quarterly'
source_dataset = 'timenet'
model = 'nhits_30_1024_quarterly'
experiment_id = '20230626'
horizon = 4

# pandas offset alias used for each subdataset
# ('Q' is pandas' quarter-end alias, 'QS' its quarter-start alias).
freq_by_subdataset = {'M3': 'Q', 'M4': 'QS'}

# Load the stored model for this frequency
nf = NeuralForecast.load(path=
        f'./results/stored_models/{source_dataset}/{model}/{experiment_id}/')

# Keep only the partitions of this frequency
parts_df = pd.read_csv('partitions_df.csv')
parts_df = parts_df[parts_df['frequency'] == frequency]

df_results = pd.DataFrame(columns = ['dataset', 'subdataset', 'frequency', 'mae', 'mape', 'rmse', 'smape'])
datasets = parts_df['dataset'].unique()
print('Datasets', datasets)
for dataset in datasets:
    parts_dataset = parts_df[parts_df['dataset'] == dataset]
    subdatasets = parts_dataset['subdataset'].unique()
    print('Subdatasets', subdatasets)
    for subdataset in subdatasets:
        subparts_dataset = parts_dataset[parts_dataset['subdataset'] == subdataset]

        # Fail loudly on an unmapped subdataset instead of raising a NameError
        # or silently reusing the frequency left over from the previous iteration.
        if subdataset not in freq_by_subdataset:
            raise ValueError(f'No pandas frequency configured for subdataset {subdataset!r}')
        freq = freq_by_subdataset[subdataset]
        nf.freq = pd.tseries.frequencies.to_offset(freq)

        # Read Data (read_data uses the frequency to decide how much history to keep)
        Y_df = read_data(partitions_dataset=subparts_dataset, frequency=frequency)

        # Run inference
        Y_hat_df = run_inference(nf=nf, Y_df=Y_df, horizon=horizon)

        # Compute metrics
        df_results = compute_losses(Y_hat_df=Y_hat_df, y_hat_col='NHITS-median', dataset=dataset, subdataset=subdataset, frequency=frequency)
        df_metric_by_id = compute_losses_by_ts(Y_hat_df=Y_hat_df, y_hat_col='NHITS-median', model_name='NHITS',
                                               dataset=dataset, subdataset=subdataset, frequency=frequency)

        # Persist after every subdataset so partial results survive an interruption
        df_results.to_csv('results_quarterly.csv', index=False)
        df_metric_by_id.to_parquet(f'./results/final/{dataset}_{subdataset}_{frequency}.parquet')

## Monthly

In [None]:
# Parameters
frequency = 'Monthly'
source_dataset = 'timenet'
model = 'nhits_30_1024_monthly'
experiment_id = '20230626'
horizon = 12

# pandas offset alias used for each subdataset
# ('M' is pandas' month-end alias, 'MS' its month-start alias).
freq_by_subdataset = {
    'M4': 'MS',
    'M3': 'M',
    'hospital': 'MS',
    'car_parts': 'MS',
}

# Load the stored model for this frequency
nf = NeuralForecast.load(path=
        f'./results/stored_models/{source_dataset}/{model}/{experiment_id}/')

# Keep only the partitions of this frequency
parts_df = pd.read_csv('partitions_df.csv')
parts_df = parts_df[parts_df['frequency'] == frequency]

# NOTE(review): the message says 'Wiki' but the filter drops subdataset 'Mini';
# confirm both refer to the same data.
print('Removing Wiki')
print('Partitions before: ', parts_df.shape)
parts_df = parts_df[parts_df['subdataset'] != 'Mini']
print('Partitions after: ', parts_df.shape)

df_results = pd.DataFrame(columns = ['dataset', 'subdataset', 'frequency', 'mae', 'mape', 'rmse', 'smape'])
datasets = parts_df['dataset'].unique()
print('Datasets', datasets)
for dataset in datasets:
    parts_dataset = parts_df[parts_df['dataset'] == dataset]
    subdatasets = parts_dataset['subdataset'].unique()
    print('Subdatasets', subdatasets)
    for subdataset in subdatasets:

        # Fail loudly on an unmapped subdataset instead of raising a NameError
        # or silently reusing the frequency left over from the previous iteration.
        if subdataset not in freq_by_subdataset:
            raise ValueError(f'No pandas frequency configured for subdataset {subdataset!r}')
        freq = freq_by_subdataset[subdataset]
        nf.freq = pd.tseries.frequencies.to_offset(freq)

        subparts_dataset = parts_dataset[parts_dataset['subdataset'] == subdataset]

        # Read Data (read_data uses the frequency to decide how much history to keep)
        Y_df = read_data(partitions_dataset=subparts_dataset, frequency=frequency)

        # Run inference
        Y_hat_df = run_inference(nf=nf, Y_df=Y_df, horizon=horizon)

        # Compute metrics
        df_results = compute_losses(Y_hat_df=Y_hat_df, y_hat_col='NHITS-median', dataset=dataset, subdataset=subdataset, frequency=frequency)
        df_metric_by_id = compute_losses_by_ts(Y_hat_df=Y_hat_df, y_hat_col='NHITS-median', model_name='NHITS',
                                               dataset=dataset, subdataset=subdataset, frequency=frequency)

        # Persist after every subdataset so partial results survive an interruption
        df_results.to_csv('results_monthly.csv', index=False)
        df_metric_by_id.to_parquet(f'./results/final/{dataset}_{subdataset}_{frequency}.parquet')

## Weekly

In [None]:
# Parameters
frequency = 'Weekly'
source_dataset = 'timenet'
model = 'nhits_30_1024_weekly'
experiment_id = '20230626'
horizon = 1

# pandas weekly offset alias (anchored on a weekday) used for each subdataset
freq_by_subdataset = {
    'ILI': 'W-TUE',
    'electricity': 'W-SUN',
    'nn5': 'W-MON',
}

# Load the stored model for this frequency
nf = NeuralForecast.load(path=
        f'./results/stored_models/{source_dataset}/{model}/{experiment_id}/')

# Keep only the partitions of this frequency
parts_df = pd.read_csv('partitions_df.csv')
parts_df = parts_df[parts_df['frequency'] == frequency]

# NOTE(review): the message says 'Wiki' but the filter drops subdataset 'Mini';
# confirm both refer to the same data.
print('Removing Wiki')
print('Partitions before: ', parts_df.shape)
parts_df = parts_df[parts_df['subdataset'] != 'Mini']
print('Partitions after: ', parts_df.shape)

df_results = pd.DataFrame(columns = ['dataset', 'subdataset', 'frequency', 'mae', 'mape', 'rmse', 'smape'])
datasets = parts_df['dataset'].unique()
print('Datasets', datasets)
for dataset in datasets:
    parts_dataset = parts_df[parts_df['dataset'] == dataset]
    subdatasets = parts_dataset['subdataset'].unique()
    print('Subdatasets', subdatasets)
    for subdataset in subdatasets:

        # Fail loudly on an unmapped subdataset instead of raising a NameError
        # or silently reusing the frequency left over from the previous iteration.
        if subdataset not in freq_by_subdataset:
            raise ValueError(f'No pandas frequency configured for subdataset {subdataset!r}')
        freq = freq_by_subdataset[subdataset]
        nf.freq = pd.tseries.frequencies.to_offset(freq)

        subparts_dataset = parts_dataset[parts_dataset['subdataset'] == subdataset]

        # Read Data (read_data uses the frequency to decide how much history to keep)
        Y_df = read_data(partitions_dataset=subparts_dataset, frequency=frequency)

        # Run inference
        Y_hat_df = run_inference(nf=nf, Y_df=Y_df, horizon=horizon)

        # Compute metrics
        df_results = compute_losses(Y_hat_df=Y_hat_df, y_hat_col='NHITS-median', dataset=dataset, subdataset=subdataset, frequency=frequency)
        df_metric_by_id = compute_losses_by_ts(Y_hat_df=Y_hat_df, y_hat_col='NHITS-median', model_name='NHITS',
                                               dataset=dataset, subdataset=subdataset, frequency=frequency)

        # Persist after every subdataset so partial results survive an interruption
        df_results.to_csv('results_weekly.csv', index=False)
        df_metric_by_id.to_parquet(f'./results/final/{dataset}_{subdataset}_{frequency}.parquet')

## Daily

In [None]:
# Parameters
frequency = 'Daily'
source_dataset = 'timenet'
model = 'nhits_30_1024_daily'
experiment_id = '20230626'
horizon = 7

# Load the stored model for this frequency; unlike the other cells, nf.freq is
# not overridden here, so the frequency saved with the model is used.
nf = NeuralForecast.load(path=
        f'./results/stored_models/{source_dataset}/{model}/{experiment_id}/')

# Keep only the partitions of this frequency
parts_df = pd.read_csv('partitions_df.csv')
parts_df = parts_df[parts_df['frequency'] == frequency]
# NOTE(review): the message says 'Wiki' but the filter drops subdataset 'Mini';
# confirm both refer to the same data.
print('Removing Wiki')
print('Partitions before: ', parts_df.shape)
parts_df = parts_df[parts_df['subdataset'] != 'Mini']
print('Partitions after: ', parts_df.shape)

df_results = pd.DataFrame(columns = ['dataset', 'subdataset', 'frequency', 'mae', 'mape', 'rmse', 'smape'])
datasets = parts_df['dataset'].unique()
print('Datasets', datasets)
for dataset in datasets:
    parts_dataset = parts_df[parts_df['dataset'] == dataset]
    subdatasets = parts_dataset['subdataset'].unique()
    print('Subdatasets', subdatasets)
    for subdataset in subdatasets:

        subparts_dataset = parts_dataset[parts_dataset['subdataset'] == subdataset]

        # Read Data (read_data uses the frequency to decide how much history to keep)
        Y_df = read_data(partitions_dataset=subparts_dataset, frequency=frequency)

        # Run inference
        Y_hat_df = run_inference(nf=nf, Y_df=Y_df, horizon=horizon)

        # Compute metrics
        df_results = compute_losses(Y_hat_df=Y_hat_df, y_hat_col='NHITS-median', dataset=dataset, subdataset=subdataset, frequency=frequency)
        df_metric_by_id = compute_losses_by_ts(Y_hat_df=Y_hat_df, y_hat_col='NHITS-median', model_name='NHITS',
                                               dataset=dataset, subdataset=subdataset, frequency=frequency)

        # Persist after every subdataset so partial results survive an interruption
        df_results.to_csv('results_daily.csv', index=False)
        df_metric_by_id.to_parquet(f'./results/final/{dataset}_{subdataset}_{frequency}.parquet')

## Hourly

In [None]:
# Parameters
frequency = 'Hourly'
source_dataset = 'timenet'
model = 'nhits_30_1024_hourly'
experiment_id = '20230626'
horizon = 24

# Load the stored model for this frequency; unlike the other cells, nf.freq is
# not overridden here, so the frequency saved with the model is used.
nf = NeuralForecast.load(path=
        f'./results/stored_models/{source_dataset}/{model}/{experiment_id}/')

# Keep only the partitions of this frequency
parts_df = pd.read_csv('partitions_df.csv')
parts_df = parts_df[parts_df['frequency'] == frequency]

df_results = pd.DataFrame(columns = ['dataset', 'subdataset', 'frequency', 'mae', 'mape', 'rmse', 'smape'])
datasets = parts_df['dataset'].unique()
print('Datasets', datasets)
for dataset in datasets:
    parts_dataset = parts_df[parts_df['dataset'] == dataset]
    subdatasets = parts_dataset['subdataset'].unique()
    print('Subdatasets', subdatasets)
    for subdataset in subdatasets:
        subparts_dataset = parts_dataset[parts_dataset['subdataset'] == subdataset]

        # Read Data (read_data uses the frequency to decide how much history to keep)
        Y_df = read_data(partitions_dataset=subparts_dataset, frequency=frequency)

        # Run inference
        Y_hat_df = run_inference(nf=nf, Y_df=Y_df, horizon=horizon)

        # Compute metrics
        df_results = compute_losses(Y_hat_df=Y_hat_df, y_hat_col='NHITS-median', dataset=dataset, subdataset=subdataset, frequency=frequency)
        df_metric_by_id = compute_losses_by_ts(Y_hat_df=Y_hat_df, y_hat_col='NHITS-median', model_name='NHITS',
                                               dataset=dataset, subdataset=subdataset, frequency=frequency)

        # Persist after every subdataset so partial results survive an interruption
        df_results.to_csv('results_hourly.csv', index=False)
        df_metric_by_id.to_parquet(f'./results/final/{dataset}_{subdataset}_{frequency}.parquet')

## 30 Minutely

In [None]:
# Parameters
frequency = '30Minutely'
source_dataset = 'timenet'
model = 'nhits_30_1024_30minutely'
experiment_id = '20230626'
horizon = 48

# Load the stored model for this frequency
nf = NeuralForecast.load(path=
        f'./results/stored_models/{source_dataset}/{model}/{experiment_id}/')

# Every subdataset of this cell uses the same pandas frequency, so set it once
freq = '30T'
nf.freq = pd.tseries.frequencies.to_offset(freq)

# Keep only the partitions of this frequency
parts_df = pd.read_csv('partitions_df.csv')
parts_df = parts_df[parts_df['frequency'] == frequency]

df_results = pd.DataFrame(columns = ['dataset', 'subdataset', 'frequency', 'mae', 'mape', 'rmse', 'smape'])
datasets = parts_df['dataset'].unique()
print('Datasets', datasets)
for dataset in datasets:
    parts_dataset = parts_df[parts_df['dataset'] == dataset]
    subdatasets = parts_dataset['subdataset'].unique()
    print('Subdatasets', subdatasets)
    for subdataset in subdatasets:

        subparts_dataset = parts_dataset[parts_dataset['subdataset'] == subdataset]

        # Read Data (read_data uses the frequency to decide how much history to keep)
        Y_df = read_data(partitions_dataset=subparts_dataset, frequency=frequency)

        # Run inference
        Y_hat_df = run_inference(nf=nf, Y_df=Y_df, horizon=horizon)

        # Compute metrics
        df_results = compute_losses(Y_hat_df=Y_hat_df, y_hat_col='NHITS-median', dataset=dataset, subdataset=subdataset, frequency=frequency)
        df_metric_by_id = compute_losses_by_ts(Y_hat_df=Y_hat_df, y_hat_col='NHITS-median', model_name='NHITS',
                                               dataset=dataset, subdataset=subdataset, frequency=frequency)

        # Persist after every subdataset so partial results survive an interruption
        df_results.to_csv('results_30minutely.csv', index=False)
        df_metric_by_id.to_parquet(f'./results/final/{dataset}_{subdataset}_{frequency}.parquet')

## 15 Minutely

In [None]:
# Parameters
frequency = '15Minutely'
source_dataset = 'timenet'
model = 'nhits_30_1024_15minutely'
experiment_id = '20230626'
horizon = 96

# Load the stored model for this frequency
nf = NeuralForecast.load(path=
        f'./results/stored_models/{source_dataset}/{model}/{experiment_id}/')

# Every subdataset of this cell uses the same pandas frequency, so set it once
freq = '15T'
nf.freq = pd.tseries.frequencies.to_offset(freq)

# Keep only the partitions of this frequency, excluding the ECL subdataset
parts_df = pd.read_csv('partitions_df.csv')
parts_df = parts_df[parts_df['frequency'] == frequency]
print('Removing ECL')
print('Partitions before: ', parts_df.shape)
parts_df = parts_df[parts_df['subdataset'] != 'ECL']
print('Partitions after: ', parts_df.shape)

df_results = pd.DataFrame(columns = ['dataset', 'subdataset', 'frequency', 'mae', 'mape', 'rmse', 'smape'])
datasets = parts_df['dataset'].unique()
print('Datasets', datasets)
for dataset in datasets:
    parts_dataset = parts_df[parts_df['dataset'] == dataset]
    subdatasets = parts_dataset['subdataset'].unique()
    print('Subdatasets', subdatasets)
    for subdataset in subdatasets:

        subparts_dataset = parts_dataset[parts_dataset['subdataset'] == subdataset]

        # Read Data (read_data uses the frequency to decide how much history to keep)
        Y_df = read_data(partitions_dataset=subparts_dataset, frequency=frequency)

        # Run inference
        Y_hat_df = run_inference(nf=nf, Y_df=Y_df, horizon=horizon)

        # Compute metrics
        df_results = compute_losses(Y_hat_df=Y_hat_df, y_hat_col='NHITS-median', dataset=dataset, subdataset=subdataset, frequency=frequency)
        df_metric_by_id = compute_losses_by_ts(Y_hat_df=Y_hat_df, y_hat_col='NHITS-median', model_name='NHITS',
                                               dataset=dataset, subdataset=subdataset, frequency=frequency)

        # Persist after every subdataset so partial results survive an interruption
        df_results.to_csv('results_15minutely.csv', index=False)
        df_metric_by_id.to_parquet(f'./results/final/{dataset}_{subdataset}_{frequency}.parquet')

## 10 Minutely

In [None]:
# Parameters
frequency = '10Minutely'
source_dataset = 'timenet'
model = 'nhits_30_1024_10minutely'
experiment_id = '20230626'
horizon = 144

# Load the stored model for this frequency
nf = NeuralForecast.load(path=
        f'./results/stored_models/{source_dataset}/{model}/{experiment_id}/')

# Every subdataset of this cell uses the same pandas frequency, so set it once
freq = '10T'
nf.freq = pd.tseries.frequencies.to_offset(freq)

# Keep only the partitions of this frequency
parts_df = pd.read_csv('partitions_df.csv')
parts_df = parts_df[parts_df['frequency'] == frequency]

df_results = pd.DataFrame(columns = ['dataset', 'subdataset', 'frequency', 'mae', 'mape', 'rmse', 'smape'])
datasets = parts_df['dataset'].unique()
print('Datasets', datasets)
for dataset in datasets:
    parts_dataset = parts_df[parts_df['dataset'] == dataset]
    subdatasets = parts_dataset['subdataset'].unique()
    print('Subdatasets', subdatasets)
    for subdataset in subdatasets:

        subparts_dataset = parts_dataset[parts_dataset['subdataset'] == subdataset]

        # Read Data (read_data uses the frequency to decide how much history to keep)
        Y_df = read_data(partitions_dataset=subparts_dataset, frequency=frequency)

        # Run inference
        Y_hat_df = run_inference(nf=nf, Y_df=Y_df, horizon=horizon)

        # Compute metrics
        df_results = compute_losses(Y_hat_df=Y_hat_df, y_hat_col='NHITS-median', dataset=dataset, subdataset=subdataset, frequency=frequency)
        df_metric_by_id = compute_losses_by_ts(Y_hat_df=Y_hat_df, y_hat_col='NHITS-median', model_name='NHITS',
                                               dataset=dataset, subdataset=subdataset, frequency=frequency)

        # Persist after every subdataset so partial results survive an interruption
        df_results.to_csv('results_10minutely.csv', index=False)
        df_metric_by_id.to_parquet(f'./results/final/{dataset}_{subdataset}_{frequency}.parquet')

## Minutely

In [None]:
# Parameters
frequency = 'Minutely'
source_dataset = 'timenet'
model = 'nhits_30_1024_minutely'
experiment_id = '20230626'
horizon = 60

# Load the stored model for this frequency
nf = NeuralForecast.load(path=
        f'./results/stored_models/{source_dataset}/{model}/{experiment_id}/')

# Every subdataset of this cell uses the same pandas frequency, so set it once
freq = 'T'
nf.freq = pd.tseries.frequencies.to_offset(freq)

# Keep only the partitions of this frequency
parts_df = pd.read_csv('partitions_df.csv')
parts_df = parts_df[parts_df['frequency'] == frequency]

df_results = pd.DataFrame(columns = ['dataset', 'subdataset', 'frequency', 'mae', 'mape', 'rmse', 'smape'])
datasets = parts_df['dataset'].unique()
print('Datasets', datasets)
for dataset in datasets:
    parts_dataset = parts_df[parts_df['dataset'] == dataset]
    subdatasets = parts_dataset['subdataset'].unique()
    print('Subdatasets', subdatasets)
    for subdataset in subdatasets:

        subparts_dataset = parts_dataset[parts_dataset['subdataset'] == subdataset]

        # Read Data (read_data uses the frequency to decide how much history to keep)
        Y_df = read_data(partitions_dataset=subparts_dataset, frequency=frequency)

        # Run inference
        Y_hat_df = run_inference(nf=nf, Y_df=Y_df, horizon=horizon)

        # Compute metrics
        df_results = compute_losses(Y_hat_df=Y_hat_df, y_hat_col='NHITS-median', dataset=dataset, subdataset=subdataset, frequency=frequency)
        df_metric_by_id = compute_losses_by_ts(Y_hat_df=Y_hat_df, y_hat_col='NHITS-median', model_name='NHITS',
                                               dataset=dataset, subdataset=subdataset, frequency=frequency)

        # Persist after every subdataset so partial results survive an interruption
        df_results.to_csv('results_minutely.csv', index=False)
        df_metric_by_id.to_parquet(f'./results/final/{dataset}_{subdataset}_{frequency}.parquet')