In [None]:
import os
import pickle

import numpy as np
import pandas as pd
import seaborn
from matplotlib import pyplot as plt

from zero_shot_time.data import get_dataset
from zero_shot_time.data.splits import create_train_test_split, get_custom_train_test_split

In [None]:
def compute_maes(data, test_sets, scaler = 1, num_samples = 20):
    """Compute the MAE of the median forecast, averaged over all series.

    For series ``i`` the frames ``data[i*num_samples:(i+1)*num_samples]`` are
    concatenated, the median of ``y`` is taken per value of ``x``, and the last
    ``len(test_sets[i])`` medians are compared with ``test_sets[i]``.

    Args:
        data: Flat list of DataFrames with at least the columns ``x`` and ``y``,
            ordered so that every ``num_samples`` consecutive frames belong to
            the same series.
        test_sets: Sequence with one ground-truth sequence per series.
        scaler: Divisor applied to the averaged MAE to obtain the scaled MAE.
        num_samples: Number of consecutive frames in ``data`` per series.

    Returns:
        Tuple ``(mae, scaled_mae)`` where ``mae`` is the mean over all series of
        the per-series mean absolute error and ``scaled_mae`` is ``mae / scaler``.

    Raises:
        ValueError: If ``test_sets`` is empty, or if the forecast tail and the
            ground truth of a series differ in length.
    """
    if len(test_sets) == 0:
        raise ValueError("test_sets must contain at least one series")

    total = 0.0
    for i, truth in enumerate(test_sets):
        start = i * num_samples
        # Median over the sampled forecasts at every position x (index is sorted by x).
        median_forecast = pd.concat(data[start:start + num_samples]).groupby('x')['y'].median()
        truth = np.asarray(truth, dtype=float)
        # Positional tail; plain arrays avoid any index alignment with the ground truth.
        pred = median_forecast.iloc[-len(truth):].to_numpy()
        if pred.shape != truth.shape:
            # Guard against silent NumPy broadcasting when the lengths disagree.
            raise ValueError(
                f"series {i}: forecast tail has shape {pred.shape}, ground truth has shape {truth.shape}"
            )
        total += np.mean(np.abs(pred - truth))

    mae = total / len(test_sets)
    return mae, mae / scaler


In [None]:





def _load_forecast_frames(pickle_path):
    """Load pickled predictions and return one DataFrame per sampled forecast.

    Each frame has the columns ``x`` (position within the forecast), ``y`` (the
    forecast values), ``v`` (index of the entry in the pickled list) and ``r``
    (index of the sample within that entry).
    """
    # NOTE(security): unpickling can execute arbitrary code; only load result
    # files that were produced by a trusted run.
    with open(pickle_path, 'rb') as f:
        preds = pickle.load(f)

    frames = []
    for p_idx, p in enumerate(preds):
        # p[0][0] is iterated as the collection of sampled forecasts of this entry.
        for r_index, r in enumerate(p[0][0]):
            frames.append(
                pd.DataFrame(
                    data={
                        'x': np.arange(len(r)),
                        'y': r,
                        'v': [p_idx] * len(r),
                        'r': [r_index] * len(r),
                    }
                )
            )
    return frames


def _plot_series_forecast(samples, history, future, title, out_path):
    """Plot the median forecast with its confidence interval, the history and the true future; save and show it."""
    history = np.asarray(history)
    future = np.asarray(future)
    train_len = len(history)

    fig, ax = plt.subplots()
    # Start one step before the forecast horizon so the forecast line connects
    # to the history. Note that the median is plotted.
    seaborn.lineplot(data=samples[samples.x > train_len - 2], x='x', y='y', errorbar='ci',
                     estimator=np.median, color='blue', ax=ax)
    # Historical data
    ax.plot(np.arange(train_len), history, color='black')
    # Slightly transparent future data, prefixed with the last historical point.
    # np.concatenate works for lists and arrays alike (list `+` does not).
    ax.plot(np.arange(train_len - 1, train_len + len(future)),
            np.concatenate((history[-1:], future)),
            linewidth=2, alpha=0.7, color='orange')

    ax.set_title(title)
    fig.savefig(out_path)

    # Show the figure after saving it, then release it so figures do not pile up.
    plt.show()
    plt.close(fig)


def plot_dataset_forecasts(configs, models, limit=5):
    """Evaluate and plot stored forecasts for every dataset/model combination.

    For each dataset the train/test split is rebuilt, and for each model the
    pickled predictions are read from
    ``./results/{origin}_{dataset}_{model}.data.pickle``. The MAE and scaled MAE
    are printed, and up to ``limit`` series are plotted and written to
    ``./plots/{dataset}_{model}_{series}.pdf``.

    Args:
        configs: Mapping ``dataset_name -> (dataset_origin, scaler)``.
        models: Iterable of model names (used to locate the result files).
        limit: Maximum number of series plotted per dataset/model combination.

    Returns:
        List of dicts with the keys ``dataset``, ``model``, ``mae`` and ``maes``.
    """
    # Number of sampled forecasts stored per series; compute_maes groups the
    # frames in chunks of the same size.
    samples_per_series = 20
    # savefig does not create missing directories.
    os.makedirs('./plots', exist_ok=True)

    res = []
    for dataset_name, (dataset_origin, scaler) in configs.items():
        dataset, target = get_dataset(dataset_name=dataset_origin, sub_category=dataset_name)
        if target is not None:
            # Create train, validation, test split
            _, train_sets, test_sets = create_train_test_split(dataset["train"], dataset["test"], target=target)
        else:
            _, train_sets, test_sets = get_custom_train_test_split(dataset, split_fraction=0.2)

        for model_name in models:
            data = _load_forecast_frames(f'./results/{dataset_origin}_{dataset_name}_{model_name}.data.pickle')
            assert len(data) == samples_per_series * len(train_sets), (
                f'expected {samples_per_series} forecasts per series, '
                f'got {len(data)} frames for {len(train_sets)} series'
            )

            mae, maes = compute_maes(data, test_sets, scaler)
            print(dataset_name, model_name, mae, maes)
            res.append({'dataset': dataset_name,
                        'model': model_name,
                        'mae': mae,
                        'maes': maes})

            for i in range(min(limit, len(test_sets))):
                # All sampled forecasts that belong to series i
                slc = pd.concat(data[i * samples_per_series:(i + 1) * samples_per_series])
                _plot_series_forecast(
                    slc,
                    train_sets[i],
                    test_sets[i],
                    title=f'{dataset_name} {model_name} series: {i+1}',
                    out_path=f'./plots/{dataset_name}_{model_name}_{i+1}.pdf',
                )
    return res


## Plotting Darts results

Next, we consider the Darts results and plot them with the `plot_dataset_forecasts` function defined above. The plots are also written to the [`./plots/`](./plots) directory.

Additionally, we create a LaTeX table that gives a single overview of the results for the blog post.

In [None]:
# Scaler taken from the original repository (1.0 for both Darts datasets).
darts_models = [
    'gpt2',
    'gpt2-large',
    'Llama-2-7b-hf',
    'Llama-2-13b-hf',
]
# dataset name -> (dataset origin, scaler)
darts_dataset = {name: ('darts', 1.0) for name in ('airpassenger', 'beer')}

# Only the first series of every dataset/model combination is plotted.
darts_result = plot_dataset_forecasts(configs=darts_dataset, models=darts_models, limit=1)

# Emit the overview table as LaTeX, without the row index.
print(
    pd.DataFrame(darts_result)
    .loc[:, ['model', 'dataset', 'mae']]
    .to_latex(index=False)
)

## Plotting Monash results

In [None]:
# Scalers taken from `data/last_val_mae.csv` in the original llmtime repository (see links).
monash_models = ['gpt2', 'gpt2-large', 'Llama-2-7b-hf']

# dataset name -> (dataset origin, scaler)
monash_datasets = {
    'tourism_yearly': ('monash_tsf', 99456.0540551959), # https://github.com/ngruver/llmtime/blob/a9b451b9dad4b443d5c00652b39d922effd8870e/data/last_val_mae.csv?plain=1#L13
    'nn5_weekly':('monash_tsf', 16.708553516113007), # https://github.com/ngruver/llmtime/blob/a9b451b9dad4b443d5c00652b39d922effd8870e/data/last_val_mae.csv?plain=1#L21
}

# Keep the Monash results under their own name so `darts_result` is not overwritten.
monash_result = plot_dataset_forecasts(monash_datasets, monash_models, limit=5)

# Index the table by (model, dataset); `mae` and `maes` remain as the value columns.
print(
    pd.DataFrame(monash_result)[['model', 'dataset', 'mae', 'maes']]
    .set_index(['model', 'dataset'])
    .to_latex()
)