# Load libraries

In [1]:
# load pandas and numpy, plus the standard-library modules re, os and sys
import pandas as pd
import numpy as np
import re
import os
import sys

# Read results and create tables for no-covariates models

In [2]:
models = ['arima', 'nhits', 'tft', 'linreg', 'xgboost', 'transformer']
datasets = ['weinstock', 'dubosson', 'colas', 'iglu', 'hall']
time_steps = 12

# Decimal numbers, optionally in scientific notation ("0.25", "1.2e-05",
# "5e-05", "1.e-05"). A bare integer without an exponent is deliberately not
# matched. Error metrics are parsed unsigned; likelihoods and calibration
# errors may carry a leading minus sign.
UNSIGNED_FLOAT = re.compile(r'\d+(?:\.\d*)?[eE][-+]?\d+|\d+\.\d+')
SIGNED_FLOAT = re.compile(r'-?\d+(?:\.\d*)?[eE][-+]?\d+|-?\d+\.\d+')


def parse_metric_lines(lines, n_steps):
    """Extract the reported metrics from the lines of one output file.

    Parameters
    ----------
    lines : iterable of str
        Lines of an output file (an open text file works).
    n_steps : int
        Number of leading calibration errors that are averaged.

    Returns
    -------
    dict
        Maps ``(metric, split)`` to a float, where ``metric`` is one of
        ``'mse'``, ``'mae'``, ``'likelihood'``, ``'calibration'`` and ``split``
        is ``'ID'`` or ``'OOD'``. A key is present only if a matching line with
        enough numbers was found; if a line type occurs several times, the
        last occurrence wins.
    """
    parsed = {}
    for line in lines:
        for split in ('ID', 'OOD'):
            if line.startswith(f'{split} median of (MSE, MAE):'):
                values = UNSIGNED_FLOAT.findall(line)
                # Need both numbers; a malformed line leaves the cells empty
                # instead of aborting the whole table build.
                if len(values) >= 2:
                    parsed['mse', split] = float(values[0])
                    parsed['mae', split] = float(values[1])
            elif line.startswith(f'{split} likelihoods:'):
                values = SIGNED_FLOAT.findall(line)
                if values:
                    parsed['likelihood', split] = float(values[0])
            elif line.startswith(f'{split} calibration errors:'):
                # Average only the first n_steps calibration errors
                values = SIGNED_FLOAT.findall(line)[:n_steps]
                if values:
                    parsed['calibration', split] = float(np.mean([float(v) for v in values]))
    return parsed


# One "<dataset> ID" and one "<dataset> OOD" column per dataset, sorted by label
metric_columns = sorted(f'{name} {split}' for name in datasets for split in ('ID', 'OOD'))

# create float dataframes (NaN = not available) with columns for each dataset and rows for each model
df_mse = pd.DataFrame(index=models, columns=metric_columns, dtype=float)
df_mae = pd.DataFrame(index=models, columns=metric_columns, dtype=float)
df_likelihood = pd.DataFrame(index=models[1:], columns=metric_columns, dtype=float)  # no likelihood for ARIMA
df_calibration = pd.DataFrame(index=models[1:], columns=metric_columns, dtype=float)  # no calibration for ARIMA
metric_tables = {'mse': df_mse, 'mae': df_mae,
                 'likelihood': df_likelihood, 'calibration': df_calibration}

for model in models:
    for dataset in datasets:
        filename = f"./output/{model}_{dataset}.txt"
        if not os.path.isfile(filename):
            continue  # no results for this model/dataset pair: cells stay NaN
        with open(filename, 'r') as f:
            parsed = parse_metric_lines(f, time_steps)
        for (metric, split), value in parsed.items():
            table = metric_tables[metric]
            # Assigning through .loc to an unknown row label would silently
            # append that row (e.g. ARIMA to the likelihood table), so skip it.
            if model in table.index:
                table.loc[model, f'{dataset} {split}'] = value


In [4]:
# Show the MAE table: one row per model, an ID and an OOD column for each dataset
df_mae

Unnamed: 0,colas ID,colas OOD,dubosson ID,dubosson OOD,hall ID,hall OOD,iglu ID,iglu OOD,weinstock ID,weinstock OOD
arima,4.803568,4.874943,11.059068,14.577069,7.341407,6.968186,8.667667,9.709008,11.248463,13.335658
nhits,5.038237,4.826659,14.788918,15.594684,6.567609,6.61703,12.070384,12.770672,11.208269,12.241373
tft,4.544996,4.473095,15.492347,14.525014,6.613262,6.760322,11.074825,10.229446,11.759377,12.497651
linreg,4.353447,4.413553,9.9667,11.901683,6.334506,6.624366,9.705591,9.831106,11.459088,13.088043
xgboost,5.4918,5.317744,19.087549,15.419293,6.551548,6.515803,11.501088,8.715117,11.610262,13.040385
transformer,5.649632,5.241219,14.035322,12.979963,6.783746,7.065537,13.198378,12.28025,11.215074,11.905534


# Read results and create tables for with-covariates models

In [3]:
models = ['nhits', 'tft', 'linreg', 'xgboost', 'transformer']
datasets = ['weinstock', 'dubosson', 'colas', 'iglu', 'hall']
time_steps = 12

# Decimal numbers, optionally in scientific notation ("0.25", "1.2e-05",
# "5e-05", "1.e-05"). A bare integer without an exponent is deliberately not
# matched. Error metrics are parsed unsigned; likelihoods and calibration
# errors may carry a leading minus sign.
UNSIGNED_FLOAT = re.compile(r'\d+(?:\.\d*)?[eE][-+]?\d+|\d+\.\d+')
SIGNED_FLOAT = re.compile(r'-?\d+(?:\.\d*)?[eE][-+]?\d+|-?\d+\.\d+')


def parse_metric_lines(lines, n_steps):
    """Extract the reported metrics from the lines of one output file.

    Parameters
    ----------
    lines : iterable of str
        Lines of an output file (an open text file works).
    n_steps : int
        Number of leading calibration errors that are averaged.

    Returns
    -------
    dict
        Maps ``(metric, split)`` to a float, where ``metric`` is one of
        ``'mse'``, ``'mae'``, ``'likelihood'``, ``'calibration'`` and ``split``
        is ``'ID'`` or ``'OOD'``. A key is present only if a matching line with
        enough numbers was found; if a line type occurs several times, the
        last occurrence wins.
    """
    parsed = {}
    for line in lines:
        for split in ('ID', 'OOD'):
            if line.startswith(f'{split} median of (MSE, MAE):'):
                values = UNSIGNED_FLOAT.findall(line)
                # Need both numbers; a malformed line leaves the cells empty
                # instead of aborting the whole table build.
                if len(values) >= 2:
                    parsed['mse', split] = float(values[0])
                    parsed['mae', split] = float(values[1])
            elif line.startswith(f'{split} likelihoods:'):
                values = SIGNED_FLOAT.findall(line)
                if values:
                    parsed['likelihood', split] = float(values[0])
            elif line.startswith(f'{split} calibration errors:'):
                # Average only the first n_steps calibration errors
                values = SIGNED_FLOAT.findall(line)[:n_steps]
                if values:
                    parsed['calibration', split] = float(np.mean([float(v) for v in values]))
    return parsed


# One "<dataset> ID" and one "<dataset> OOD" column per dataset, sorted by label
metric_columns = sorted(f'{name} {split}' for name in datasets for split in ('ID', 'OOD'))

# create float dataframes (NaN = not available) with columns for each dataset and rows for each model
df_mse = pd.DataFrame(index=models, columns=metric_columns, dtype=float)
df_mae = pd.DataFrame(index=models, columns=metric_columns, dtype=float)
df_likelihood = pd.DataFrame(index=models, columns=metric_columns, dtype=float)
df_calibration = pd.DataFrame(index=models, columns=metric_columns, dtype=float)
metric_tables = {'mse': df_mse, 'mae': df_mae,
                 'likelihood': df_likelihood, 'calibration': df_calibration}

for model in models:
    for dataset in datasets:
        filename = f"./output/{model}_covariates_{dataset}.txt"
        if not os.path.isfile(filename):
            continue  # no results for this model/dataset pair: cells stay NaN
        with open(filename, 'r') as f:
            parsed = parse_metric_lines(f, time_steps)
        for (metric, split), value in parsed.items():
            metric_tables[metric].loc[model, f'{dataset} {split}'] = value

In [7]:
# Show the calibration-error table: one row per model, an ID and an OOD column for each dataset
df_calibration

Unnamed: 0,colas ID,colas OOD,dubosson ID,dubosson OOD,hall ID,hall OOD,iglu ID,iglu OOD,weinstock ID,weinstock OOD
nhits,0.214447,0.141414,0.080689,0.197497,0.06681,0.064741,0.071817,0.063145,0.107733,0.112227
tft,0.160679,0.086539,0.249412,0.352475,0.075971,0.083475,0.302196,0.233199,,
linreg,0.186624,0.170056,0.205757,0.47961,0.191934,0.234557,0.131142,0.551739,0.112624,0.207536
xgboost,0.088209,0.084381,0.064593,0.071753,0.092588,0.097941,0.066762,0.113061,0.102187,0.104396
transformer,0.306291,0.219041,0.178524,0.246606,0.095531,0.081821,0.212265,0.110486,0.107479,0.120045


# Clearing the results: truncate output files after the last "Best value:" line

In [1]:
import os

# Set the directory path to the folder containing the output files
folder_path = './output'

# Only files whose names start with one of these prefixes are cleaned
RESULT_FILE_PREFIXES = ('transformer', 'tft', 'linreg', 'xgboost', 'nhits')


def clear_results(folder, prefixes=RESULT_FILE_PREFIXES, marker='Best value: '):
    """Drop everything after the last ``marker`` line in each matching file.

    Parameters
    ----------
    folder : str
        Directory to scan (not recursive). A missing directory is treated as
        "nothing to clear".
    prefixes : iterable of str
        A file is processed only if its name starts with one of these.
    marker : str
        Lines starting with this text mark the end of the part to keep; the
        last such line in a file is the cut point.

    Returns
    -------
    list of str
        Paths of the files that were actually rewritten, in sorted order.
        Files without a marker line, or with nothing after it, are untouched.
    """
    if not os.path.isdir(folder):
        return []

    prefixes = tuple(prefixes)  # str.startswith accepts a tuple, not a list
    rewritten = []
    for filename in sorted(os.listdir(folder)):
        file_path = os.path.join(folder, filename)
        # listdir also returns sub-directories; those cannot be opened as files
        if not filename.startswith(prefixes) or not os.path.isfile(file_path):
            continue

        with open(file_path, 'r') as f:
            lines = f.readlines()

        # Search backwards so that the *last* marker line is the cut point
        cut = next((i for i in range(len(lines) - 1, -1, -1)
                    if lines[i].startswith(marker)), None)
        if cut is None or cut == len(lines) - 1:
            continue  # nothing to remove: leave the file untouched

        with open(file_path, 'w') as f:
            f.writelines(lines[:cut + 1])
        rewritten.append(file_path)
    return rewritten


cleared_files = clear_results(folder_path)
