# Code to make the additional tables

### Imports

In [1]:
import numpy as np
import os
from os.path import join
import pandas as pd

# Directory that is listed in the "Load data" cell; files whose names contain
# 'nrmse' or 'maae' are read from here. The path is relative to the working directory.
results_dir = 'results'

### Load data

In [2]:
# os.listdir() returns entries in arbitrary order. The processing cell pairs
# files_nrmse[i] with files_maae[i], so list the directory once and sort it to
# get a deterministic order for both lists.
# NOTE(review): the pairing assumes the nrmse/maae file names of one run differ
# only in the metric token, so that both lists sort identically -- confirm.
result_names = sorted(os.listdir(results_dir))

files_nrmse = [join(results_dir, f) for f in result_names if 'nrmse' in f]
files_maae  = [join(results_dir, f) for f in result_names if 'maae'  in f]

# Every run must contribute exactly one file per metric.
assert len(files_maae) == len(files_nrmse), 'nrmse/maae result files are not paired'

# Number of result files per metric; used as the divisor when averaging.
N = len(files_nrmse)
assert N > 0, f'no result files found in {results_dir!r}'

### Process data

In [3]:
def _load_metric_table(path):
    """Read one results CSV into a DataFrame.

    The first CSV column becomes the row index. The summary rows 'avg_all'
    and 'avg_6' and the column 'zero' are dropped.
    """
    return pd.read_csv(path, index_col=0).drop(['avg_all', 'avg_6']).drop(columns='zero')


df_nrmse = [_load_metric_table(f) for f in files_nrmse]
df_maae  = [_load_metric_table(f) for f in files_maae]

# Every column except the Kalman filter column K is rescaled relative to K.
cols = df_nrmse[0].columns[df_nrmse[0].columns != 'K']

# Relative values above this threshold are treated as outliers and imputed.
outlier_thresh = 10

for i in range(N):
    for c in cols:
        # Express the metric as a relative change versus K: value / K - 1.
        df_nrmse[i][c] = df_nrmse[i][c] / df_nrmse[i].K - 1
        df_maae[i][c] = df_maae[i][c] / df_maae[i].K - 1

        # Check for and impute any outlier values.
        # This is just an issue with DKF_GP.
        # Every once in a while the GP regression will produce an extreme nRMSE value.
        # A row is an outlier if either metric exceeds the threshold.
        outliers = (df_nrmse[i][c] > outlier_thresh) | (df_maae[i][c] > outlier_thresh)
        if outliers.any():
            # Assign through .loc on the frame itself. Chained indexing
            # (df[c][mask] = ...) may write to a temporary copy and leave the
            # frame unchanged (always the case under pandas Copy-on-Write).
            df_nrmse[i].loc[outliers, c] = df_nrmse[i].loc[~outliers, c].mean()
            df_maae[i].loc[outliers, c] = df_maae[i].loc[~outliers, c].mean()

In [4]:
def _mean_by_trial(frames, n_frames):
    """Average a list of identically shaped frames and append an 'Average' row.

    The element-wise sum of `frames` is divided by `n_frames`, each row label
    x is renamed to 'Trial {int(x)+1}', and a final row 'Average' holding the
    column means over all trial rows is added.
    """
    averaged = sum(frames) / n_frames
    trial_labels = [f'Trial {int(row_id)+1}' for row_id in averaged.index]
    averaged = averaged.set_index(pd.Index(trial_labels))
    averaged.loc['Average'] = averaged.mean()
    return averaged


# Original author's note: the Kalman filter column K is the same in every
# dataframe, so a plain element-wise average over all dataframes is safe.
nrmse = _mean_by_trial(df_nrmse, N)
maae = _mean_by_trial(df_maae, N)

### Display Final Results
The same trends were observed in the average across all trials and in the average across the first 6 trials.

In [5]:
# Show nRMSE as whole percentages, transposed so that each row is one column
# of the processed table. Rows other than K hold the relative change versus K
# (value / K - 1); K itself is not rescaled by the processing cell.
# Scale first and round last: `.round(2) * 100` can land just below an integer
# (0.29 * 100 == 28.999999999999996), which `.astype(int)` would truncate to 28.
display((nrmse.T * 100).round().astype(int))

Unnamed: 0,Trial 1,Trial 2,Trial 3,Trial 4,Trial 5,Trial 6,Trial 9,Trial 10,Average
K,76,94,79,79,78,76,101,92,84
NN,-19,-17,-21,-23,-20,-23,-5,-7,-17
DKF_NN,-20,-11,-12,-16,-17,-20,-4,-9,-14
GP,-3,-9,-7,-8,-10,-6,-7,-14,-8
DKF_GP,-6,-11,-8,-8,-12,-8,-8,-16,-10
NW,-15,-18,-18,-24,-20,-21,-5,3,-15
DKF_NW,-18,-18,-12,-19,-18,-20,-5,2,-14
LSTM,-22,12,-23,-21,-23,-25,-7,-12,-15
DKF_LSTM,-15,84,-8,-7,-15,-14,-4,-12,1
EKF,2,10,11,18,14,8,5,12,10


In [9]:
# Show MAAE as whole percentages, transposed so that each row is one column
# of the processed table. Rows other than K hold the relative change versus K
# (value / K - 1); K itself is not rescaled by the processing cell.
# Scale first and round last: `.round(2) * 100` can land just below an integer
# (0.29 * 100 == 28.999999999999996), which `.astype(int)` would truncate to 28.
display((maae.T * 100).round().astype(int))

Unnamed: 0,Trial 1,Trial 2,Trial 3,Trial 4,Trial 5,Trial 6,Trial 9,Trial 10,Average
K,88,96,103,93,97,93,97,103,96
NN,1,3,-17,-7,-15,-15,4,3,-5
DKF_NN,-6,2,-17,-10,-19,-20,-3,1,-9
GP,2,12,-18,-3,-13,-16,6,-4,-4
DKF_GP,-5,11,-19,-7,-19,-20,0,-8,-8
NW,-8,1,-21,-15,-21,-25,-1,-6,-12
DKF_NW,-14,-1,-21,-16,-25,-28,-5,-5,-14
LSTM,-6,15,-20,-13,-20,-20,-3,-5,-9
DKF_LSTM,-6,24,-18,-12,-20,-20,-2,-5,-7
EKF,0,8,1,1,-4,-2,3,19,3
