In [1]:
import os
import numpy as np
import pandas as pd

In [148]:
# Root folder of the top-5 comparison runs; get_avg_results() walks the
# <model>/<state>/results.csv files stored underneath it.
results_dir = (
    '/srv/scratch/z5370003/projects/03_extremes/'
    'DeepGR4J-Extremes/results/top5_comparison/'
)

In [149]:
def _list_subdirs(parent_dir):
    """Return the sorted names of the non-hidden sub-directories of ``parent_dir``.

    Plain files and dot-directories (e.g. ``.ipynb_checkpoints``) are ignored so
    that stray entries in the results tree are not mistaken for models/states.
    """
    return sorted(
        entry for entry in os.listdir(parent_dir)
        if not entry.startswith('.') and os.path.isdir(os.path.join(parent_dir, entry))
    )


def _load_state_average(results_path, state, quantile):
    """Read one ``results.csv`` and return a single-row frame of averaged metrics.

    The row is labelled with ``state`` (in a ``State`` column) and its remaining
    columns form a (Metric, Dataset) MultiIndex.
    """
    state_results = pd.read_csv(results_path, index_col='Unnamed: 0')

    # Every column other than Metric/Quantile/Dataset is averaged row-wise.
    state_results = (
        state_results
        .set_index(['Metric', 'Quantile', 'Dataset'])
        .mean(axis=1)
        .reset_index()
        .rename(columns={0: state})
    )

    # Keep the rows for the requested quantile plus the rows that carry no
    # quantile at all (NaN). np.isclose guards against float parsing noise.
    quantiles = state_results['Quantile']
    keep = np.isclose(quantiles.to_numpy(dtype=float), quantile) | quantiles.isna().to_numpy()
    state_results = state_results.loc[keep].drop(columns=['Quantile']).sort_values(by='Metric')

    # Pivot to one row per state with (Metric, Dataset) columns.
    state_results = state_results.set_index(['Metric', 'Dataset']).T
    state_results.index.name = 'State'
    return state_results.reset_index()


def get_avg_results(results_dir, quantile=0.5):
    """Collect the averaged RMSE / NSE / Confidence Score for every model and state.

    The function expects the layout ``results_dir/<model>/<state>/results.csv``.
    Each CSV must have an unnamed index column, the columns ``Metric``,
    ``Quantile`` and ``Dataset``, and one or more value columns, which are
    averaged row-wise. Each CSV must also contain ``MSE`` rows for the ``train``
    and ``validation`` datasets, because RMSE is derived as the square root of
    the *averaged* MSE (not as the average of per-column RMSEs).

    Parameters
    ----------
    results_dir : str
        Directory containing one sub-directory per model.
    quantile : float, default 0.5
        Quantile whose rows are reported. Rows with a missing quantile are
        always kept.

    Returns
    -------
    pandas.DataFrame
        Indexed by (State, Model), sorted by both, with (Metric, Dataset)
        columns limited to ``RMSE``, ``NSE`` and ``Confidence Score``.

    Raises
    ------
    ValueError
        If ``results_dir`` has no model directories, or a model directory has
        no state directories.
    FileNotFoundError
        If a state directory does not contain ``results.csv``.
    """
    metric_cols = ['RMSE', 'NSE', 'Confidence Score']

    models = _list_subdirs(results_dir)
    if not models:
        raise ValueError('No model directories found in {!r}'.format(results_dir))

    avg_results = []

    for model in models:
        model_dir = os.path.join(results_dir, model)
        states = _list_subdirs(model_dir)
        if not states:
            raise ValueError('No state directories found in {!r}'.format(model_dir))

        avg_model_results = pd.concat([
            _load_state_average(os.path.join(model_dir, state, 'results.csv'), state, quantile)
            for state in states
        ])

        for dataset in ('train', 'validation'):
            avg_model_results[('RMSE', dataset)] = np.sqrt(avg_model_results[('MSE', dataset)])

        # Explicit copy: the column subset would otherwise be a slice of the
        # concatenated frame, and assigning 'Model' into it is chained assignment.
        avg_model_results = avg_model_results[['State'] + metric_cols].copy()
        avg_model_results.loc[:, 'Model'] = model
        avg_results.append(avg_model_results)

    avg_results = pd.concat(avg_results).sort_values(['State', 'Model']).set_index(['State', 'Model'])
    return avg_results[metric_cols]

In [151]:
# Aggregate every model/state found under `results_dir` and print the table as
# LaTeX source, with floats formatted to four decimal places.
# NOTE(review): `results_dir` is read from the kernel namespace, and a later cell
# rebinds it to the arch_comparison path, so the directory used here depends on
# execution order. Confirm it points at top5_comparison before re-running.
top5_results = get_avg_results(results_dir)
print(top5_results.to_latex(float_format='%.4f'))
# Uncomment to display the DataFrame itself instead of its LaTeX source:
# top5_results

\begin{tabular}{llrrrrrr}
\toprule
 & Metric & \multicolumn{2}{r}{RMSE} & \multicolumn{2}{r}{NSE} & \multicolumn{2}{r}{Confidence Score} \\
 & Dataset & train & validation & train & validation & train & validation \\
State & Model &  &  &  &  &  &  \\
\midrule
\multirow[t]{4}{*}{NSW} & cnn & 3.7952 & 2.7044 & 0.3028 & 0.4233 & 3.5332 & 3.9728 \\
 & lstm & 4.3458 & 3.1349 & 0.0675 & 0.1588 & 9.1795 & 6.6946 \\
 & qdeepgr4j_cnn & 3.6323 & 2.6523 & 0.3632 & 0.4379 & 3.0930 & 4.1626 \\
 & qdeepgr4j_lstm & 4.2833 & 3.0742 & 0.0931 & 0.1909 & 9.0296 & 6.5693 \\
\cline{1-8}
\multirow[t]{4}{*}{NT} & cnn & 2.2627 & 3.0055 & 0.5778 & 0.5836 & 2.1111 & 4.2026 \\
 & lstm & 2.6709 & 3.4629 & 0.4227 & 0.4420 & 4.5600 & 6.6220 \\
 & qdeepgr4j_cnn & 2.1883 & 2.9701 & 0.6061 & 0.5909 & 1.8246 & 4.3851 \\
 & qdeepgr4j_lstm & 2.6106 & 3.3700 & 0.4420 & 0.4635 & 4.2913 & 6.2252 \\
\cline{1-8}
\multirow[t]{4}{*}{QLD} & cnn & 7.4326 & 6.5456 & 0.5562 & 0.5642 & 8.5925 & 12.1544 \\
 & lstm & 9.0420 & 7.7228 

In [147]:
# Same aggregation as the top-5 table, but over the architecture-comparison runs.
# NOTE(review): this rebinds the notebook-level `results_dir`; any cell that reads
# `results_dir` and is executed after this one will see the arch_comparison path.
results_dir = '/srv/scratch/z5370003/projects/03_extremes/DeepGR4J-Extremes/results/arch_comparison/'
arch_results = get_avg_results(results_dir)
# Print the table as LaTeX source with floats formatted to four decimal places.
print(arch_results.to_latex(float_format='%.4f'))

\begin{tabular}{llrrrrrr}
\toprule
 & Metric & \multicolumn{2}{r}{RMSE} & \multicolumn{2}{r}{NSE} & \multicolumn{2}{r}{Confidence Score} \\
 & Dataset & train & validation & train & validation & train & validation \\
State & Model &  &  &  &  &  &  \\
\midrule
\multirow[t]{4}{*}{SA} & qdeepgr4j_cnn_fillna_2 & 0.4866 & 0.3853 & 0.7104 & 0.6323 & 0.3472 & 0.4561 \\
 & qdeepgr4j_lstm_fillna_2 & 0.6259 & 0.4487 & 0.4852 & 0.4724 & 1.0712 & 0.7078 \\
 & qdeepgr4j_mlp_fillna_2 & 0.5616 & 0.4131 & 0.6499 & 0.6160 & 1.4220 & 0.9338 \\
 & qdeepgr4j_rnn_fillna_2 & 0.6684 & 0.4716 & 0.3386 & 0.3551 & 0.9272 & 0.6244 \\
\cline{1-8}
\bottomrule
\end{tabular}

