## Merging results of RNN plus networks

In [1]:
import os, sys
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

def plotFunction(data, title):
    """Show a horizontal bar chart of ``data`` with each value printed next to its bar.

    Args:
        data: Object exposing ``.index`` (used as bar positions and tick
            labels) and ``.values`` (used as bar lengths) -- presumably a
            pandas Series; confirm against callers.
        title: Text placed as the axes title.
    """
    # Seed NumPy's global RNG and reset matplotlib's rcParams to their
    # defaults before drawing. Nothing below draws random numbers; the seed
    # call is kept only because it changes global state.
    np.random.seed(19680801)
    plt.rcdefaults()
    _, axes = plt.subplots()

    labels = data.index
    axes.barh(labels, data.values, align='center')
    axes.set_yticks(labels)
    axes.set_yticklabels(labels)
    axes.invert_yaxis()  # first entry is drawn at the top
    axes.set_title(title)
    axes.set_xlabel('Number of Dataset')

    # Write each bar's value slightly to the right of the bar's end.
    for row, amount in enumerate(data.values):
        axes.text(amount + 0.5, row + 0.05, str(amount), color='black', fontweight='bold')
    plt.show()
    
# Per-run result CSVs are read from results_dir_i; the copies extended with
# the 'olstm_*' scores are written to results_dir_o.
results_dir_i = "./results/2_rnn_plus/rnn_plus_v1_30112021_ES_DS_UnRolled_LS/"
results_dir_o = "./results/2_rnn_plus/rnn_plus_v1_30112021_ES_DS_UnRolled_LS_merged/"
# Sorted listing of the input directory without its first entry
# (presumably a hidden/housekeeping entry -- TODO confirm).
results_list = sorted(os.listdir(results_dir_i))[1:]
merged_results = None  # replaced by the concatenation of all merged per-run CSVs
count = -1  # cycles through 0, 1, 2 while the 'olstm_*' rows are added
prefix = 'RNN_plus_v1' #'mse'

# exist_ok=True replaces the check-then-create pair: it is a no-op when the
# directory already exists and cannot race with a concurrent creation.
os.makedirs(results_dir_o, exist_ok=True)
    
# Parse the validation log: skip its first five lines, ignore empty lines,
# and keep the 4th and 6th space-separated fields of every remaining line.
# Field 4 is used below as a '/'-separated path and field 6 as a float score.
with open('valacc.mse.txt') as f:
    lines = []
    for raw in f.readlines()[5:]:
        if raw.strip('\n'):
            fields = raw.replace('\n', '').split(' ')
            lines.append(fields[3:6:2])

for line in lines:
    # count cycles 0, 1, 2 so consecutive log lines land in rows olstm_0..2.
    # NOTE(review): this assumes the log lists exactly three entries per
    # configuration, in order -- confirm against the log format.
    if count < 2:
        count = count + 1
    else:
        count = 0

    # Rebuild the per-run CSV name from the dot-separated parts of the third
    # path component of the logged path.
    fn = line[0].split('/')[2].split('.')
    stem = '_'.join([
        prefix,
        fn[1].split('=')[-1],
        fn[2].split('=')[-1],
        fn[3].split('=')[-1],
        fn[4].split('s')[-1],
        fn[5].split('n')[-1],
        fn[6],
    ])
    fn_i = stem + '.csv'
    fn_o = stem + '_merged.csv'
    resultpath_i = results_dir_i + fn_i
    resultpath_o = results_dir_o + fn_o

    # Continue from the merged file when it already exists so that rows added
    # for earlier log lines are kept; otherwise start from the input file.
    source = resultpath_o if os.path.exists(resultpath_o) else resultpath_i
    result = pd.read_csv(source, index_col=0)

    result.loc[f'olstm_{count}'] = round(float(line[1]),5)
    result.to_csv(resultpath_o, index=True)

# Only the per-run files (suffix '_merged.csv') are processed. Listing the
# output directory unfiltered also returns the aggregate CSV saved at the end
# of this step once it exists (i.e. on every re-run); that file has no
# 'olstm_*' rows, so the first loop raised KeyError, and it would also be
# concatenated into itself by the second loop.
merged_files = sorted(
    name for name in os.listdir(results_dir_o) if name.endswith('_merged.csv')
)
if not merged_files:
    raise FileNotFoundError(f"no '*_merged.csv' files found in {results_dir_o}")

# Step 1: replace the three 'olstm_*' rows of every file by their mean and
# add the difference to 'rnnplus_avg' plus a textual verdict.
olstm_rows = ['olstm_0', 'olstm_1', 'olstm_2']
for results_item in merged_files:
    resultpath = results_dir_o + results_item
    result = pd.read_csv(resultpath, index_col=0)
    # Only the first column of each row is used.
    rnnplus_avg = float(result.loc['rnnplus_avg'].values[0])
    olstm_avg = sum(float(result.loc[row].values[0]) for row in olstm_rows) / 3.0
    delta = rnnplus_avg - olstm_avg
    # Differences within +/-0.01 are reported as 'same'.
    if delta > 0.01:
        delta_bool = 'better'
    elif delta < -0.01:
        delta_bool = 'worse'
    else:
        delta_bool = 'same'
    result = result.drop(labels=olstm_rows, axis=0)
    result.loc['olstm_avg'] = round(olstm_avg, 6)
    result.loc['delta'] = round(delta, 6)
    result.loc['isbetter'] = delta_bool
    result.to_csv(resultpath, index=True)

# Step 2: re-read the updated files and place them side by side, one file per
# column block. A single concat over the list avoids re-copying the growing
# frame on every iteration.
frames = []
for results_item in merged_files:
    resultpath = results_dir_o + results_item
    frames.append(pd.read_csv(resultpath, index_col=0))
merged_results = pd.concat(frames, axis=1)

# merged_results = merged_results.drop(index='tuner/trial_id')
merged_results.to_csv(results_dir_o + 'rnn_plus_v1_30112021_ES_DS_UnRolled_LS_mergedresult.csv', index=True)

# Count the runs per 'isbetter' verdict (non-null 'dataset_no' entries),
# most frequent verdict first.
isbetter_pd = merged_results.T.groupby(['isbetter']).count()['dataset_no'].sort_values(ascending=False)
# Iterate over whatever verdicts are present instead of indexing positions
# 0..2: integer keys on a label-indexed Series are deprecated in pandas, and
# fixed positions fail when a verdict does not occur. The denominator is the
# number of counted runs rather than a hard-coded constant.
total_runs = int(isbetter_pd.sum())
for verdict, n_runs in isbetter_pd.items():
    print(f'++ Number of isBetter: \033[1m{verdict}\033[0m with \033[1m{n_runs}\033[0m/{total_runs}')

++ Number of isBetter: same with 91/180
++ Number of isBetter: worse with 75/180
++ Number of isBetter: better with 14/180
