In [2]:
import os

import dill
import pandas as pd
import numpy as np

# Root directory of the stored results. The cells below read it as
# RESULTS_DIR/<environment>/<metric>/<model id>.<extension>.
RESULTS_DIR = 'results'

# Model id (the file-name stem used as a lookup key below) -> display name.
# NOTE(review): the table-building cells hard-code the same ids with slightly
# different labels ('ACS2 ER' vs 'ACS2-ER') and do not appear to read `algs`;
# confirm whether this mapping is still needed.
algs = {
    '-1': 'ACS2',
    '3-EER': 'ACS2-EER',
    '3-ER': 'ACS2-ER',
    'SQ': 'Q-Learning',
    'DQN': 'DQN'
}

# Bayes Analysis
- [visualization gallery](https://www.machinelearningplus.com/plots/top-50-matplotlib-visualizations-the-master-plots-python/)

- **ER** – Experience Replay
- **EER** – Episode Experience Replay: replays a whole episode without assigning priorities (each one has the same probability)

In [1]:
def load_data(environment, metric):
    """Load every serialized object stored for one environment/metric pair.

    Each entry of ``RESULTS_DIR/<environment>/<metric>`` is deserialized with
    ``dill`` and stored under the part of its file name before the first dot.

    Args:
        environment: Name of the sub-directory of ``RESULTS_DIR`` to read.
        metric: Name of the sub-directory inside the environment directory.

    Returns:
        dict mapping model id (file name up to the first ``.``) to the object
        loaded from that file.
    """
    data_path = os.path.join(RESULTS_DIR, environment, metric)
    models = {}
    for f in os.listdir(data_path):
        model_id = f.split('.')[0]
        # Open via a context manager so the handle is closed deterministically
        # instead of being left to the garbage collector.
        # NOTE(security): dill.load can execute arbitrary code while
        # unpickling; only point this at result files you produced yourself.
        with open(os.path.join(data_path, f), 'rb') as fh:
            models[model_id] = dill.load(fh)
    return models

In [13]:
def to_df(arr):
    """Build a DataFrame from row records, indexed by 'env' and sorted by it.

    Args:
        arr: Records accepted by ``pd.DataFrame`` (the cells below pass a list
            of dicts); each record must provide an 'env' field.

    Returns:
        DataFrame whose index is the 'env' column, in ascending index order.
    """
    frame = pd.DataFrame(arr)
    # Chained calls return new frames, so nothing is mutated in place.
    return frame.set_index('env').sort_index()

def fmt_cell(data_arr):
    """Format a collection of numbers as 'mean ± std' with two decimals.

    The spread is ``np.std`` with its default arguments.

    Args:
        data_arr: Array-like of numeric values.

    Returns:
        str of the form ``"<mean> ± <std>"``.
    """
    center = np.mean(data_arr)
    spread = np.std(data_arr)
    return f"{center:.2f} ± {spread:.2f}"

## Steps

In [8]:
# Table column label -> model id used as the key in the loaded 'steps' results.
step_columns = {
    'ACS2': '-1',
    'ACS2 ER': '3-ER',
    'ACS2 EER': '3-EER',
    'Q-Learning': 'SQ',
    'DQN': 'DQN',
}

# One row per directory entry of RESULTS_DIR; every cell is the formatted
# summary of that model's `.mu` attribute.
steps_arr = []
for env in os.listdir(RESULTS_DIR):
    env_metrics = load_data(env, 'steps')
    row = {'env': env}
    for label, model_id in step_columns.items():
        row[label] = fmt_cell(env_metrics[model_id].mu)
    steps_arr.append(row)

In [14]:
# Turn the collected rows into a table indexed and sorted by 'env', then display it.
steps_df = to_df(steps_arr)
steps_df

Unnamed: 0_level_0,ACS2,ACS2 ER,ACS2 EER,Q-Learning,DQN
env,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
MAZE4,9.17 ± 0.09,8.22 ± 0.04,8.13 ± 0.04,12.10 ± 0.19,10.73 ± 0.19
MAZE5,12.51 ± 0.17,11.01 ± 0.07,10.70 ± 0.05,17.27 ± 0.39,14.48 ± 0.20
MAZE7,17.19 ± 0.14,15.71 ± 0.05,15.52 ± 0.04,33.33 ± 0.46,25.18 ± 0.38
MAZEF1,3.73 ± 0.03,3.79 ± 0.01,3.69 ± 0.01,4.01 ± 0.03,3.96 ± 0.03
MAZEF2,5.51 ± 0.03,5.42 ± 0.01,5.38 ± 0.01,6.52 ± 0.07,6.20 ± 0.10
MAZEF3,7.36 ± 0.04,7.28 ± 0.02,7.26 ± 0.01,10.40 ± 0.17,9.02 ± 0.11
MAZET2,5.74 ± 0.02,5.84 ± 0.01,5.80 ± 0.01,6.88 ± 0.09,6.39 ± 0.06
MAZET3,7.71 ± 0.03,7.76 ± 0.01,7.76 ± 0.01,9.30 ± 0.21,8.72 ± 0.09
MAZEX1,20.93 ± 0.21,18.96 ± 0.18,18.61 ± 0.12,43.17 ± 0.21,39.03 ± 0.43
MAZEX2,22.99 ± 0.23,20.65 ± 0.19,20.19 ± 0.13,43.80 ± 0.15,38.49 ± 0.28


In [20]:
# Print the steps table as LaTeX tabular source.
print(steps_df.to_latex())

\begin{tabular}{llllll}
\toprule
{} &          ACS2 &       ACS2 ER &      ACS2 EER &    Q-Learning &           DQN \\
env    &               &               &               &               &               \\
\midrule
MAZE4  &   9.17 ± 0.09 &   8.22 ± 0.04 &   8.13 ± 0.04 &  12.10 ± 0.19 &  10.73 ± 0.19 \\
MAZE5  &  12.51 ± 0.17 &  11.01 ± 0.07 &  10.70 ± 0.05 &  17.27 ± 0.39 &  14.48 ± 0.20 \\
MAZE7  &  17.19 ± 0.14 &  15.71 ± 0.05 &  15.52 ± 0.04 &  33.33 ± 0.46 &  25.18 ± 0.38 \\
MAZEF1 &   3.73 ± 0.03 &   3.79 ± 0.01 &   3.69 ± 0.01 &   4.01 ± 0.03 &   3.96 ± 0.03 \\
MAZEF2 &   5.51 ± 0.03 &   5.42 ± 0.01 &   5.38 ± 0.01 &   6.52 ± 0.07 &   6.20 ± 0.10 \\
MAZEF3 &   7.36 ± 0.04 &   7.28 ± 0.02 &   7.26 ± 0.01 &  10.40 ± 0.17 &   9.02 ± 0.11 \\
MAZET2 &   5.74 ± 0.02 &   5.84 ± 0.01 &   5.80 ± 0.01 &   6.88 ± 0.09 &   6.39 ± 0.06 \\
MAZET3 &   7.71 ± 0.03 &   7.76 ± 0.01 &   7.76 ± 0.01 &   9.30 ± 0.21 &   8.72 ± 0.09 \\
MAZEX1 &  20.93 ± 0.21 &  18.96 ± 0.18 &  18.61 ± 0.12 &  43.1

  print(steps_df.to_latex())


## Knowledge

In [15]:
# Table column label -> model id used as the key in the loaded 'knowledge95'
# results; only the three ACS2 variants are read here.
knowledge_columns = {
    'ACS2': '-1',
    'ACS2 ER': '3-ER',
    'ACS2 EER': '3-EER',
}

# One row per directory entry of RESULTS_DIR; every cell is the formatted
# summary of that model's `.mu` attribute.
knowledge_arr = []
for env in os.listdir(RESULTS_DIR):
    env_metrics = load_data(env, 'knowledge95')
    row = {'env': env}
    for label, model_id in knowledge_columns.items():
        row[label] = fmt_cell(env_metrics[model_id].mu)
    knowledge_arr.append(row)

In [17]:
# Turn the collected rows into a table indexed and sorted by 'env', then display it.
knowledge_df = to_df(knowledge_arr)
knowledge_df

Unnamed: 0_level_0,ACS2,ACS2 ER,ACS2 EER
env,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MAZE4,2318.89 ± 79.61,591.45 ± 20.59,324.65 ± 20.50
MAZE5,3242.57 ± 143.74,631.36 ± 28.46,378.58 ± 17.67
MAZE7,1881.64 ± 107.13,430.77 ± 29.19,281.59 ± 18.18
MAZEF1,956.39 ± 56.68,204.22 ± 11.28,64.40 ± 3.11
MAZEF2,672.61 ± 43.98,126.92 ± 6.82,49.83 ± 1.56
MAZEF3,468.78 ± 57.97,103.93 ± 6.81,49.97 ± 1.45
MAZET2,617.33 ± 39.66,158.50 ± 9.47,57.32 ± 2.26
MAZET3,511.39 ± 43.67,123.31 ± 5.97,59.11 ± 1.95
MAZEX1,5000.00 ± 0.00,1657.97 ± 67.23,1304.34 ± 42.83
MAZEX2,5000.00 ± 0.00,1615.04 ± 58.65,1311.21 ± 39.05


In [19]:
# Print the knowledge table as LaTeX tabular source.
print(knowledge_df.to_latex())

\begin{tabular}{llll}
\toprule
{} &              ACS2 &          ACS2 ER &         ACS2 EER \\
env    &                   &                  &                  \\
\midrule
MAZE4  &   2318.89 ± 79.61 &   591.45 ± 20.59 &   324.65 ± 20.50 \\
MAZE5  &  3242.57 ± 143.74 &   631.36 ± 28.46 &   378.58 ± 17.67 \\
MAZE7  &  1881.64 ± 107.13 &   430.77 ± 29.19 &   281.59 ± 18.18 \\
MAZEF1 &    956.39 ± 56.68 &   204.22 ± 11.28 &     64.40 ± 3.11 \\
MAZEF2 &    672.61 ± 43.98 &    126.92 ± 6.82 &     49.83 ± 1.56 \\
MAZEF3 &    468.78 ± 57.97 &    103.93 ± 6.81 &     49.97 ± 1.45 \\
MAZET2 &    617.33 ± 39.66 &    158.50 ± 9.47 &     57.32 ± 2.26 \\
MAZET3 &    511.39 ± 43.67 &    123.31 ± 5.97 &     59.11 ± 1.95 \\
MAZEX1 &    5000.00 ± 0.00 &  1657.97 ± 67.23 &  1304.34 ± 42.83 \\
MAZEX2 &    5000.00 ± 0.00 &  1615.04 ± 58.65 &  1311.21 ± 39.05 \\
\bottomrule
\end{tabular}



  print(knowledge_df.to_latex())
