In [31]:
import json
import os
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter

In [6]:
filters = ['Sub30x30',  "iters1000" ]

In [45]:
base_dir = "evaluation_results"
result_jsons = [
    os.path.join(base_dir, f) for f in os.listdir(base_dir) if 
    f.endswith('.json') and all(pattern in f for pattern in filters)
]
result_jsons

['evaluation_results/2023-11_30x30_1-random-iters1000.json',
 'evaluation_results/2023-11_30x30-iters1000.json',
 'evaluation_results/2023-11_30x30ppo-seed1maskable-iters1000.json',
 'evaluation_results/2023-11_30x30ppo-oldymaskable-iters1000.json',
 'evaluation_results/2023-11_30x30naive-iters1000.json',
 'evaluation_results/2023-11_30x30_baseline_iter.json',
 'evaluation_results/22023-11_30x30iters1000.json',
 'evaluation_results/2023-11_30x30_barrier.json']

In [46]:
current_date_str = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

evaluation_results_sub20_random = f'evaluation_results/{current_date_str}-Sub20x20-random-iters1000.json'
evaluation_results_sub20_expert = f'evaluation_results/{current_date_str}-Sub20x20-expert-iters1000.json'
evaluation_results_sub20_ppo_seed1_maskable = f'evaluation_results/{current_date_str}-Sub20x20-ppo-seed1maskable-iters1000.json'
evaluation_results_sub20_naive = f'evaluation_results/{current_date_str}-Sub20x20-naive-iters1000.json'
               ]
result_jsons

['evaluation_results/2023-11_30x30random-iters1000.json',
 'evaluation_results/2023-11_30x30-expert-iters1000.json',
 'evaluation_results/2023-11_30x30-ppo-seed1maskable-iters1000.json',
 'evaluation_results/2023-11_30x30Sub20x20-naive-iters1000.json']

In [47]:
results = {
    fname.split('-')[-2]: json.load(open(fname, 'r')) for fname in result_jsons
}

In [48]:
for on_fire, burned in zip(results['expert']['num_cells_on_fire'], results['expert']['num_cells_burned']):
    print(on_fire, burned)


1 26
0 15
3 47
0 13
24 68
0 1
0 10
9 35
0 10
154 211
0 1
0 15
0 1
23 80
33 83
0 2
0 7
0 1
0 15
0 6
1 24
0 1
0 1
0 1
1 26
4 23
0 5
0 3
0 2
0 2
16 73
0 1
14 56
78 135
0 6
26 83
9 53
17 38
4 32
9 51
0 11
0 1
0 1
0 2
0 2
0 1
0 2
0 1
141 198
7 46
0 4
0 3
0 1
0 1
0 2
0 6
137 194
200 257
0 8
0 3
0 2
3 31
0 3
0 2
0 3
0 2
126 183
146 203
0 2
21 67
36 92
8 46
2 25
149 206
43 100
6 42
0 1
0 5
56 113
0 4
0 2
0 4
0 1
0 2
8 35
0 7
0 10
0 1
9 63
1 19
72 129
0 3
0 2
0 13
0 2
0 1
0 2
0 11
1 41
0 3
0 2
0 1
4 35
110 167
0 1
146 203
0 11
0 6
4 32
62 119
0 5
9 48
0 11
0 1
8 46
0 2
6 38
0 2
0 2
11 56
11 63
6 50
121 178
0 3
0 6
0 1
0 2
0 3
0 6
6 42
0 1
0 2
0 2
18 69
7 41
22 76
1 6
0 5
1 12
0 2
26 81
0 10
0 3
0 2
0 7
0 3
133 190
8 35
0 1
0 2
0 2
9 52
3 31
2 29
0 2
59 116
0 1
0 3
12 49
22 76
0 2
0 2
0 2
0 6
0 1
0 1
0 14
130 187
0 2
0 2
0 13
0 1
0 11
0 1
1 24
1 8
0 5
0 2
0 11
0 1
0 10
0 2
69 126
0 2
0 2
200 257
1 40
0 1
0 13
0 1
0 1
0 1
6 50
5 34
0 2
0 2
11 39
0 1
0 2
0 2
0 11
0 2
9 48
0 2
0 2
0 1
0 2
0 1
24 68

In [49]:
algo_to_res = {
    fname: {
        'mean': np.mean(result['rewards']),
        'std': np.std(result['rewards']),
        'median': np.median(result['rewards']),
    }
    for fname, result in results.items()
}
algo_to_res

{'random': {'mean': -82.1565,
  'std': 60.87816684986498,
  'median': -70.35000000000001},
 'expert': {'mean': -20.444625, 'std': 41.5653870033033, 'median': -0.525},
 'seed1maskable': {'mean': -24.585974999999998,
  'std': 40.49439288561288,
  'median': -0.6},
 'naive': {'mean': -22.76035,
  'std': 40.878623360841054,
  'median': -0.7000000000000001}}

In [50]:
all_rewards = [
    res['rewards'] for algo, res in results.items() if algo != "random"
]
labels = [a for a in results.keys() if a != "random"]

In [51]:
rewards = {k: r['rewards'] for k, r in results.items() if k not in {"maskable5m"}}
zipped = list(zip(rewards.values()))

In [55]:
rewards.keys()

dict_keys(['random', 'expert', 'seed1maskable', 'naive'])

In [56]:
y = list(rewards.values())

In [57]:
print(tuple(rewards.keys()))

winners = []

for idx, v in enumerate(zip(*y)):
    v = tuple(round(z, 2) for z in v)
    v_np = np.array(v)
    winner = np.argmax(v_np)
    winner = tuple(rewards.keys())[winner]
#     if (np.abs(v_np[0] - v_np[1]) >= 50 or np.abs(v_np[2] - v_np[1]) >= 50) and winner == "maskable":
#     if winner == "expert":
#     if winner == "maskable":
#         print(idx, v, winner)
    winners.append(winner)

('random', 'expert', 'seed1maskable', 'naive')


In [59]:
zz = Counter(winners)
zz

Counter({'seed1maskable': 468, 'naive': 164, 'expert': 360, 'random': 8})

In [60]:
sum(zz.values())

1000