In [16]:
import os
import numpy as np
import pandas as pd

# Folder of the hyperparameter search to analyse. The analysis below lists its
# entries and reads an `eval_data.csv` file from each of them.
folder_path = 'log_safe/minigrid_hyperparameter_search-42-mask-linear_comb/'  # Specify the path to the folder

In [40]:
# Summarise every run found in `folder_path` by three scalar metrics computed
# from the values stored in its eval_data.csv:
#   max_score        - highest value in the file
#   area_under_curve - sum of all values
#   biggest_dip      - most negative difference between consecutive values
records = []

# os.listdir returns entries in arbitrary order; sorting here keeps the
# iteration deterministic.
for run in sorted(os.listdir(folder_path)):
    eval_file = os.path.join(folder_path, run, 'eval_data.csv')
    if not os.path.isfile(eval_file):
        # Skip stray entries (e.g. .ipynb_checkpoints, .DS_Store) that are not
        # run folders instead of crashing in np.loadtxt.
        continue

    # ndmin=1 keeps a single-value file as a 1-D array rather than a 0-d scalar.
    data = np.loadtxt(eval_file, ndmin=1)

    records.append({
        'max_score': data.max(),
        'area_under_curve': data.sum(),
        # A curve with a single point has no consecutive differences.
        'biggest_dip': np.diff(data).min() if data.size > 1 else np.nan,
        'run': run,
    })

# Build the frame once instead of growing it row by row; naming the columns
# explicitly keeps the frame well-formed even when no run was found.
df = pd.DataFrame(records, columns=['max_score', 'area_under_curve', 'biggest_dip', 'run'])

# Order rows by run directory name and renumber them 0..n-1, so that the row
# index reflects the position of the run in that ordering.
df = df.sort_values('run').reset_index(drop=True)

df

Unnamed: 0,max_score,area_under_curve,biggest_dip,run
0,0.8698,13.3158,-0.1083,230628-165101
1,0.8594,13.5636,-0.0688,230628-165439
2,0.8844,13.6564,-0.1386,230628-165830
3,0.8792,12.6408,-0.0864,230628-170214
4,0.8812,12.5081,-0.1521,230628-170558
5,0.8677,13.3718,-0.1219,230628-170943
6,0.8323,11.7217,-0.0917,230628-171331
7,0.851,12.6353,-0.1406,230628-171720
8,0.8792,12.7416,-0.0718,230628-172105
9,0.8563,13.9938,-0.0687,230628-172447


In [41]:
# Display the five rows of `df` with the highest `max_score`.
print("Best Max Scores")
indices = df.nlargest(5, 'max_score').index
df.loc[indices]

Best Max Scores


Unnamed: 0,max_score,area_under_curve,biggest_dip,run
21,0.8948,14.1281,-0.0563,230628-180922
22,0.8854,13.8854,-0.3594,230628-181332
2,0.8844,13.6564,-0.1386,230628-165830
24,0.8844,13.5729,-0.2688,230628-182105
26,0.8823,13.2437,-0.2959,230628-182830


In [42]:
# Display the five rows of `df` with the highest `area_under_curve`.
print("Best AUC")
indices = df.nlargest(5, 'area_under_curve').index
df.loc[indices]

Best AUC


Unnamed: 0,max_score,area_under_curve,biggest_dip,run
21,0.8948,14.1281,-0.0563,230628-180922
14,0.875,14.0302,-0.1917,230628-174319
9,0.8563,13.9938,-0.0687,230628-172447
20,0.8448,13.9613,-0.0448,230628-180539
23,0.876,13.9228,-0.1146,230628-181721


In [43]:
# Display the five rows of `df` with the largest `biggest_dip` value, i.e. the
# runs whose worst step-to-step change is the mildest.
print("Min Dip")
indices = df.nlargest(5, 'biggest_dip').index
df.loc[indices]

Min Dip


Unnamed: 0,max_score,area_under_curve,biggest_dip,run
20,0.8448,13.9613,-0.0448,230628-180539
21,0.8948,14.1281,-0.0563,230628-180922
9,0.8563,13.9938,-0.0687,230628-172447
1,0.8594,13.5636,-0.0688,230628-165439
8,0.8792,12.7416,-0.0718,230628-172105


### Results
Index 21 (run 230628-180922) gives the best results in almost all categories: it ranks first for max score and for AUC, and second for smallest dip.

**Corresponding hyperparameters**


In [46]:
# Values of the three hyperparameters that span the search grid.
entropy_weights = [0.01, 0.05, 0.1]
ratio_clips = [0.1, 0.25, 0.5]
gradient_clips = [0.5, 1, 2]

# Row index of the run whose hyperparameters should be looked up.
best_run_idx = 21

# Flatten the grid with entropy weight varying slowest and gradient clip
# varying fastest.
# NOTE(review): this assumes the runs were launched in exactly this nested
# order, so that a run's row index equals its position in the grid - confirm
# against the script that started the search.
hyperparameter_grid = [
    (entropy_weight, ratio_clip, gradient_clip)
    for entropy_weight in entropy_weights
    for ratio_clip in ratio_clips
    for gradient_clip in gradient_clips
]

# Index the grid directly instead of counting through nested loops.
entropy_weight, ratio_clip, gradient_clip = hyperparameter_grid[best_run_idx]

print("Entropy Weight:", entropy_weight)
print("Ratio Clip:", ratio_clip)
print("Gradient Clip:", gradient_clip)

Entropy Weight: 0.1
Ratio Clip: 0.25
Gradient Clip: 0.5
