Parses the optimization data for PO-UCT hyperparameters.

In [11]:
import glob
import pandas as pd

In [23]:
# Load the PO-UCT optimization results (all environment types) into one DataFrame
def parse(path):
    """Load every CSV file matching ``path`` into a single DataFrame.

    Parameters
    ----------
    path : str
        Glob pattern selecting the CSV files to read.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of all matching files, in sorted file-name
        order. Each file keeps its own row index, so index labels repeat
        across files.

    Raises
    ------
    ValueError
        If no file matches ``path``.
    AssertionError
        If any row has ``NumActions`` greater than 201.
    """
    # glob yields matches in arbitrary order; sorting keeps the row order (and
    # thus any later keep="first" de-duplication) reproducible across machines.
    file_names = sorted(glob.glob(path))
    if not file_names:
        raise ValueError(f"No files match {path!r}")
    df = pd.concat([pd.read_csv(file_name, low_memory=False) for file_name in file_names])
    # Sanity check: no row may report more than 201 actions.
    too_many = int((df["NumActions"] > 201).sum())
    assert too_many == 0, f"{too_many} rows have NumActions > 201"
    return df
# Read every optimization run and keep only those with EnvSeed >= 5000.
df = parse('./data/simulation_results/po_uct_optimization/*.csv')
seed_mask = df["EnvSeed"] >= 5000
df = df[seed_mask]

# Remove duplicates: rows sharing the same seed, environment, cost, step budget
# and hyperparameters count as the same run; only the first occurrence is kept.
run_id_columns = ["EnvSeed", "EnvType", "Cost", "Steps", "RolloutDepth", "ExplorationCoeff"]
duplicates = df.duplicated(subset=run_id_columns, keep="first")
df = df[~duplicates]

In [24]:
# For every (env, cost, steps) setting, pick the (RolloutDepth, ExplorationCoeff)
# pair with the highest mean ExpectedReward.
# Dict format (env, cost, steps): (reward, rollout depth, exploration coefficient)
expected_seeds = set(range(5000, 5500))  # one run per seed is expected for each combination
rollout_depths = df["RolloutDepth"].unique()
exploration_coeffs = df["ExplorationCoeff"].unique()

params = {}
for env in ["2_36", "3_54", "4_72", "5_90"]:
    for cost in [0.05, 1]:
        for steps in [10, 100, 1000, 5000]:
            key = (env, cost, steps)
            # Filter once per setting instead of re-scanning the whole frame for
            # every hyperparameter pair.
            setting_runs = df.loc[(df["EnvType"]==env) & (df["Cost"]==cost) & (df["Steps"]==steps), :]
            for rollout_depth in rollout_depths:
                for exploration_coeff in exploration_coeffs:
                    runs = setting_runs.loc[(setting_runs["RolloutDepth"]==rollout_depth) & (setting_runs["ExplorationCoeff"]==exploration_coeff), :]
                    if len(runs) != len(expected_seeds):
                        print(f"Observations: {len(runs)}")
                        print(f"Env: {env}, Cost: {cost}, Steps: {steps}, RolloutDepth: {rollout_depth}, ExplorationCoeff {exploration_coeff}")
                        seen_seeds = set(runs['EnvSeed'])
                        # Set difference, not symmetric difference: seeds outside
                        # the expected range are not "missing".
                        missing = sorted(expected_seeds - seen_seeds)
                        print(f"Missing seeds: {missing}")
                        unexpected = sorted(seen_seeds - expected_seeds)
                        if unexpected:
                            print(f"Unexpected seeds: {unexpected}")
                    mean_reward = runs["ExpectedReward"].mean()
                    if pd.isna(mean_reward):
                        # No usable observations. A NaN must never become the
                        # incumbent: every later `mean_reward > nan` is False, so
                        # the key would stay stuck on this arbitrary pair.
                        continue
                    if key not in params or mean_reward > params[key][0]:
                        params[key] = (mean_reward, rollout_depth, exploration_coeff)
print(params)

{('2_36', 0.05, 10): (53.86140126755538, 0, 100.0), ('2_36', 0.05, 100): (114.79743731895626, 3, 100.0), ('2_36', 0.05, 1000): (117.22153836589568, 3, 1.0), ('2_36', 0.05, 5000): (118.98186894033901, 0, 5.0), ('2_36', 1, 10): (-136.95139489519053, 0, 100.0), ('2_36', 1, 100): (99.7101134678578, 0, 10.0), ('2_36', 1, 1000): (102.25342210808444, 0, 100.0), ('2_36', 1, 5000): (103.41719224662144, 0, 5.0), ('3_54', 0.05, 10): (54.00796981930902, 3, 100.0), ('3_54', 0.05, 100): (157.8237733668935, 3, 100.0), ('3_54', 0.05, 1000): (161.15232620732044, 3, 5.0), ('3_54', 0.05, 5000): (161.51919728120674, 0, 50.0), ('3_54', 1, 10): (-137.2166788948118, 3, 100.0), ('3_54', 1, 100): (140.0400746313717, 0, 100.0), ('3_54', 1, 1000): (144.0835938568375, 0, 10.0), ('3_54', 1, 5000): (146.05183062420843, 0, 100.0), ('4_72', 0.05, 10): (53.94942174511905, 3, 100.0), ('4_72', 0.05, 100): (193.4648609058111, 3, 10.0), ('4_72', 0.05, 1000): (196.78642688211428, 3, 10.0), ('4_72', 0.05, 5000): (196.654616

In [25]:
# One row per (environment, cost, steps) setting with its selected
# hyperparameters; the mean reward stored in `params` is not carried over.
# NOTE: this rebinds `df`, which up to this point held the raw simulation runs.
data = [
    [env, cost, steps, rollout_depth, exploration_coeff]
    for (env, cost, steps), (mean_reward, rollout_depth, exploration_coeff) in params.items()
]

df = pd.DataFrame(
    data,
    columns=["Environment", "Cost", "Steps", "RolloutDepth", "ExplorationCoeff"],
)

In [26]:
# Persist the selected hyperparameters. With pandas' default index=True the row
# index is written as well, as an unnamed first column.
df.to_csv("data/simulation_results/pouct_hyperparameters.csv")

In [27]:
# LaTeX table of the selected hyperparameters: one row per (Cost, Steps),
# one column per environment for each of ExplorationCoeff and RolloutDepth.
hyperparameter_table = df.pivot(
    index=["Cost", "Steps"],
    columns=["Environment"],
    values=["ExplorationCoeff", "RolloutDepth"],
)
print(hyperparameter_table.to_latex())

\begin{tabular}{llrrrrrrrr}
\toprule
     & {} & \multicolumn{4}{l}{ExplorationCoeff} & \multicolumn{4}{l}{RolloutDepth} \\
     & Environment &             2\_36 &   3\_54 &   4\_72 &   5\_90 &         2\_36 & 3\_54 & 4\_72 & 5\_90 \\
Cost & Steps &                  &        &        &        &              &      &      &      \\
\midrule
0.05 & 10   &            100.0 &  100.0 &  100.0 &  100.0 &          0.0 &  3.0 &  3.0 &  0.0 \\
     & 100  &            100.0 &  100.0 &   10.0 &    5.0 &          3.0 &  3.0 &  3.0 &  3.0 \\
     & 1000 &              1.0 &    5.0 &   10.0 &  100.0 &          3.0 &  3.0 &  3.0 &  3.0 \\
     & 5000 &              5.0 &   50.0 &    5.0 &    5.0 &          0.0 &  0.0 &  3.0 &  3.0 \\
1.00 & 10   &            100.0 &  100.0 &  100.0 &  100.0 &          0.0 &  3.0 &  3.0 &  0.0 \\
     & 100  &             10.0 &  100.0 &    5.0 &   50.0 &          0.0 &  0.0 &  0.0 &  0.0 \\
     & 1000 &            100.0 &   10.0 &   50.0 &  100.0 &          0.0 & 

In [28]:
# Same pivot rendered as a rich table: rows are (Cost, Steps), columns are the
# environments under ExplorationCoeff and RolloutDepth.
df.pivot(
    index=["Cost", "Steps"],
    columns=["Environment"],
    values=["ExplorationCoeff", "RolloutDepth"],
)

Unnamed: 0_level_0,Unnamed: 1_level_0,ExplorationCoeff,ExplorationCoeff,ExplorationCoeff,ExplorationCoeff,RolloutDepth,RolloutDepth,RolloutDepth,RolloutDepth
Unnamed: 0_level_1,Environment,2_36,3_54,4_72,5_90,2_36,3_54,4_72,5_90
Cost,Steps,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
0.05,10,100.0,100.0,100.0,100.0,0.0,3.0,3.0,0.0
0.05,100,100.0,100.0,10.0,5.0,3.0,3.0,3.0,3.0
0.05,1000,1.0,5.0,10.0,100.0,3.0,3.0,3.0,3.0
0.05,5000,5.0,50.0,5.0,5.0,0.0,0.0,3.0,3.0
1.0,10,100.0,100.0,100.0,100.0,0.0,3.0,3.0,0.0
1.0,100,10.0,100.0,5.0,50.0,0.0,0.0,0.0,0.0
1.0,1000,100.0,10.0,50.0,100.0,0.0,0.0,0.0,0.0
1.0,5000,5.0,100.0,100.0,50.0,0.0,0.0,0.0,0.0
