In [1]:
import json
import yaml
import numpy as np
import shutil
from pathlib import Path
import pandas as pd
from mouselab.mouselab import MouselabEnv
from mouselab.env_utils import get_ground_truths_from_json
import dill as pickle
from collections import Counter
from costometer.utils.analysis_utils import AnalysisObject

# Creating data, modifying code

In [2]:
discount_factors = [.10, .20, .30, .40, .50,.75]#,.85]#.9,.95,.975]
alphas = [0.0,.25,.5,.75,.9]

Put this in `cluster/src/cluster_utils.py`

Generate cluster submission commands to run to get Q values

In [3]:
print(f"for gamma in 1 {' '.join([str(discount_factor) for discount_factor in discount_factors])};\n"
      f"    do for alpha in 1 {' '.join([str(alpha) for alpha in alphas])};\n"
      f"        do condor_submit_bid 2 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing cost_function=back_dist_depth_eff_forw param_file=back_dist_depth_eff_forw alpha=$alpha gamma=$gamma;\n"
      f"done; done;")

for gamma in 1 0.1 0.2 0.3 0.4 0.5 0.75;
    do for alpha in 1 0.0 0.25 0.5 0.75 0.9;
        do condor_submit_bid 2 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing cost_function=back_dist_depth_eff_forw param_file=back_dist_depth_eff_forw alpha=$alpha gamma=$gamma;
done; done;


Generate cluster submission commands to infer likelihood for methods_main:

In [None]:
print(f"for gamma in 1 {' '.join([str(discount_factor) for discount_factor in discount_factors])};\n"
      f"    do for alpha in 1 {' '.join([str(alpha) for alpha in alphas])};\n"
      f"        do condor_submit_bid 2 submission_scripts/MPI-IS/04_Infer_Params.sub experiment=methods_main cost_function=back_dist_depth_eff_forw param_file=dist_depth_eff_forw alpha=$alpha gamma=$gamma;\n"
      f"done; done;")

Combine data:

In [None]:
print(f"for experiment in methods_main {' '.join(['methods_main' + str(discount_factor) for discount_factor in discount_factors])};\n"
      f"    do for alpha in 1 {' '.join([str(alpha) for alpha in alphas])};\n"
      f"        do condor_submit_bid 2 submission_scripts/MPI-IS/05_Combine_Human.sub experiment=$experiment cost_function=dist_depth_eff_forw alpha=$alpha;\n"
      f"done; done;")

MAP:

In [None]:
print(f"for experiment in methods_main {' '.join(['methods_main' + str(discount_factor) for discount_factor in discount_factors])};\n"
      f"    do for alpha in 1 {' '.join([str(alpha) for alpha in alphas])};\n"
      f"        do condor_submit_bid 2 submission_scripts/MPI-IS/M_01_Get_MAP_File_by_PID.sub pid_file=methods_main pid_file=methods_main experiment=$experiment cost_function=dist_forw_eff alpha=$alpha;\n"
      f"done; done;")

In [None]:
with open(Path("../../../analysis/methods/static/inputs/yamls/NoDepth.yaml"), "rb") as f:
    analysis_setting = yaml.safe_load(f)
  
for experiment_suffix in discount_factors:
    params = [f"{experiment_suffix:.3f}{alpha_suffix}" for alpha_suffix in [""] + [f"_{alpha:.2f}" for alpha in alphas]]

    analysis_setting["params"] = params
    
    with open(Path(f"../../../analysis/methods/static/inputs/yamls/DiscountFactor{experiment_suffix}.yaml"), "w") as f:
        yaml.dump(analysis_setting, f)

In [None]:
experiment_val_dict = {} 
alpha_val_dict = {}
for experiment_val, experiment_suffix in zip([1]+ discount_factors, [""] + discount_factors):
    for alpha_val, alpha_suffix in zip([1] + alphas, [""] + [f"_{alpha:.2f}" for alpha in alphas]):
        if experiment_suffix == "":
            alpha_val_dict[f"{experiment_suffix}{alpha_suffix}"] = alpha_val
            experiment_val_dict[f"{experiment_suffix}{alpha_suffix}"] = experiment_val
        else:
            alpha_val_dict[f"{experiment_suffix:.3f}{alpha_suffix}"] = alpha_val
            experiment_val_dict[f"{experiment_suffix:.3f}{alpha_suffix}"] = experiment_val

Transfer over:

```
rsync -aPzr --include "methods_main*/*/*mle_and_map*" --include "*/" --exclude "*" vfelso@login.cluster.is.localnet:/work/vfelso/planning-depth-differences/data/processed/ data/processed
```

# Fit of main dataset by discount factor + power utility function model instead of depth in winning model

Distance, Effort, Depth and Forward Search Bonus              40490.81774

In [None]:
# optimization_dfs = []

# for experiment_suffix in [""] + discount_factors:
#     analysis_obj = AnalysisObject(f"DiscountFactor{experiment_suffix}", irl_path=Path("../../..").resolve(), experiment_subdirectory="methods/static")

#     optimization_data = analysis_obj.query_optimization_data(include_null=False)
#     optimization_data=optimization_data[optimization_data["Model Name"]=="Distance and Effort Costs with Forward Search Bonus"]

#     optimization_dfs.append(optimization_data)

# optimization_dfs = pd.concat(optimization_dfs)

# optimization_dfs.to_csv("discount_factors.csv")

In [None]:
optimization_dfs = pd.read_csv("discount_factors.csv")
optimization_dfs["session"]=optimization_dfs["session"].fillna("")

optimization_dfs["alpha"] = optimization_dfs["session"].apply(lambda session: alpha_val_dict[session])
optimization_dfs["discount_factor"] = optimization_dfs["session"].apply(lambda session: experiment_val_dict[session])

In [None]:
# optimization_dfs = optimization_dfs[optimization_dfs["alpha"]==1]

In [None]:
optimization_dfs=optimization_dfs.reset_index()
best_param_rows = optimization_dfs.loc[
    optimization_dfs.groupby(["trace_pid"]).idxmax()["value"]
]

In [None]:
best_param_rows.groupby(["session"]).count().sort_values(by="trace_pid")

In [17]:
Counter(best_param_rows["alpha"])

Counter({1.0: 98, 0.25: 7, 0.5: 7, 0.9: 8, 0.75: 2})

In [18]:
np.mean(best_param_rows["discount_factor"]), np.std(best_param_rows["discount_factor"])

(0.7487704918032786, 0.3079854634339799)

In [19]:
Counter(best_param_rows["discount_factor"])

Counter({1.0: 60, 0.75: 29, 0.1: 11, 0.3: 3, 0.5: 8, 0.4: 7, 0.2: 4})

In [20]:
np.mean(best_param_rows["temp"]), np.std(best_param_rows["temp"])

(1.2270491803278687, 1.022515933199512)

# Simulations

In [21]:
print(f"for experiment_setting in high_increasing {' '.join(['high_increasing' + str(discount_factor) for discount_factor in discount_factors])};\n"
      f"    do for alpha in 1 {' '.join([str(alpha) for alpha in alphas])};\n"
      f"        do condor_submit_bid 2 submission_scripts/MPI-IS/06_Simulate_Optimal.sub experiment_setting=$experiment_setting cost_function=back_dist_depth_eff_forw param_file=null alpha=$alpha;\n"
      f"done; done;")

for experiment_setting in high_increasing high_increasing0.1 high_increasing0.2 high_increasing0.3 high_increasing0.4 high_increasing0.5 high_increasing0.75;
    do for alpha in 1 0.0 0.25 0.5 0.75 0.9;
        do condor_submit_bid 2 submission_scripts/MPI-IS/06_Simulate_Optimal.sub experiment_setting=$experiment_setting cost_function=back_dist_depth_eff_forw param_file=null alpha=$alpha;
done; done;


```
rsync -aPzr --include "high_increasing*/OptimalQ/simulated_agents_back_dist_depth_eff_forw*" --include "*/" --exclude "*" vfelso@login.cluster.is.localnet:/work/vfelso/planning-depth-differences/cluster/data/trajectories/ cluster/data/trajectories
```


# Look at simulations

In [22]:
depth_data = pd.concat([pd.read_csv(f, index_col=0) for f in Path("../../../cluster/data/trajectories/high_increasing/OptimalQ/").glob("simulated_agents_linear_depth_*_91.00.csv")])
depth_only_data = pd.concat([pd.read_csv(f, index_col=0) for f in Path("../../../cluster/data/trajectories/high_increasing/OptimalQ/").glob("simulated_agents_depth_only_*_91.00.csv")])

In [23]:
depth_data.groupby(["sim_depth_cost_weight"]).mean()

  depth_data.groupby(["sim_depth_cost_weight"]).mean()


Unnamed: 0_level_0,n_steps,i_episode,finished,return,trial_id,actions,rewards,sim_exact,sim_temperature_file,sim_num_simulated,sim_num_trials,sim_seed,sim_static_cost_weight,pid
sim_depth_cost_weight,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
-5.0,13.696003,9.501049,1.0,170.873703,1.146243e+17,7.05849,13.371199,0.0,,54.221865,20.0,91.0,1.063876,26.612189
-2.5,13.5176,9.515829,1.0,107.983296,7.307269e+16,7.09931,8.630997,0.0,,56.560678,20.0,91.0,0.728505,27.77623
-1.0,12.684407,9.509322,1.0,70.870104,1.363922e+17,7.279407,6.159043,0.0,,55.68412,20.0,91.0,0.547208,27.326802
-0.1,11.905653,9.476758,1.0,57.395145,4.704697e+17,7.508223,5.878279,0.0,,10.0,20.0,91.0,-0.022484,4.502905
0.0,9.72391,9.529295,1.0,50.710475,4.196647e+17,8.00575,7.395792,0.0,,39.757557,20.0,91.0,0.271548,19.440438
0.1,10.215781,9.452049,1.0,54.658385,4.584939e+17,7.869466,7.012584,0.0,,10.0,20.0,91.0,-0.023157,4.502474
0.5,9.653321,9.517381,1.0,46.814723,4.056651e+17,7.974976,6.552327,0.0,,10.0,20.0,91.0,0.030522,4.473556
1.0,7.842421,9.490656,1.0,34.882873,4.328797e+17,8.421235,6.828057,0.0,,46.116743,20.0,91.0,0.342109,22.622292
2.5,6.477913,9.535244,1.0,13.882179,1.760264e+17,8.746095,4.689004,0.0,,51.102402,20.0,91.0,0.559582,25.214378
5.0,4.521978,9.516798,1.0,-0.404663,1.399173e+17,9.583907,2.119631,0.0,,50.719663,20.0,91.0,1.019768,24.920991


In [24]:
depth_only_data.groupby(["sim_depth_cost_weight"]).mean()

  depth_only_data.groupby(["sim_depth_cost_weight"]).mean()


Unnamed: 0_level_0,n_steps,i_episode,finished,return,trial_id,actions,rewards,sim_exact,sim_temperature_file,sim_cost_parameter_values,sim_num_simulated,sim_num_trials,sim_seed,pid
sim_depth_cost_weight,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
-5.0,11.21989,9.49593,1.0,113.074833,1.78801e+17,7.551589,11.095626,0.0,,-5.0,100.0,20.0,91.0,49.564535
-2.5,11.240762,9.498116,1.0,75.289536,1.169812e+17,7.545935,7.376487,0.0,,-2.5,100.0,20.0,91.0,49.522197
-1.0,10.221193,9.532738,1.0,52.700149,2.20108e+17,7.718683,5.806498,0.0,,-1.0,100.0,20.0,91.0,49.693309
0.0,6.278772,9.443878,1.0,38.413891,2.443176e+17,8.876141,9.982103,0.0,,0.0,100.0,20.0,91.0,49.568211
1.0,6.26626,9.451893,1.0,29.799602,6.096593e+17,8.870972,8.274698,0.0,,1.0,100.0,20.0,91.0,49.681706
2.5,6.054337,9.507554,1.0,19.120614,2.966608e+17,8.935916,6.570967,0.0,,2.5,100.0,20.0,91.0,49.063962
5.0,6.117969,9.534557,1.0,0.403003,3.319853e+17,8.8495,2.75286,0.0,,5.0,100.0,20.0,91.0,49.481173
7.5,4.199356,9.428418,1.0,0.105941,3.002324e+16,9.760558,2.525412,0.0,,7.5,100.0,20.0,91.0,49.544381
10.0,4.219843,9.468772,1.0,-11.576017,5.926656e+16,9.750892,-0.914347,0.0,,10.0,100.0,20.0,91.0,49.325125


In [25]:
discount_data = []
for discount_factor, experiment_suffix in zip([1] + discount_factors, [""] + discount_factors):
    for alpha, alpha_suffix in zip([1]+alphas, [""] + [f"_{alpha}" for alpha in alphas]):
        curr_df = pd.read_csv(f"../../../cluster/data/trajectories/high_increasing{experiment_suffix}{alpha_suffix}/OptimalQ/simulated_agents_back_dist_depth_eff_forw_0.00_0.00_0.00_0.00_1.00_91.00.csv", index_col=0) 
        curr_df["discount_factor"] = discount_factor
        curr_df["alpha"] = alpha
        discount_data.append(curr_df)
discount_data = pd.concat(discount_data)

In [26]:
discount_data.groupby(["discount_factor", "alpha"]).mean()

  discount_data.groupby(["discount_factor", "alpha"]).mean()


Unnamed: 0_level_0,Unnamed: 1_level_0,n_steps,i_episode,finished,return,trial_id,actions,rewards,sim_exact,sim_temperature_file,sim_num_simulated,sim_num_trials,sim_seed,sim_alpha,sim_back_added_cost,sim_depth_cost_weight,sim_distance_multiplier,sim_forw_added_cost,sim_given_cost,pid
discount_factor,alpha,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
0.1,0.0,3.619048,9.482143,1.0,0.503968,-1.882007e+17,8.444444,0.301587,0.0,,10.0,20.0,91.0,0.0,0.0,0.0,0.0,0.0,1.0,4.494048
0.1,0.25,3.642436,9.35167,1.0,0.440079,-9.922029e+16,8.324165,0.269155,0.0,,10.0,20.0,91.0,0.25,0.0,0.0,0.0,0.0,1.0,4.499018
0.1,0.5,3.57085,9.246964,1.0,0.676113,6.087526e+16,8.51417,0.380567,0.0,,10.0,20.0,91.0,0.5,0.0,0.0,0.0,0.0,1.0,4.508097
0.1,0.75,3.60479,9.636727,1.0,0.335329,5.86237e+17,8.433134,0.241517,0.0,,10.0,20.0,91.0,0.75,0.0,0.0,0.0,0.0,1.0,4.469062
0.1,0.9,3.575758,9.579798,1.0,0.791919,6.288288e+17,8.410101,0.410101,0.0,,10.0,20.0,91.0,0.9,0.0,0.0,0.0,0.0,1.0,4.470707
0.1,1.0,3.628458,9.452569,1.0,0.498024,-5.848669e+17,8.462451,0.300395,0.0,,10.0,20.0,91.0,1.0,0.0,0.0,0.0,0.0,1.0,4.496047
0.2,0.0,4.294627,9.639515,1.0,-0.407626,2.050235e+16,8.185442,0.14565,0.0,,10.0,20.0,91.0,0.0,0.0,0.0,0.0,0.0,1.0,4.570191
0.2,0.25,4.007273,9.12,1.0,0.201455,1.72965e+16,8.361818,0.256727,0.0,,10.0,20.0,91.0,0.25,0.0,0.0,0.0,0.0,1.0,4.478182
0.2,0.5,3.981584,9.425414,1.0,0.185635,-7.240391e+17,8.390424,0.261878,0.0,,10.0,20.0,91.0,0.5,0.0,0.0,0.0,0.0,1.0,4.581952
0.2,0.75,4.624473,9.585091,1.0,0.037918,-8.954899e+16,8.070323,0.066442,0.0,,10.0,20.0,91.0,0.75,0.0,0.0,0.0,0.0,1.0,4.489451


In [27]:
sim_cost_parameters={}
for cost_function in ["linear_depth", "back_dist_depth_eff_forw"]:
    yaml_file = Path(
                        f"../../../data/inputs/yamls/cost_functions/{cost_function}.yaml"
                    )
    with open(str(yaml_file), "r") as stream:
        cost_details = yaml.safe_load(stream)

    sim_cost_parameters[cost_function] = [
                                        f"sim_{param}" for param in cost_details["constant_values"].keys()
                                        ]

In [28]:
for classification, nodes in experiment_setting_data[
    "node_classification"
].items():
    discount_data[classification] = discount_data["actions"].apply(
        lambda action: action in nodes
    )
    depth_only_data[classification] = depth_only_data["actions"].apply(
        lambda action: action in nodes
    )
    depth_data[classification] = depth_data["actions"].apply(
        lambda action: action in nodes
    )

sum_discount_clicks = (
    discount_data.groupby(["pid", "i_episode", *sim_cost_parameters["back_dist_depth_eff_forw"], "discount_factor", "alpha"])
    .sum()
    .reset_index()
    .groupby([*sim_cost_parameters["back_dist_depth_eff_forw"], "discount_factor", "alpha"])
    .mean()
    .reset_index()
)

sum_depth_clicks = (
    depth_data.groupby(["pid", "i_episode", *sim_cost_parameters["linear_depth"]])
    .sum()
    .reset_index()
    .groupby([ *sim_cost_parameters["linear_depth"]])
    .mean()
    .reset_index()
)

NameError: name 'experiment_setting_data' is not defined

In [None]:
sum_depth_clicks.groupby([ "sim_depth_cost_weight"]).mean()[["early", "middle", "late", "clicks"]]

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

sum_df=sum_discount_clicks.groupby([ "discount_factor", "alpha"]).mean().reset_index()

for field in ["early", "middle", "late", "clicks"]:
    heat_map_data = sum_df.pivot(index="discount_factor", columns="alpha", values=field)
    plt.figure()
    plt.title(field)
    sns.heatmap(data=heat_map_data, annot=True, fmt=".2f")
