In [1]:
import json
import yaml
import numpy as np
import shutil
from pathlib import Path
import pandas as pd
from mouselab.mouselab import MouselabEnv
from mouselab.env_utils import get_ground_truths_from_json
import dill as pickle

# Creating data, modifying code

In [66]:
discount_factors = [.10, .20, .30, .40, .50,.75,.85,.9,.95,.975,.999]
alphas = [0,.25,.5,.75,.9]

In [3]:
for discount_factor in discount_factors:
    shutil.copyfile( Path(f"../../../data/inputs/exp_inputs/structure/312_2_4_24.json"), \
                     Path(f"../../../data/inputs/exp_inputs/structure/high_increasing{discount_factor}.json"))

In [4]:
with open(Path("../../../data/inputs/yamls/experiment_settings/high_increasing.yaml"), "rb") as f:
    experiment_setting_data = yaml.safe_load(f)
    
for discount_factor in discount_factors:
    experiment_setting_data["ground_truth_file"] = f"high_increasing{discount_factor}"
    experiment_setting_data["structure"] = f"high_increasing{discount_factor}"
    
    with open(Path(f"../../../data/inputs/yamls/experiment_settings/high_increasing{discount_factor}.yaml"), "w") as f:
        yaml.dump(experiment_setting_data, f)

In [5]:
env = MouselabEnv.new_symmetric_registered("high_increasing")

for discount_factor in discount_factors:
    with open(Path("../../../data/inputs/exp_inputs/rewards/312_2_4_24.json"), "rb") as f:
        data = json.load(f)
    
    for trial in data:
        trial["stateRewards"] = [round(reward*discount_factor**(env.mdp_graph.nodes[action]["depth"]-1),2) if env.mdp_graph.nodes[action]["depth"] != 0 else 0.0 for action, reward in enumerate(trial["stateRewards"])]

    with open(Path(f"../../../data/inputs/exp_inputs/rewards/high_increasing{discount_factor}.json"), "w") as f:
        json.dump(data, f)

In [6]:
ls ../../../data/inputs/yamls/experiments

c1.1.yaml                    methods_main0.9.yaml
c2.1.yaml                    methods_main0.95.yaml
irl_validation.yaml          methods_main0.975.yaml
methods_main.yaml            methods_main0.999.yaml
methods_main0.1.yaml         methods_main_extension.yaml
methods_main0.2.yaml         methods_main_transfer.yaml
methods_main0.3.yaml         quest_feasibility.yaml
methods_main0.4.yaml         quest_first.yaml
methods_main0.5.yaml         quest_main.yaml
methods_main0.75.yaml        quest_second.yaml
methods_main0.85.yaml


In [7]:
with open(Path("../../../data/inputs/yamls/experiments/methods_main.yaml"), "rb") as f:
    data = yaml.safe_load(f)

for discount_factor in discount_factors:
    data['sessions'] = [f'methods_main{discount_factor}']
    data['experiment_setting'] = f'high_increasing{discount_factor}'
    with open(Path(f"../../../data/inputs/yamls/experiments/methods_main{discount_factor}.yaml"), "w") as f:
        yaml.dump(data, f)

Put this in `cluster/src/cluster_utils.py`

In [8]:
for discount_factor in discount_factors:
    print(f"elif name == 'high_increasing{discount_factor}':\n"
          f"    register(\n"
          f"        name=name,\n"
          f"        branching=[3, 1, 2],\n"
          f"        reward_inputs=\"depth\",\n"
          f"        reward_dictionary={{\n"
          f"            1: Categorical([-4, -2, 2, 4]),\n"
          f"            2: Categorical([{', '.join(str(round(val*discount_factor,2)) for val in [-8, -4, 4, 8])}]),\n"
          f"            3: Categorical([{', '.join(str(round(val*discount_factor**2,2)) for val in [-48, -24, 24, 48])}]),\n"
          f"        }},\n"
          f"    )")

elif name == 'high_increasing0.1':
    register(
        name=name,
        branching=[3, 1, 2],
        reward_inputs="depth",
        reward_dictionary={
            1: Categorical([-4, -2, 2, 4]),
            2: Categorical([-0.8, -0.4, 0.4, 0.8]),
            3: Categorical([-0.48, -0.24, 0.24, 0.48]),
        },
    )
elif name == 'high_increasing0.2':
    register(
        name=name,
        branching=[3, 1, 2],
        reward_inputs="depth",
        reward_dictionary={
            1: Categorical([-4, -2, 2, 4]),
            2: Categorical([-1.6, -0.8, 0.8, 1.6]),
            3: Categorical([-1.92, -0.96, 0.96, 1.92]),
        },
    )
elif name == 'high_increasing0.3':
    register(
        name=name,
        branching=[3, 1, 2],
        reward_inputs="depth",
        reward_dictionary={
            1: Categorical([-4, -2, 2, 4]),
            2: Categorical([-2.4, -1.2, 1.2, 2.4]),
            3: Categorical([-4.32, -2.16, 2.16, 4.32]),
        },
    )
elif name == 'high_increa

In [9]:
np.savetxt(
    Path("../../../cluster/parameters/cost/null.txt"),
    [[1.0, 0.0]],
    fmt="%.02f",
    delimiter=",",
)

In [10]:
mouselab_data = pd.read_csv(Path("../../../data/processed/methods_main/mouselab-mdp.csv"), index_col=0)
mouselab_data = mouselab_data[mouselab_data["block"]=="test"]
for discount_factor in discount_factors:
    with open(Path(f"../../../data/inputs/exp_inputs/rewards/high_increasing{discount_factor}.json"), "rb") as f:
        data = json.load(f)
    ground_truth_dict = {trial["trial_id"] : trial["stateRewards"] for trial in data}

    mouselab_data["state_rewards"] = mouselab_data["trial_id"].apply(lambda trial_id : ground_truth_dict[trial_id][1:])
    
    # need to change ground truths
    Path(f"../../../data/processed/methods_main{discount_factor}").mkdir(exist_ok = True, parents=True)
    mouselab_data.to_csv(Path(f"../../../data/processed/methods_main{discount_factor}/mouselab-mdp.csv"), index=False)

Generate cluster submission commands to run to get Q values

In [11]:
for discount_factor in discount_factors:
    print(f"condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing{discount_factor} cost_function=linear_depth param_file=null")

condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing0.1 cost_function=linear_depth param_file=null
condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing0.2 cost_function=linear_depth param_file=null
condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing0.3 cost_function=linear_depth param_file=null
condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing0.4 cost_function=linear_depth param_file=null
condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing0.5 cost_function=linear_depth param_file=null
condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing0.75 cost_function=linear_depth param_file=null
condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing0.85 cost_functio

In [69]:
for discount_factor in discount_factors:
    for alpha in alphas:
        print(f"condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing cost_function=linear_depth param_file=null alpha={alpha}")

condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing cost_function=linear_depth param_file=null alpha=0
condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing cost_function=linear_depth param_file=null alpha=0.25
condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing cost_function=linear_depth param_file=null alpha=0.5
condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing cost_function=linear_depth param_file=null alpha=0.75
condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing cost_function=linear_depth param_file=null alpha=0.9
condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing cost_function=linear_depth param_file=null alpha=0
condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_s

In [67]:
for discount_factor in discount_factors:
    for alpha in alphas:
        print(f"condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing{discount_factor} cost_function=linear_depth param_file=null alpha={alpha}")

condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing0.1 cost_function=linear_depth param_file=null alpha=0
condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing0.1 cost_function=linear_depth param_file=null alpha=0.25
condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing0.1 cost_function=linear_depth param_file=null alpha=0.5
condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing0.1 cost_function=linear_depth param_file=null alpha=0.75
condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing0.1 cost_function=linear_depth param_file=null alpha=0.9
condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing0.2 cost_function=linear_depth param_file=null alpha=0
condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Value

Generate cluster submission commands to infer likelihood for methods_main:

In [12]:
for discount_factor in discount_factors:
    print(f"condor_submit_bid 1 submission_scripts/MPI-IS/04_Infer_Params.sub experiment=methods_main{discount_factor} cost_function=linear_depth param_file=null")

condor_submit_bid 1 submission_scripts/MPI-IS/04_Infer_Params.sub experiment=methods_main0.1 cost_function=linear_depth param_file=null
condor_submit_bid 1 submission_scripts/MPI-IS/04_Infer_Params.sub experiment=methods_main0.2 cost_function=linear_depth param_file=null
condor_submit_bid 1 submission_scripts/MPI-IS/04_Infer_Params.sub experiment=methods_main0.3 cost_function=linear_depth param_file=null
condor_submit_bid 1 submission_scripts/MPI-IS/04_Infer_Params.sub experiment=methods_main0.4 cost_function=linear_depth param_file=null
condor_submit_bid 1 submission_scripts/MPI-IS/04_Infer_Params.sub experiment=methods_main0.5 cost_function=linear_depth param_file=null
condor_submit_bid 1 submission_scripts/MPI-IS/04_Infer_Params.sub experiment=methods_main0.75 cost_function=linear_depth param_file=null
condor_submit_bid 1 submission_scripts/MPI-IS/04_Infer_Params.sub experiment=methods_main0.85 cost_function=linear_depth param_file=null
condor_submit_bid 1 submission_scripts/MPI-IS/

Combine data:

In [13]:
print(f"for experiment in {' '.join(['methods_main' + str(discount_factor) for discount_factor in discount_factors])};\n"
      f"    do condor_submit_bid 2 submission_scripts/MPI-IS/05_Combine_Human.sub experiment=$experiment cost_function=linear_depth;\n"
      f"done;")

for experiment in methods_main0.1 methods_main0.2 methods_main0.3 methods_main0.4 methods_main0.5 methods_main0.75 methods_main0.85 methods_main0.9 methods_main0.95 methods_main0.975 methods_main0.999;
    do condor_submit_bid 2 submission_scripts/MPI-IS/05_Combine_Human.sub experiment=$experiment cost_function=linear_depth;
done;


MAP:

In [14]:
print(f"for experiment in {' '.join(['methods_main' + str(discount_factor) for discount_factor in discount_factors])};\n"
      f"    do condor_submit_bid 2 submission_scripts/MPI-IS/M_01_Get_MAP_File.sub experiment=$experiment cost_function=linear_depth;\n"
      f"done;")

for experiment in methods_main0.1 methods_main0.2 methods_main0.3 methods_main0.4 methods_main0.5 methods_main0.75 methods_main0.85 methods_main0.9 methods_main0.95 methods_main0.975 methods_main0.999;
    do condor_submit_bid 2 submission_scripts/MPI-IS/M_01_Get_MAP_File.sub experiment=$experiment cost_function=linear_depth;
done;


Transfer over:

```
rsync -aPzr --include "methods_main*/*/*mle_and_map*" --include "*/" --exclude "*" vfelso@login.cluster.is.localnet:/work/vfelso/planning-depth-differences/data/processed/ data/processed
```

# Fit of main dataset by discount factor model without cost

In [15]:
dfs = []
for discount_factor in discount_factors:
    with open(f"../../../data/processed/methods_main{discount_factor}/linear_depth/mle_and_map.pickle", "rb") as f:
        data =pickle.load(f)
    
    df = data["SoftmaxPolicy"]["test_map_expon"][()]
    # TODO test same as simple
    df["discount_factor"] = discount_factor
    dfs.append(df)
    
discount_fitted_data = pd.concat(dfs)

In [16]:
discount_fitted_data = discount_fitted_data.reset_index()

In [17]:
best_param_rows = discount_fitted_data.loc[
    discount_fitted_data.groupby(["trace_pid"]).idxmax()["test_map_expon"]
]

  discount_fitted_data.groupby(["trace_pid"]).idxmax()["test_map_expon"]


In [18]:
from collections import Counter
np.all([counter == 1 for pid, counter in Counter(best_param_rows["trace_pid"]).most_common(5)])

True

In [19]:
# best_param_rows
from statsmodels.tools.eval_measures import bic

best_param_rows["bic"] = best_param_rows.apply(
        lambda row: bic(
            llf=row["test_map_expon"],
            nobs=20,
            df_modelwc=5, # temp, discount rate, forw, eff, dist
        ),
        axis=1)

In [20]:
best_param_rows.sum()["bic"]

43357.6687241661

In [21]:
np.mean(best_param_rows["discount_factor"]), np.std(best_param_rows["discount_factor"])

(0.4767950819672131, 0.25533548932132505)

In [22]:
np.mean(best_param_rows["temp"]), np.std(best_param_rows["temp"])

(0.9938524590163934, 1.0625244041334765)

# Model with non-depth cost, cluster jobs

In [23]:
for discount_factor in discount_factors:
    print(f"condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing{discount_factor} cost_function=dist_forw_eff param_file=dist_forw_eff")

condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing0.1 cost_function=dist_forw_eff param_file=dist_forw_eff
condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing0.2 cost_function=dist_forw_eff param_file=dist_forw_eff
condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing0.3 cost_function=dist_forw_eff param_file=dist_forw_eff
condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing0.4 cost_function=dist_forw_eff param_file=dist_forw_eff
condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing0.5 cost_function=dist_forw_eff param_file=dist_forw_eff
condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing0.75 cost_function=dist_forw_eff param_file=dist_forw_eff
condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Va

In [24]:
for discount_factor in discount_factors:
    print(f"condor_submit_bid 1 submission_scripts/MPI-IS/04_Infer_Params.sub experiment=methods_main{discount_factor} cost_function=dist_forw_eff param_file=dist_forw_eff")

condor_submit_bid 1 submission_scripts/MPI-IS/04_Infer_Params.sub experiment=methods_main0.1 cost_function=dist_forw_eff param_file=dist_forw_eff
condor_submit_bid 1 submission_scripts/MPI-IS/04_Infer_Params.sub experiment=methods_main0.2 cost_function=dist_forw_eff param_file=dist_forw_eff
condor_submit_bid 1 submission_scripts/MPI-IS/04_Infer_Params.sub experiment=methods_main0.3 cost_function=dist_forw_eff param_file=dist_forw_eff
condor_submit_bid 1 submission_scripts/MPI-IS/04_Infer_Params.sub experiment=methods_main0.4 cost_function=dist_forw_eff param_file=dist_forw_eff
condor_submit_bid 1 submission_scripts/MPI-IS/04_Infer_Params.sub experiment=methods_main0.5 cost_function=dist_forw_eff param_file=dist_forw_eff
condor_submit_bid 1 submission_scripts/MPI-IS/04_Infer_Params.sub experiment=methods_main0.75 cost_function=dist_forw_eff param_file=dist_forw_eff
condor_submit_bid 1 submission_scripts/MPI-IS/04_Infer_Params.sub experiment=methods_main0.85 cost_function=dist_forw_eff p

In [25]:
print(f"for experiment in {' '.join(['methods_main' + str(discount_factor) for discount_factor in discount_factors])};\n"
      f"    do condor_submit_bid 2 submission_scripts/MPI-IS/05_Combine_Human.sub experiment=$experiment cost_function=dist_forw_eff;\n"
      f"done;")

for experiment in methods_main0.1 methods_main0.2 methods_main0.3 methods_main0.4 methods_main0.5 methods_main0.75 methods_main0.85 methods_main0.9 methods_main0.95 methods_main0.975 methods_main0.999;
    do condor_submit_bid 2 submission_scripts/MPI-IS/05_Combine_Human.sub experiment=$experiment cost_function=dist_forw_eff;
done;


In [26]:
print(f"for experiment in {' '.join(['methods_main' + str(discount_factor) for discount_factor in discount_factors])};\n"
      f"    do condor_submit_bid 2 submission_scripts/MPI-IS/M_01_Get_MAP_File.sub experiment=$experiment cost_function=dist_forw_eff;\n"
      f"done;")

for experiment in methods_main0.1 methods_main0.2 methods_main0.3 methods_main0.4 methods_main0.5 methods_main0.75 methods_main0.85 methods_main0.9 methods_main0.95 methods_main0.975 methods_main0.999;
    do condor_submit_bid 2 submission_scripts/MPI-IS/M_01_Get_MAP_File.sub experiment=$experiment cost_function=dist_forw_eff;
done;


# # Fit of main dataset by discount factor model with cost

In [55]:
discount_factors = [.1,.2,.4,.5,.85,.95]
dfs = []
for discount_factor in discount_factors:
    with open(f"../../../data/processed/methods_main{discount_factor}/dist_forw_eff/mle_and_map.pickle", "rb") as f:
        data =pickle.load(f)
    
    df = data["SoftmaxPolicy"]["test_map_expon"][()]
    # TODO test same as simple
    df["discount_factor"] = discount_factor
    dfs.append(df)

with open(f"../../../data/processed/methods_main/dist_forw_eff/mle_and_map.pickle", "rb") as f:
    data =pickle.load(f)
    
        
    df = data["SoftmaxPolicy"]["test_map_expon"][()]
    # TODO test same as simple
    df["discount_factor"] = 1
    dfs.append(df)
discount_fitted_data = pd.concat(dfs)

In [56]:
discount_fitted_data = discount_fitted_data.reset_index()

In [57]:
best_param_rows = discount_fitted_data.loc[
    discount_fitted_data.groupby(["trace_pid"]).idxmax()["test_map_expon"]
]

  discount_fitted_data.groupby(["trace_pid"]).idxmax()["test_map_expon"]


In [58]:
from collections import Counter
np.all([counter == 1 for pid, counter in Counter(best_param_rows["trace_pid"]).most_common(5)])

True

In [59]:
# best_param_rows
from statsmodels.tools.eval_measures import bic

best_param_rows["bic"] = best_param_rows.apply(
        lambda row: bic(
            llf=row["test_map_expon"],
            nobs=20,
            df_modelwc=5, # temp, discount rate, forw, eff, dist
        ),
        axis=1)

In [60]:
best_param_rows.sum()["bic"]

35271.32372999168

In [61]:
np.mean(best_param_rows["discount_factor"]), np.std(best_param_rows["discount_factor"])

(0.7086065573770491, 0.33318462261017695)

In [62]:
Counter(best_param_rows["discount_factor"]).most_common()

[(0.95, 41), (1.0, 24), (0.1, 15), (0.4, 14), (0.85, 12), (0.5, 10), (0.2, 6)]

In [63]:
np.mean(best_param_rows["temp"]), np.std(best_param_rows["temp"])

(1.4180327868852458, 1.1130939301270473)

# Simulations

In [36]:
for discount_factor in discount_factors:
    print(f"condor_submit_bid 1 submission_scripts/MPI-IS/06_Simulate_Optimal.sub experiment_setting=high_increasing{discount_factor} cost_function=linear_depth param_file=null")
print(f"condor_submit_bid 1 submission_scripts/MPI-IS/06_Simulate_Optimal.sub experiment_setting=high_increasing cost_function=linear_depth param_file=depth")

condor_submit_bid 1 submission_scripts/MPI-IS/06_Simulate_Optimal.sub experiment_setting=high_increasing0.1 cost_function=linear_depth param_file=null
condor_submit_bid 1 submission_scripts/MPI-IS/06_Simulate_Optimal.sub experiment_setting=high_increasing0.2 cost_function=linear_depth param_file=null
condor_submit_bid 1 submission_scripts/MPI-IS/06_Simulate_Optimal.sub experiment_setting=high_increasing0.3 cost_function=linear_depth param_file=null
condor_submit_bid 1 submission_scripts/MPI-IS/06_Simulate_Optimal.sub experiment_setting=high_increasing0.4 cost_function=linear_depth param_file=null
condor_submit_bid 1 submission_scripts/MPI-IS/06_Simulate_Optimal.sub experiment_setting=high_increasing0.5 cost_function=linear_depth param_file=null
condor_submit_bid 1 submission_scripts/MPI-IS/06_Simulate_Optimal.sub experiment_setting=high_increasing0.75 cost_function=linear_depth param_file=null
condor_submit_bid 1 submission_scripts/MPI-IS/06_Simulate_Optimal.sub experiment_setting=high

```
for gamma in 0.1 0.2 0.3 0.4 0.5 0.75 0.85 0.9 0.95 0.975 0.999;do mkdir cluster/data/trajectories/high_increasing$gamma; done;

for gamma in 0.1 0.2 0.3 0.4 0.5 0.75 0.85 0.9 0.95 0.975 0.999;do mkdir cluster/data/trajectories/high_increasing$gamma/OptimalQ; done;

for gamma in 0.1 0.2 0.3 0.4 0.5 0.75 0.85 0.9 0.95 0.975 0.999;
do scp vfelso@login.cluster.is.localnet:/work/vfelso/planning-depth-differences/cluster/data/trajectories/high_increasing$gamma/OptimalQ/simulated_agents_linear_depth_0.00_1.00_91.00.csv  cluster/data/trajectories/high_increasing$gamma/OptimalQ/simulated_agents_linear_depth_0.00_1.00_91.00.csv; 
done

mkdir cluster/data/trajectories/high_increasing
mkdir cluster/data/trajectories/high_increasing/OptimalQ
rsync -aPzr --include "simulated_agents_linear_depth*" --include "*/" --exclude "*" vfelso@login.cluster.is.localnet:/work/vfelso/planning-depth-differences/cluster/data/trajectories/high_increasing/OptimalQ cluster/data/trajectories/high_increasing

rsync -aPzr --include "simulated_agents_depth_only*" --include "*/" --exclude "*" vfelso@login.cluster.is.localnet:/work/vfelso/planning-depth-differences/cluster/data/trajectories/high_increasing/OptimalQ cluster/data/trajectories/high_increasing
```


# Look at simulations

In [37]:
depth_data = pd.concat([pd.read_csv(f, index_col=0) for f in Path("../../../cluster/data/trajectories/high_increasing/OptimalQ/").glob("simulated_agents_linear_depth_*_91.00.csv")])
depth_only_data = pd.concat([pd.read_csv(f, index_col=0) for f in Path("../../../cluster/data/trajectories/high_increasing/OptimalQ/").glob("simulated_agents_depth_only_*_91.00.csv")])

In [38]:
depth_data.groupby(["sim_depth_cost_weight"]).mean()

  depth_data.groupby(["sim_depth_cost_weight"]).mean()


Unnamed: 0_level_0,n_steps,i_episode,finished,return,trial_id,actions,rewards,sim_exact,sim_temperature_file,sim_num_simulated,sim_num_trials,sim_seed,sim_static_cost_weight,pid
sim_depth_cost_weight,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
-5.0,13.696003,9.501049,1.0,170.873703,1.146243e+17,7.05849,13.371199,0.0,,54.221865,20.0,91.0,1.063876,26.612189
-2.5,13.5176,9.515829,1.0,107.983296,7.307269e+16,7.09931,8.630997,0.0,,56.560678,20.0,91.0,0.728505,27.77623
-1.0,12.684407,9.509322,1.0,70.870104,1.363922e+17,7.279407,6.159043,0.0,,55.68412,20.0,91.0,0.547208,27.326802
-0.1,11.905653,9.476758,1.0,57.395145,4.704697e+17,7.508223,5.878279,0.0,,10.0,20.0,91.0,-0.022484,4.502905
0.0,9.72391,9.529295,1.0,50.710475,4.196647e+17,8.00575,7.395792,0.0,,39.757557,20.0,91.0,0.271548,19.440438
0.1,10.215781,9.452049,1.0,54.658385,4.584939e+17,7.869466,7.012584,0.0,,10.0,20.0,91.0,-0.023157,4.502474
0.5,9.653321,9.517381,1.0,46.814723,4.056651e+17,7.974976,6.552327,0.0,,10.0,20.0,91.0,0.030522,4.473556
1.0,7.842421,9.490656,1.0,34.882873,4.328797e+17,8.421235,6.828057,0.0,,46.116743,20.0,91.0,0.342109,22.622292
2.5,6.477913,9.535244,1.0,13.882179,1.760264e+17,8.746095,4.689004,0.0,,51.102402,20.0,91.0,0.559582,25.214378
5.0,4.521978,9.516798,1.0,-0.404663,1.399173e+17,9.583907,2.119631,0.0,,50.719663,20.0,91.0,1.019768,24.920991


In [39]:
depth_only_data.groupby(["sim_depth_cost_weight"]).mean()

  depth_only_data.groupby(["sim_depth_cost_weight"]).mean()


Unnamed: 0_level_0,n_steps,i_episode,finished,return,trial_id,actions,rewards,sim_exact,sim_temperature_file,sim_cost_parameter_values,sim_num_simulated,sim_num_trials,sim_seed,pid
sim_depth_cost_weight,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
-5.0,11.21989,9.49593,1.0,113.074833,1.78801e+17,7.551589,11.095626,0.0,,-5.0,100.0,20.0,91.0,49.564535
-2.5,11.240762,9.498116,1.0,75.289536,1.169812e+17,7.545935,7.376487,0.0,,-2.5,100.0,20.0,91.0,49.522197
-1.0,10.221193,9.532738,1.0,52.700149,2.20108e+17,7.718683,5.806498,0.0,,-1.0,100.0,20.0,91.0,49.693309
0.0,6.278772,9.443878,1.0,38.413891,2.443176e+17,8.876141,9.982103,0.0,,0.0,100.0,20.0,91.0,49.568211
1.0,6.26626,9.451893,1.0,29.799602,6.096593e+17,8.870972,8.274698,0.0,,1.0,100.0,20.0,91.0,49.681706
2.5,6.054337,9.507554,1.0,19.120614,2.966608e+17,8.935916,6.570967,0.0,,2.5,100.0,20.0,91.0,49.063962
5.0,6.117969,9.534557,1.0,0.403003,3.319853e+17,8.8495,2.75286,0.0,,5.0,100.0,20.0,91.0,49.481173
7.5,4.199356,9.428418,1.0,0.105941,3.002324e+16,9.760558,2.525412,0.0,,7.5,100.0,20.0,91.0,49.544381
10.0,4.219843,9.468772,1.0,-11.576017,5.926656e+16,9.750892,-0.914347,0.0,,10.0,100.0,20.0,91.0,49.325125


In [40]:
discount_data = []
for discount_factor in discount_factors:
    curr_df = pd.read_csv(f"../../../cluster/data/trajectories/high_increasing{discount_factor}/OptimalQ/simulated_agents_linear_depth_0.00_1.00_91.00.csv", index_col=0) 
    curr_df["discount_factor"] = discount_factor
    discount_data.append(curr_df)
discount_data = pd.concat(discount_data)

In [41]:
discount_data.groupby(["discount_factor"]).mean()

  discount_data.groupby(["discount_factor"]).mean()


Unnamed: 0_level_0,n_steps,i_episode,finished,return,trial_id,actions,rewards,sim_exact,sim_temperature_file,sim_num_simulated,sim_num_trials,sim_seed,sim_static_cost_weight,sim_depth_cost_weight,pid
discount_factor,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
0.1,3.607178,9.46002,1.0,0.455035,4.159794e+17,8.419342,0.288136,0.0,,100.0,20.0,91.0,1.0,0.0,49.456431
0.2,4.994458,9.504619,1.0,0.102486,1.792935e+17,8.014516,0.123336,0.0,,100.0,20.0,91.0,1.0,0.0,49.591185
0.3,5.999756,9.460385,1.0,0.195319,2.558867e+17,8.861531,0.285178,0.0,,100.0,20.0,91.0,1.0,0.0,49.052657
0.4,6.144125,9.48801,1.0,2.94306,4.852885e+17,8.872302,1.001046,0.0,,100.0,20.0,91.0,1.0,0.0,49.236571
0.5,6.419195,9.577806,1.0,6.247087,5.497923e+17,8.7731,1.830315,0.0,,100.0,20.0,91.0,1.0,0.0,49.167378
0.75,6.371242,9.539385,1.0,19.339431,5.980326e+17,8.829294,5.124563,0.0,,100.0,20.0,91.0,1.0,0.0,49.459567
0.85,6.260446,9.524454,1.0,26.725285,3.754838e+17,8.883666,7.025233,0.0,,100.0,20.0,91.0,1.0,0.0,49.265907
0.9,6.462406,9.527807,1.0,29.26155,4.940007e+17,8.808981,7.559235,0.0,,100.0,20.0,91.0,1.0,0.0,49.275648
0.95,6.325505,9.46853,1.0,33.854711,3.684742e+17,8.848755,8.775787,0.0,,100.0,20.0,91.0,1.0,0.0,49.984735
0.975,6.255163,9.664491,1.0,36.155813,5.102019e+17,8.861144,9.428346,0.0,,100.0,20.0,91.0,1.0,0.0,49.123309


In [42]:
yaml_file = Path(
    "../../../data/inputs/yamls/cost_functions/linear_depth.yaml"
)
with open(str(yaml_file), "r") as stream:
    cost_details = yaml.safe_load(stream)

sim_cost_parameters = [
    f"sim_{param}" for param in cost_details["constant_values"].keys()
]

In [43]:
for classification, nodes in experiment_setting_data[
    "node_classification"
].items():
    discount_data[classification] = discount_data["actions"].apply(
        lambda action: action in nodes
    )
    depth_only_data[classification] = depth_only_data["actions"].apply(
        lambda action: action in nodes
    )

sum_discount_clicks = (
    discount_data.groupby(["pid", "i_episode", *sim_cost_parameters] + ["discount_factor"])
    .sum()
    .reset_index()
    .groupby(sim_cost_parameters + ["discount_factor"])
    .mean()
    .reset_index()
)

sum_depth_clicks = (
    depth_only_data.groupby(["pid", "i_episode", "sim_depth_cost_weight"])
    .sum()
    .reset_index()
    .groupby([ "sim_depth_cost_weight"])
    .mean()
    .reset_index()
)

  discount_data.groupby(["pid", "i_episode", *sim_cost_parameters] + ["discount_factor"])
  depth_only_data.groupby(["pid", "i_episode", "sim_depth_cost_weight"])


In [44]:
sum_depth_clicks[[ "sim_depth_cost_weight"]+["early", "middle", "late", "clicks"]]

Unnamed: 0,sim_depth_cost_weight,early,middle,late,clicks
0,-5.0,0.196,3.0,6.0,9.196
1,-2.5,0.2155,3.0,6.0,9.2155
2,-1.0,0.141,1.9235,6.0,8.0645
3,0.0,0.0155,0.139,3.064,3.2185
4,1.0,0.0085,0.1245,3.1335,3.2665
5,2.5,0.0,0.0965,3.0075,3.104
6,5.0,0.221,0.0,2.975,3.196
7,7.5,0.0,0.0,1.794,1.794
8,10.0,0.0,0.0,1.802,1.802


In [45]:
sum_discount_clicks[[ "discount_factor"]+["early", "middle", "late", "clicks"]]

Unnamed: 0,discount_factor,early,middle,late,clicks
0,0.1,1.5075,0.0,0.0,1.5075
1,0.2,1.6655,0.0,1.1235,2.789
2,0.3,0.2215,0.0,2.8805,3.102
3,0.4,0.2315,0.0,2.9385,3.17
4,0.5,0.105,0.12,3.1095,3.3345
5,0.75,0.0155,0.1465,3.129,3.291
6,0.85,0.014,0.134,3.064,3.212
7,0.9,0.017,0.165,3.1605,3.3425
8,0.95,0.0095,0.1405,3.108,3.258
9,0.975,0.0105,0.134,3.0685,3.213
