In [1]:
import json
import yaml
import numpy as np
import shutil
from pathlib import Path
import pandas as pd
from mouselab.mouselab import MouselabEnv
from mouselab.env_utils import get_ground_truths_from_json
import dill as pickle

# Creating data, modifying code

In [2]:
discount_factors = [.10, .20, .30, .40, .50,.75,.85,.9,.95,.975,.999]

In [3]:
for discount_factor in discount_factors:
    shutil.copyfile( Path(f"../../../data/inputs/exp_inputs/structure/312_2_4_24.json"), \
                     Path(f"../../../data/inputs/exp_inputs/structure/high_increasing{discount_factor}.json"))

In [4]:
with open(Path("../../../data/inputs/yamls/experiment_settings/high_increasing.yaml"), "rb") as f:
    experiment_setting_data = yaml.safe_load(f)
    
for discount_factor in discount_factors:
    experiment_setting_data["ground_truth_file"] = f"high_increasing{discount_factor}"
    experiment_setting_data["structure"] = f"high_increasing{discount_factor}"
    
    with open(Path(f"../../../data/inputs/yamls/experiment_settings/high_increasing{discount_factor}.yaml"), "w") as f:
        yaml.dump(experiment_setting_data, f)

In [5]:
env = MouselabEnv.new_symmetric_registered("high_increasing")

for discount_factor in discount_factors:
    with open(Path("../../../data/inputs/exp_inputs/rewards/312_2_4_24.json"), "rb") as f:
        data = json.load(f)
    
    for trial in data:
        trial["stateRewards"] = [round(reward*discount_factor**(env.mdp_graph.nodes[action]["depth"]-1),2) if env.mdp_graph.nodes[action]["depth"] != 0 else 0.0 for action, reward in enumerate(trial["stateRewards"])]

    with open(Path(f"../../../data/inputs/exp_inputs/rewards/high_increasing{discount_factor}.json"), "w") as f:
        json.dump(data, f)

In [6]:
ls ../../../data/inputs/yamls/experiments

c1.1.yaml             methods_main0.5.yaml    methods_main_extension.yaml
c2.1.yaml             methods_main0.75.yaml   methods_main.yaml
irl_validation.yaml   methods_main0.85.yaml   quest_feasibility.yaml
methods_main0.1.yaml  methods_main0.95.yaml   quest_first.yaml
methods_main0.2.yaml  methods_main0.975.yaml  quest_main.yaml
methods_main0.3.yaml  methods_main0.999.yaml  quest_second.yaml
methods_main0.4.yaml  methods_main0.9.yaml


In [7]:
with open(Path("../../../data/inputs/yamls/experiments/methods_main.yaml"), "rb") as f:
    data = yaml.safe_load(f)

for discount_factor in discount_factors:
    data['sessions'] = [f'methods_main{discount_factor}']
    data['experiment_setting'] = f'high_increasing{discount_factor}'
    with open(Path(f"../../../data/inputs/yamls/experiments/methods_main{discount_factor}.yaml"), "w") as f:
        yaml.dump(data, f)

Put this in `cluster/src/cluster_utils.py`

In [8]:
for discount_factor in discount_factors:
    print(f"elif name == 'high_increasing{discount_factor}':\n"
          f"    register(\n"
          f"        name=name,\n"
          f"        branching=[3, 1, 2],\n"
          f"        reward_inputs=\"depth\",\n"
          f"        reward_dictionary={{\n"
          f"            1: Categorical([-4, -2, 2, 4]),\n"
          f"            2: Categorical([{', '.join(str(round(val*discount_factor,2)) for val in [-8, -4, 4, 8])}]),\n"
          f"            3: Categorical([{', '.join(str(round(val*discount_factor**2,2)) for val in [-48, -24, 24, 48])}]),\n"
          f"        }},\n"
          f"    )")

elif name == 'high_increasing0.1':
    register(
        name=name,
        branching=[3, 1, 2],
        reward_inputs="depth",
        reward_dictionary={
            1: Categorical([-4, -2, 2, 4]),
            2: Categorical([-0.8, -0.4, 0.4, 0.8]),
            3: Categorical([-0.48, -0.24, 0.24, 0.48]),
        },
    )
elif name == 'high_increasing0.2':
    register(
        name=name,
        branching=[3, 1, 2],
        reward_inputs="depth",
        reward_dictionary={
            1: Categorical([-4, -2, 2, 4]),
            2: Categorical([-1.6, -0.8, 0.8, 1.6]),
            3: Categorical([-1.92, -0.96, 0.96, 1.92]),
        },
    )
elif name == 'high_increasing0.3':
    register(
        name=name,
        branching=[3, 1, 2],
        reward_inputs="depth",
        reward_dictionary={
            1: Categorical([-4, -2, 2, 4]),
            2: Categorical([-2.4, -1.2, 1.2, 2.4]),
            3: Categorical([-4.32, -2.16, 2.16, 4.32]),
        },
    )
elif name == 'high_increa

In [9]:
np.savetxt(
    Path("../../../cluster/parameters/cost/null.txt"),
    [[1.0, 0.0]],
    fmt="%.02f",
    delimiter=",",
)

In [10]:
mouselab_data = pd.read_csv(Path("../../../data/processed/methods_main/mouselab-mdp.csv"), index_col=0)
for discount_factor in discount_factors:
    with open(Path(f"../../../data/inputs/exp_inputs/rewards/high_increasing{discount_factor}.json"), "rb") as f:
        data = json.load(f)
    ground_truth_dict = {trial["trial_id"] : trial["stateRewards"] for trial in data}

    mouselab_data["state_rewards"] = mouselab_data["trial_id"].apply(lambda trial_id : ground_truth_dict[trial_id][1:])
    
    # need to change ground truths
    Path(f"../../../data/processed/methods_main{discount_factor}").mkdir(exist_ok = True, parents=True)
    mouselab_data.to_csv(Path(f"../../../data/processed/methods_main{discount_factor}/mouselab-mdp.csv"), index=False)

Generate cluster submission commands to run to get Q values

In [11]:
for discount_factor in discount_factors:
    print(f"condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing{discount_factor} cost_function=linear_depth param_file=null")

condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing0.1 cost_function=linear_depth param_file=null
condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing0.2 cost_function=linear_depth param_file=null
condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing0.3 cost_function=linear_depth param_file=null
condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing0.4 cost_function=linear_depth param_file=null
condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing0.5 cost_function=linear_depth param_file=null
condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing0.75 cost_function=linear_depth param_file=null
condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing0.85 cost_functio

Generate cluster submission commands to infer likelihood for methods_main:

In [12]:
for discount_factor in discount_factors:
    print(f"condor_submit_bid 1 submission_scripts/MPI-IS/04_Infer_Params.sub experiment=methods_main{discount_factor} cost_function=linear_depth param_file=null")

condor_submit_bid 1 submission_scripts/MPI-IS/04_Infer_Params.sub experiment=methods_main0.1 cost_function=linear_depth param_file=null
condor_submit_bid 1 submission_scripts/MPI-IS/04_Infer_Params.sub experiment=methods_main0.2 cost_function=linear_depth param_file=null
condor_submit_bid 1 submission_scripts/MPI-IS/04_Infer_Params.sub experiment=methods_main0.3 cost_function=linear_depth param_file=null
condor_submit_bid 1 submission_scripts/MPI-IS/04_Infer_Params.sub experiment=methods_main0.4 cost_function=linear_depth param_file=null
condor_submit_bid 1 submission_scripts/MPI-IS/04_Infer_Params.sub experiment=methods_main0.5 cost_function=linear_depth param_file=null
condor_submit_bid 1 submission_scripts/MPI-IS/04_Infer_Params.sub experiment=methods_main0.75 cost_function=linear_depth param_file=null
condor_submit_bid 1 submission_scripts/MPI-IS/04_Infer_Params.sub experiment=methods_main0.85 cost_function=linear_depth param_file=null
condor_submit_bid 1 submission_scripts/MPI-IS/

Combine data:

In [13]:
print(f"for experiment in {' '.join(['methods_main' + str(discount_factor) for discount_factor in discount_factors])};\n"
      f"    do condor_submit_bid 2 submission_scripts/MPI-IS/05_Combine_Human.sub experiment=$experiment cost_function=linear_depth;\n"
      f"done;")

for experiment in methods_main0.1 methods_main0.2 methods_main0.3 methods_main0.4 methods_main0.5 methods_main0.75 methods_main0.85 methods_main0.9 methods_main0.95 methods_main0.975 methods_main0.999;
    do condor_submit_bid 2 submission_scripts/MPI-IS/05_Combine_Human.sub experiment=$experiment cost_function=linear_depth;
done;


MAP:

In [14]:
print(f"for experiment in {' '.join(['methods_main' + str(discount_factor) for discount_factor in discount_factors])};\n"
      f"    do condor_submit_bid 2 submission_scripts/MPI-IS/M_01_Get_MAP_File.sub experiment=$experiment cost_function=linear_depth;\n"
      f"done;")

for experiment in methods_main0.1 methods_main0.2 methods_main0.3 methods_main0.4 methods_main0.5 methods_main0.75 methods_main0.85 methods_main0.9 methods_main0.95 methods_main0.975 methods_main0.999;
    do condor_submit_bid 2 submission_scripts/MPI-IS/M_01_Get_MAP_File.sub experiment=$experiment cost_function=linear_depth;
done;


Transfer over:

rsync -aPzr --include "methods_main*/*/*mle_and_map*" --include "*/" --exclude "*" vfelso@login.cluster.is.localnet:/work/vfelso/planning-depth-differences/data/processed/ data/processed

# Fit of main dataset by discount factor model without cost

In [15]:
dfs = []
for discount_factor in discount_factors:
    with open(f"../../../data/processed/methods_main{discount_factor}/linear_depth/mle_and_map.pickle", "rb") as f:
        data =pickle.load(f)
    
    df = data["SoftmaxPolicy"]["test_map_expon"][()]
    # TODO test same as simple
    df["discount_factor"] = discount_factor
    dfs.append(df)
    
discount_fitted_data = pd.concat(dfs)

In [16]:
discount_fitted_data = discount_fitted_data.reset_index()

In [17]:
best_param_rows = discount_fitted_data.loc[
    discount_fitted_data.groupby(["trace_pid"]).idxmax()["test_map_expon"]
]

  discount_fitted_data.groupby(["trace_pid"]).idxmax()["test_map_expon"]


In [18]:
from collections import Counter
np.all([counter == 1 for pid, counter in Counter(best_param_rows["trace_pid"]).most_common(5)])

True

In [19]:
# best_param_rows
from statsmodels.tools.eval_measures import bic

best_param_rows["bic"] = best_param_rows.apply(
        lambda row: bic(
            llf=row["test_map_expon"],
            nobs=20,
            df_modelwc=5, # temp, discount rate, forw, eff, dist
        ),
        axis=1)

In [20]:
best_param_rows.sum()["bic"]

43357.6687241661

In [21]:
np.mean(best_param_rows["discount_factor"]), np.std(best_param_rows["discount_factor"])

(0.4767950819672131, 0.25533548932132505)

In [22]:
np.mean(best_param_rows["temp"]), np.std(best_param_rows["temp"])

(0.9938524590163934, 1.0625244041334765)

# Model with non-depth cost, cluster jobs

In [23]:
for discount_factor in discount_factors:
    print(f"condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing{discount_factor} cost_function=dist_forw_eff param_file=dist_forw_eff")

condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing0.1 cost_function=dist_forw_eff param_file=dist_forw_eff
condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing0.2 cost_function=dist_forw_eff param_file=dist_forw_eff
condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing0.3 cost_function=dist_forw_eff param_file=dist_forw_eff
condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing0.4 cost_function=dist_forw_eff param_file=dist_forw_eff
condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing0.5 cost_function=dist_forw_eff param_file=dist_forw_eff
condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Values.sub experiment_setting=high_increasing0.75 cost_function=dist_forw_eff param_file=dist_forw_eff
condor_submit_bid 1 submission_scripts/MPI-IS/03_Get_Q_Va

In [24]:
for discount_factor in discount_factors:
    print(f"condor_submit_bid 1 submission_scripts/MPI-IS/04_Infer_Params.sub experiment=methods_main{discount_factor} cost_function=dist_forw_eff param_file=dist_forw_eff")

condor_submit_bid 1 submission_scripts/MPI-IS/04_Infer_Params.sub experiment=methods_main0.1 cost_function=dist_forw_eff param_file=dist_forw_eff
condor_submit_bid 1 submission_scripts/MPI-IS/04_Infer_Params.sub experiment=methods_main0.2 cost_function=dist_forw_eff param_file=dist_forw_eff
condor_submit_bid 1 submission_scripts/MPI-IS/04_Infer_Params.sub experiment=methods_main0.3 cost_function=dist_forw_eff param_file=dist_forw_eff
condor_submit_bid 1 submission_scripts/MPI-IS/04_Infer_Params.sub experiment=methods_main0.4 cost_function=dist_forw_eff param_file=dist_forw_eff
condor_submit_bid 1 submission_scripts/MPI-IS/04_Infer_Params.sub experiment=methods_main0.5 cost_function=dist_forw_eff param_file=dist_forw_eff
condor_submit_bid 1 submission_scripts/MPI-IS/04_Infer_Params.sub experiment=methods_main0.75 cost_function=dist_forw_eff param_file=dist_forw_eff
condor_submit_bid 1 submission_scripts/MPI-IS/04_Infer_Params.sub experiment=methods_main0.85 cost_function=dist_forw_eff p

In [25]:
print(f"for experiment in {' '.join(['methods_main' + str(discount_factor) for discount_factor in discount_factors])};\n"
      f"    do condor_submit_bid 2 submission_scripts/MPI-IS/05_Combine_Human.sub experiment=$experiment cost_function=dist_forw_eff;\n"
      f"done;")

for experiment in methods_main0.1 methods_main0.2 methods_main0.3 methods_main0.4 methods_main0.5 methods_main0.75 methods_main0.85 methods_main0.9 methods_main0.95 methods_main0.975 methods_main0.999;
    do condor_submit_bid 2 submission_scripts/MPI-IS/05_Combine_Human.sub experiment=$experiment cost_function=dist_forw_eff;
done;


In [26]:
print(f"for experiment in {' '.join(['methods_main' + str(discount_factor) for discount_factor in discount_factors])};\n"
      f"    do condor_submit_bid 2 submission_scripts/MPI-IS/M_01_Get_MAP_File.sub experiment=$experiment cost_function=dist_forw_eff;\n"
      f"done;")

for experiment in methods_main0.1 methods_main0.2 methods_main0.3 methods_main0.4 methods_main0.5 methods_main0.75 methods_main0.85 methods_main0.9 methods_main0.95 methods_main0.975 methods_main0.999;
    do condor_submit_bid 2 submission_scripts/MPI-IS/M_01_Get_MAP_File.sub experiment=$experiment cost_function=dist_forw_eff;
done;


# # Fit of main dataset by discount factor model with cost

In [27]:
dfs = []
for discount_factor in discount_factors:
    with open(f"../../../data/processed/methods_main{discount_factor}/dist_forw_eff/mle_and_map.pickle", "rb") as f:
        data =pickle.load(f)
    
    df = data["SoftmaxPolicy"]["test_map_expon"][()]
    # TODO test same as simple
    df["discount_factor"] = discount_factor
    dfs.append(df)
    
discount_fitted_data = pd.concat(dfs)

In [28]:
discount_fitted_data = discount_fitted_data.reset_index()

In [29]:
best_param_rows = discount_fitted_data.loc[
    discount_fitted_data.groupby(["trace_pid"]).idxmax()["test_map_expon"]
]

  discount_fitted_data.groupby(["trace_pid"]).idxmax()["test_map_expon"]


In [30]:
from collections import Counter
np.all([counter == 1 for pid, counter in Counter(best_param_rows["trace_pid"]).most_common(5)])

True

In [31]:
# best_param_rows
from statsmodels.tools.eval_measures import bic

best_param_rows["bic"] = best_param_rows.apply(
        lambda row: bic(
            llf=row["test_map_expon"],
            nobs=20,
            df_modelwc=5, # temp, discount rate, forw, eff, dist
        ),
        axis=1)

In [32]:
best_param_rows.sum()["bic"]

35317.90865167498

In [33]:
np.mean(best_param_rows["discount_factor"]), np.std(best_param_rows["discount_factor"])

(0.7107377049180328, 0.3284117718003234)

In [34]:
Counter(best_param_rows["discount_factor"]).most_common()

[(0.999, 40),
 (0.1, 15),
 (0.75, 14),
 (0.9, 14),
 (0.4, 11),
 (0.95, 7),
 (0.3, 6),
 (0.85, 5),
 (0.5, 5),
 (0.2, 3),
 (0.975, 2)]

In [35]:
np.mean(best_param_rows["temp"]), np.std(best_param_rows["temp"])

(1.3627049180327868, 1.100904275766991)