## Data manipulation and visualization

In [19]:
# load library
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
plt.switch_backend('agg')
plt.style.context('default')
%matplotlib inline
import matplotlib as mpl
mpl.rcParams['figure.facecolor'] = 'white'

from predecessor.utils import mean_across_trials, std_across_trials, get_last_reward
from predecessor.utils import mean_across_episode, step_to_below_threshold_across_trials
from predecessor.utils import moving_average_across_trials, get_dStep_dEpi

# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2 

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [20]:
# Input pickle and the location where its processed counterpart will be saved
# (same file name, different folder).
raw_file_name = "noise1D_2023-12-22-11-09.pickle"
raw_folder = "../results/pickles/"
processed_folder = "../results/processed_pickles/"
raw_file = f"{raw_folder}{raw_file_name}"
save_file_name = f"{processed_folder}{raw_file_name}"

# NOTE: unpickling can execute arbitrary code, so only load result files you
# produced yourself.
with open(raw_file, mode="rb") as pickle_in:
    exp_dic = pickle.load(pickle_in)

In [21]:
# Show the top-level entries stored in the loaded pickle.
exp_dic.keys()

dict_keys(['Q', 'SF', 'Q(lambda)', 'PF', 'params'])

In [22]:
# Keep a handle on the experiment settings; the same dict is attached to the
# processed output under the "params" key further down.
params = exp_dic["params"]
params

{'env size': 20, 'total trials': 100, 'episode': 3000, 'max step length': 100}

In [23]:
# Re-key the "Q(lambda)" results under a label containing `$\lambda$`
# (presumably so plot legends render a Greek lambda -- confirm with the
# plotting code).
# - Raw string: "\l" is not a valid escape sequence, and a non-raw literal
#   triggers a DeprecationWarning / SyntaxWarning on recent Python versions.
# - Guard: makes the cell safe to re-run; without it the second run raises
#   KeyError because the old key has already been moved.
if "Q(lambda)" in exp_dic:
    exp_dic[r"Q($\lambda$)"] = exp_dic.pop("Q(lambda)")

In [24]:
# Remove the settings entry so that only per-algorithm results remain in
# `exp_dic` (the settings themselves were bound to `params` earlier).
# Passing a default keeps the cell re-runnable: a plain pop("params") raises
# KeyError once the key is gone.
exp_dic.pop("params", None)
exp_dic.keys()

dict_keys(['Q', 'SF', 'PF', 'Q($\\lambda$)'])

In [25]:
# Algorithm labels in processing order. The lambda label is a raw string
# because "\l" is not a valid escape sequence; the resulting value is
# unchanged (a literal backslash followed by "lambda").
algorithms = ["Q", "SF", r"Q($\lambda$)", "PF"]

In [26]:
# Noise levels are the first-level keys of an algorithm's results; read them
# from the first algorithm's entry.
noise_list = list(exp_dic[algorithms[0]])
noise_list

[0.05, 0.25, 0.5]

In [27]:
exp_dic["PF"][0.05].keys()  # second-level keys: the lambda settings stored for PF at noise level 0.05

dict_keys([0, 0.7, 0.8, 0.9])

In [28]:
# Lambda settings to post-process for the lambda-based agents: every setting
# stored for PF except the lambda = 0 entry.
# The entry is filtered out by value rather than by slicing off the first key,
# so the result no longer depends on the insertion order of the pickled dict,
# and the noise level is taken from `noise_list` instead of being hard-coded.
# To include the lambda = 0 runs as well, remove the `if` filter.
lambda_list = [lam for lam in exp_dic["PF"][noise_list[0]] if lam != 0]
lambda_list

[0.7, 0.8, 0.9]

In [29]:
# Names of the raw fields recorded for a single run, taken from the second
# algorithm at the first noise level with lambda key 0.
data_list = list(exp_dic[algorithms[1]][noise_list[0]][0])
data_list

['noise_level', 'trials_step', 'trials_reward']

In [30]:
# Report the two leading dimensions of the recorded rewards for one run:
# outer length first, then the length of its first element.
sample_rewards = exp_dic[algorithms[1]][noise_list[0]][0]["trials_reward"]
print("trial dim", len(sample_rewards))
print("episode dim", len(sample_rewards[0]))

trial dim 100
episode dim 3000


In [31]:
# print("trial_q dim", len(exp_dic[algorithms[1]][noise_list[0]][0]["trials_q"]))
# print("episode_q dim", len(exp_dic[algorithms[1]][noise_list[0]][0]["trials_q"][0]))

In [32]:
# exp_dic["PF"][noise_list[0]][0.7]["trials_q"][0][2998]

In [33]:
# Window length forwarded to the moving-average, dStep/dEpi and
# step-to-threshold helpers in the processing cell below.
window_size = 20
# Step-count cut-offs; one "threshold_<value>" entry is computed per element.
thresholds = [20, 40, 60]
# Alternative cut-offs (disabled):
# thresholds = [9, 17, 25]

In [34]:
def _summarize_trials(run, window_size, thresholds, early_columns=450):
    """Reduce the raw per-trial records of one configuration to summary statistics.

    Parameters
    ----------
    run : dict
        Raw record for one (algorithm, noise level, lambda) combination. It
        must contain "trials_reward" and "trials_step". If it also contains
        "trials_q", mean/std of the Q-values are added to the summary.
    window_size : int
        Window forwarded to `moving_average_across_trials`, `get_dStep_dEpi`
        and `step_to_below_threshold_across_trials`.
    thresholds : iterable
        One "threshold_<value>" entry is produced per element.
    early_columns : int, optional
        Number of leading second-axis entries of the dStep/dEpi array stored
        under "early_dStep_dEpi".

    Returns
    -------
    dict
        Summary statistics keyed by name ("mean_rewards", "std_rewards", ...).
    """
    rewards = run["trials_reward"]
    steps = run["trials_step"]

    summary = {
        "mean_rewards": mean_across_trials(rewards),
        "std_rewards": std_across_trials(rewards),
        "last_reward": get_last_reward(rewards),
        "mean_step_length": mean_across_trials(steps),
        "std_step_length": std_across_trials(steps),
    }

    # Q-values are optional in the raw data. Check for them explicitly rather
    # than wrapping the computation in a bare `except`, which hid a NameError
    # and could silently pick up a stale `q_value` left in the kernel.
    if "trials_q" in run:
        q_value = run["trials_q"]
        summary["mean_q"] = mean_across_trials(q_value)
        summary["std_q"] = std_across_trials(q_value)

    summary["mean_epi_step"] = mean_across_episode(steps)

    ma_step = moving_average_across_trials(steps, window_size)
    summary["mean_ma_step"] = mean_across_trials(ma_step)
    summary["std_ma_step"] = std_across_trials(ma_step)
    summary["last_ma_step"] = get_last_reward(ma_step)

    # Computed once and reused for all three entries below.
    # NOTE(review): assumes get_dStep_dEpi is deterministic and that the
    # mean/std helpers do not modify their argument -- confirm in
    # predecessor.utils.
    d_step = get_dStep_dEpi(steps, window_size=window_size, dEpi=1)
    summary["mean_dStep_dEpi"] = mean_across_trials(d_step)
    summary["std_dStep_dEpi"] = std_across_trials(d_step)
    summary["early_dStep_dEpi"] = d_step[:, :early_columns]

    for threshold in thresholds:
        summary["threshold_" + str(threshold)] = (
            step_to_below_threshold_across_trials(steps, threshold, window_size)
        )
    return summary


# Agents whose results are stored under the single lambda key 0, and agents
# that are processed once per entry of `lambda_list`.
SINGLE_RUN_AGENTS = ("Q", "SF")
LAMBDA_AGENTS = (r"Q($\lambda$)", "PF")

# Output layout: new_exp_dic[agent][noise_level][lambda_level] -> summary dict,
# plus the experiment settings under "params". The literal fixes the key order
# of the saved dictionary.
new_exp_dic = {"Q": {}, r"Q($\lambda$)": {}, "SF": {}, "PF": {}, "params": params}
for agent_type in algorithms:
    new_exp_dic[agent_type] = {}

    if agent_type in SINGLE_RUN_AGENTS:
        lambda_levels = [0]
    elif agent_type in LAMBDA_AGENTS:
        lambda_levels = lambda_list
    else:
        # Unknown agents get an empty entry per noise level.
        lambda_levels = []

    for noise_level in noise_list:
        new_exp_dic[agent_type][noise_level] = {}
        for lambda_level in lambda_levels:
            new_exp_dic[agent_type][noise_level][lambda_level] = _summarize_trials(
                exp_dic[agent_type][noise_level][lambda_level],
                window_size,
                thresholds,
            )

In [35]:
# Sanity check: lambda settings present in the processed PF results at noise level 0.05.
new_exp_dic["PF"][0.05].keys()

dict_keys([0.7, 0.8, 0.9])

In [36]:
# Write the processed summaries next to the other processed pickles, using the
# highest pickle protocol this interpreter supports.
with open(save_file_name, mode="wb") as pickle_out:
    pickle.dump(new_exp_dic, pickle_out, protocol=pickle.HIGHEST_PROTOCOL)