In [1]:
import pickle
import numpy as np
import os
import matplotlib.pyplot as plt

def parse_data(paths):
    """Load pickled result dictionaries and stack their entries per key.

    Every file in ``paths`` must unpickle to a mapping whose values offer a
    ``reshape`` method (e.g. ``np.ndarray``). Each value is flattened into a
    single row, and the rows belonging to the same key are stacked along axis 0.

    Args:
        paths: Iterable of paths to pickle files.

    Returns:
        dict mapping every key found in any file to a 2-D array with one row
        per file that contained the key, in the order of ``paths``. The dict
        is empty when ``paths`` is empty.

    Raises:
        ValueError: If the flattened rows collected for one key differ in length.
    """
    rows_per_key = {}
    for path in paths:
        # NOTE(security): pickle.load can execute arbitrary code - only point
        # this at result files you produced yourself.
        with open(path, 'rb') as f:
            loaded = pickle.load(f)
        for key in loaded:
            rows_per_key.setdefault(key, []).append(loaded[key].reshape([1, -1]))
    # Stack once per key instead of re-allocating with np.append for every file.
    return {key: np.concatenate(rows, axis=0) for key, rows in rows_per_key.items()}

def file_crawler(path, substrings, exclude=()):
    """Recursively collect files whose full path matches a substring filter.

    Args:
        path: Root directory that is walked recursively.
        substrings: Strings that must ALL occur in the joined file path
            (directory part included).
        exclude: Strings of which NONE may occur in the joined file path.
            ``None`` is treated like an empty collection.

    Returns:
        Sorted list of matching file paths. Sorting makes the result
        independent of the directory listing order of the filesystem.
    """
    exclude = exclude or ()
    result = []
    for root, _dirs, files in os.walk(path):
        for name in files:
            file_path = os.path.join(root, name)
            if all(s in file_path for s in substrings) and not any(e in file_path for e in exclude):
                result.append(file_path)
    result.sort()
    print(f'for path: {path}: {len(result)}')
    return result

def plot_experiment_data(timesteps, experiments, names, plot_name, path=None, plot_closest=False):
    """Plot the mean of several experiments with a shaded standard-error band.

    Args:
        timesteps: List with one 1-D array of x-values per experiment.
        experiments: List with one 2-D array per experiment; axis 0 is
            averaged over, axis 1 is plotted against the timesteps.
        names: Legend labels; ``names[i]`` labels ``experiments[i]``.
        plot_name: Title of the plot (also used as the file name when saving).
        path: Optional directory. When given, the figure is written to
            ``<path>/<plot_name>.png`` (the directory is created if needed).
        plot_closest: If True, every experiment is resampled onto the
            timesteps of the experiment with the fewest timesteps, picking for
            each reference timestep the column whose own timestep is closest.
            Experiments whose first timestep is not 0 get a zero column
            prepended at timestep 0 before resampling.

    The input lists are not modified.
    """
    # create figure and axis objects
    fig, ax = plt.subplots()

    # reference grid: timesteps of the experiment with the fewest entries
    # (the first one wins on ties)
    p_timesteps = min(timesteps, key=len)

    if plot_closest:
        new_experiments = []
        for exp_steps, exp_data in zip(timesteps, experiments):
            # work on local names so the caller's lists stay untouched
            if exp_steps[0] != 0:
                exp_data = np.concatenate((np.zeros_like(exp_data[:, :1]), exp_data), axis=1)
                exp_steps = np.concatenate((np.zeros_like(exp_steps[:1]), exp_steps), axis=0)
            # squared distance between every reference step and every own step
            dist_timesteps = (exp_steps[None, :] - p_timesteps[:, None]) ** 2
            ind_new_timesteps = np.argmin(dist_timesteps, axis=1)
            new_experiments.append(exp_data[:, ind_new_timesteps])
    else:
        new_experiments = experiments

    for i, experiment in enumerate(new_experiments):
        mean_data = np.mean(experiment, axis=0)
        # standard error of the mean: std / sqrt(number of rows)
        std_data = 1 / np.sqrt(experiment.shape[0]) * np.std(experiment, axis=0)

        x_values = p_timesteps if plot_closest else timesteps[i]
        ax.plot(x_values, mean_data, '-o', markersize=5, label=names[i])
        ax.fill_between(x_values, mean_data - std_data, mean_data + std_data, alpha=0.3)

    # add labels, title, and legend to the plot
    ax.set_xlabel('Number Sampled Trajectories')
    ax.set_ylabel('Success Rate')
    ax.set_title(plot_name)
    ax.legend()

    # `path` used to be accepted but ignored; honour it now.
    if path is not None:
        os.makedirs(path, exist_ok=True)
        fig.savefig(os.path.join(path, f'{plot_name}.png'), bbox_inches='tight')

def make_plot(paths, includes, excludes, names, plot_name, save_path = None, plot_closest=False):
    """Collect result files for several experiments and plot them together.

    For every index ``i`` the directory ``paths[i]`` is searched with
    ``file_crawler`` using ``includes[i]`` / ``excludes[i]``; the matching
    pickles are merged with ``parse_data`` and the ``'step'`` /
    ``'success_rate'`` entries are handed to ``plot_experiment_data``.

    Args:
        paths: One root directory per experiment.
        includes: Per experiment, substrings that must all occur in a file path.
        excludes: Per experiment, substrings that must not occur in a file path.
        names: Legend labels, at least one per experiment.
        plot_name: Title of the plot.
        save_path: Forwarded to ``plot_experiment_data`` as ``path``.
        plot_closest: Forwarded to ``plot_experiment_data``.

    Raises:
        ValueError: If there are fewer names than experiments, or if the
            filter of an experiment matches no file.
    """
    if len(names) < len(paths):
        raise ValueError(f'got {len(paths)} experiments but only {len(names)} names')

    abs_file_path_list = [
        file_crawler(path=paths[i], substrings=includes[i], exclude=excludes[i])
        for i in range(len(paths))
    ]

    # Fail early with a readable message instead of a KeyError on 'step' later.
    for i, files in enumerate(abs_file_path_list):
        if not files:
            raise ValueError(
                f'no files matched for experiment {i} ({names[i]!r}) in {paths[i]!r} '
                f'with includes={includes[i]!r} and excludes={excludes[i]!r}'
            )

    dicts = [parse_data(paths=files) for files in abs_file_path_list]

    plot_experiment_data(
        # every run of an experiment is assumed to share the steps of its first run
        timesteps=[result_dict['step'][0] for result_dict in dicts],
        experiments=[result_dict['success_rate'] for result_dict in dicts],
        names=names,
        plot_name=plot_name,
        path=save_path,
        plot_closest=plot_closest
        )

In [1]:
from active_critic.utils.dataset import DatasetAC
from torch.utils.data.dataloader import DataLoader
from torch.utils.data import DataLoader, TensorDataset, WeightedRandomSampler
import torch as th
data = DatasetAC(batch_size=2, device='cpu')

# Toy data: `batch_size` trajectories of length `seq_len`.
batch_size = 4
seq_len = 3

obs = th.ones([batch_size, seq_len, 4])
act = th.ones([batch_size, seq_len, 4])
# NOTE(review): the later add_data call in this notebook passes a 4-D
# actions_history ([1, 3, 3, 4]); this one is 3-D. The IndexError raised further
# down (mask [4] vs tensor [12, 3, 4]) may be caused by this - TODO confirm
# against DatasetAC.add_data.
act_hist = th.ones([batch_size, seq_len, 4])
rew = th.ones([batch_size, seq_len, 1])
exp_trjs = th.ones([batch_size], dtype=th.bool)
steps = th.ones([batch_size, seq_len], dtype=th.long)

# Make trajectory 0 distinguishable (all zeros, not an expert trajectory) and
# give trajectory 1 a zero reward.
obs[0] = 0
act[0] = 0
act_hist[0] = 0
rew[1] = 0
exp_trjs[0] = 0
steps[0] = 0

data.add_data(obsv=obs, actions=act, reward=rew, expert_trjs=exp_trjs, actions_history=act_hist, steps=steps)
data.onyl_positiv = True

  from .autonotebook import tqdm as notebook_tqdm


prev_proposed_acts: torch.Size([12, 3, 4])
steps 213 : torch.Size([4, 3])


In [2]:
def make_weights(len, gamma, exp_ind):
    """Build sampling weights that grow with the index and are 1 at ``exp_ind``.

    Args:
        len: Number of weights to create.
        gamma: Decay factor; entry ``i`` gets ``exp(-gamma * (len - i) / len)``.
        exp_ind: Index or boolean mask of the entries whose weight is set to 1.

    Returns:
        1-D tensor with ``len`` weights.
    """
    # counts down len, len-1, ..., 1
    countdown = th.arange(len, 0, -1)
    exponent = -gamma * countdown / len
    weights = th.exp(exponent)
    weights[exp_ind] = 1
    return weights

In [3]:
# NOTE(review): `onyl_positiv` must match the attribute spelling used inside
# DatasetAC; a mismatch would silently create a new, unused attribute - TODO confirm.
data.onyl_positiv = True
data_len = len(data)
gamma = 1
# expert flag of every trajectory selected by `data.success`
exp_ind = data.expert_trjs[data.success]

In [4]:
weights = make_weights(len=data_len, gamma=gamma, exp_ind=exp_ind)

In [5]:
weights

tensor([0.3679, 1.0000, 1.0000])

In [6]:
data_len

3

In [7]:
# Draw `data_len` indices per pass, with replacement, proportionally to `weights`.
sampler = WeightedRandomSampler(weights, data_len, replacement=True)
train_loader = DataLoader(dataset=data, sampler=sampler, batch_size=1)

In [10]:
data.virt_prev_proposed_acts.shape

torch.Size([12, 3, 4])

In [11]:
data.virt_actions.shape

torch.Size([4, 3, 4])

In [12]:
data.virt_obsv.shape

torch.Size([4, 3, 4])

In [8]:
# Count how often the first sampled batch is the all-zero trajectory.
indices = 0
for j in range(1000):
    # a fresh iterator re-draws the sampler; only its first batch is inspected
    d = next(iter(train_loader), None)
    if d is None:
        continue
    virt_obsv, virt_actions, virt_reward, virt_expert_trjs, virt_prev_proposed_acts, virt_steps, prev_virt_obsv = d
    indices = indices + (virt_obsv.sum()==0)
print(indices)

IndexError: The shape of the mask [4] at index 0 does not match the shape of the indexed tensor [12, 3, 4] at index 0

In [19]:
# One extra trajectory in which every entry equals 2, so it is easy to spot.
obs = th.full([1, 3, 4], 2.0)
act = th.full([1, 3, 4], 2.0)
act_hist = th.full([1, 3, 3, 4], 2.0)
rew = th.full([1, 3, 1], 2.0)
exp_trjs = th.full([1], 2.0)
steps = th.full([1, 3], 2, dtype=th.long)

data.add_data(obsv=obs, actions=act, reward=rew, expert_trjs=exp_trjs, actions_history=act_hist, steps=steps)

In [23]:
# Draw a single batch from the loader and print its observations.
for d in train_loader:
    virt_obsv, virt_actions, virt_reward, virt_expert_trjs, virt_prev_proposed_acts, virt_steps, prev_virt_obsv = d
    print(virt_obsv)
    break

tensor([[[2., 2., 2., 2.],
         [2., 2., 2., 2.],
         [2., 2., 2., 2.]]])


In [None]:
AC_path = '/home/hendrik/Documents/master_project/LokalData/server/AC/0329_stats/AC_var_2023-03-29'

# Path filters of the first experiment.
include_ac_0 = ['statsoptimized', 'reach only positive reach opt steps: 5 trainin eps: 500 opt mode: actor+plan demonstrations: 0, im_ph:False, training_episodes: 10, min critic: 1e-05, wd: 0.01, val_every: 1000 run id: 0']
exclude_ac_0 = []

# Path filters of the second experiment.
include_ac_1 = ['statsoptimized', 'reach only positive! reach opt steps: 3 trainin eps: 500 opt mode: actor+plan demonstrations: 0, im_ph:False, training_episodes: 10, min critic: 1e-05, wd: 0.01, val_every: 3000 run id: 0']
exclude_ac_1 = []

# One legend label per plotted experiment.
make_plot(
    paths=[AC_path, AC_path],
    includes=[include_ac_0, include_ac_1],
    excludes=[exclude_ac_0, exclude_ac_1],
    names=['AC Imitation Only', 'AC Reinforcement'],
    plot_name='Reach Environment',
    save_path = None,
    plot_closest=True
    )

In [None]:
AC_path = '/home/hendrik/Documents/master_project/LokalData/server/AC/AC_var_2023-03-07'
bl_path = '/home/hendrik/Documents/master_project/LokalData/server/baselines/Baselines_Stats_GAIL_2023-03-07'

# Each experiment gets its own include/exclude pair (the original re-assigned
# exclude_ac_imitation and exclude_bl_RPPO for the second/fifth experiment).
include_ac_reach_4_im = ['reach', 'stats', 'True', 'demonstrations: 4']
exclude_ac_imitation = ['optimized']

include_ac_reach_4_ref = ['reach', 'stats', 'False', 'demonstrations: 4']
exclude_ac_reinforcement = ['optimized']

include_PPO_reach_4 = ['reach', 'demonstrations_4', 'PPO', 'learner_stats_gail']
exclude_bl_PPO = ['RPPO']

include_RPPO_reach_4 = ['reach', 'demonstrations_4', 'RPPO', 'learner_stats_rec_PPO_stepsize_10']
exclude_bl_RPPO = []

include_TQC_reach_4 = ['reach', 'demonstrations_4', 'TQC', 'learner_stats_gail_stepsize_10']
exclude_bl_TQC = []

save_path = '/home/hendrik/Documents/master_project/LokalData/server/Stats for MA/reach 4'

make_plot(
    paths=[AC_path, AC_path, bl_path, bl_path, bl_path],
    includes=[include_ac_reach_4_im, include_ac_reach_4_ref, include_PPO_reach_4, include_RPPO_reach_4, include_TQC_reach_4],
    excludes=[exclude_ac_imitation, exclude_ac_reinforcement, exclude_bl_PPO, exclude_bl_RPPO, exclude_bl_TQC],
    names=['AC Imitation Only', 'AC Reinforcement', 'PPO + GAIL', 'BC + RPPO', 'TQC + GAIL'],
    plot_name='Reach Environment',
    save_path = save_path
    )

In [None]:
AC_path = '/home/hendrik/Documents/master_project/LokalData/server/AC/AC_var_2023-03-22'

bl_path = '/home/hendrik/Documents/master_project/LokalData/server/baselines/Baselines_Stats_GAIL_2023-03-20/'

include_bl_TQC = ['learner', 'TQC_reach_lr_1e-06_demonstrations_4_n_samples_20000_id_0']
exclude_bl_TQC = []

include_bl_PPO = ['learner', 'PPO_reach_lr_1e-05_demonstrations_4_n_samples_20000_id_0']
exclude_bl_PPO = []

include_ac = ['reach tiny planner trainin eps: 20000 opt mode: actor+plan demonstrations: 1, im_ph:False, training_episodes: 10, min critic: 5e-05, wd: 0.1, val_every: 2000', 'stats']
exclude_ac = ['optimize', 'id: 1']

# Filters for the optimised AC runs; defined here but not plotted in this cell.
include_ac_opt = ['reach tiny planner trainin eps: 20000 opt mode: actor+plan demonstrations: 1, im_ph:False, training_episodes: 10, min critic: 5e-05, wd: 0.1, val_every: 2000', 'statsoptimize']
exclude_ac_opt = ['id: 1']

save_path = '/home/hendrik/Documents/master_project/LokalData/server/Stats for MA/PickPlace_14/'

# Legend labels follow the order of `includes` (AC, TQC, PPO); the previous
# labels ('AC Opt', 'TQC + GAIL') were attached to the TQC and PPO curves.
# NOTE(review): the filters select 'reach' runs while the title says
# 'Pick and Place Environment' - TODO confirm the intended title.
make_plot(
    paths=[AC_path, bl_path, bl_path],
    includes=[include_ac, include_bl_TQC, include_bl_PPO],
    excludes=[exclude_ac, exclude_bl_TQC, exclude_bl_PPO],
    names=['AC', 'TQC + GAIL', 'PPO + GAIL'],
    plot_name='Pick and Place Environment',
    save_path = None,
    plot_closest=False
    )

In [None]:
from active_critic.utils.gym_utils import make_policy_dict
pd = make_policy_dict()

In [None]:
for key in pd:
    print(key)

In [None]:
from active_critic.analyze_stats.push_10_RPPO_TQC_PPO import make_final_plot

In [None]:
make_final_plot()

In [None]:
AC_path = '/home/hendrik/Documents/master_project/LokalData/server/AC/AC_var_2023-03-20'

include_ac = ['stats']
exclude_ac = ['optimize', 'actions']


save_path = '/home/hendrik/Documents/master_project/LokalData/server/Stats for MA/PickPlace_14/'

# Only one experiment is plotted, so only one legend label is passed.
make_plot(
    paths=[AC_path],
    includes=[include_ac],
    excludes=[exclude_ac],
    names=['AC'],
    plot_name='Pick and Place Environment',
    save_path = None,
    plot_closest=True
    )

In [None]:
PPO_path = '/home/hendrik/Documents/master_project/LokalData/server/baselines/Baselines_Stats_GAIL_2023-03-20'

include_PPO = ['PPO_reach_lr_1e-05_demonstrations_4_n_samples_20000', 'learner']
exclude_PPO = ['id_1']


save_path = '/home/hendrik/Documents/master_project/LokalData/server/Stats for MA/PickPlace_14/'

# Only one experiment is plotted, so only one legend label is passed.
make_plot(
    paths=[PPO_path],
    includes=[include_PPO],
    excludes=[exclude_PPO],
    names=['PPO'],
    plot_name='Reach Environment',
    save_path = None,
    plot_closest=False
    )

In [None]:
import torch as th
action_histories = th.arange(0, 18).reshape([1, -1])

In [None]:
action_histories

In [None]:
action_histories = action_histories.reshape([2, 3, 3, 1]).repeat([1,1,1,2])

In [None]:
action_histories.shape

In [None]:
def pick_action_from_history(action_histories, steps):
    """Select, for every batch element, the action sequence stored at its step.

    The previous implementation built flat index vectors by hand, but the time
    index was flattened in (dim, time, batch) order while the others used
    (batch, time, dim) order. The output was therefore only correct when the
    batch size happened to equal the action dimension.

    Args:
        action_histories: Tensor indexed as ``[batch, step, time, dim]``.
        steps: Integer tensor holding one step index per batch element.

    Returns:
        Tensor of shape ``[batch, time, dim]`` with
        ``result[b, t, d] == action_histories[b, steps[b], t, d]``.
    """
    batch_idx = th.arange(action_histories.shape[0], device=steps.device)
    # Pairing batch_idx[b] with steps[b] picks one [time, dim] slice per batch element.
    return action_histories[batch_idx, steps.reshape([-1])]

In [None]:
a = th.rand([2, 3, 3, 4])
steps = th.tensor([1, 2])

In [None]:
actions_t = pick_action_from_history(action_histories=a, steps=steps)

In [None]:
a

In [None]:
actions_t

In [None]:
# Root directories, one per method that is compared in this plot.
AC_path = '/home/hendrik/Documents/master_project/LokalData/server/Stats for MA/pickplace 14/AC/'
PPOGail = '/home/hendrik/Documents/master_project/LokalData/server/Stats for MA/pickplace 14/PPO GAIL'
TQCPath = '/home/hendrik/Documents/master_project/LokalData/server/Stats for MA/pickplace 14/TQC GAIL'
RPPOpath = '/home/hendrik/Documents/master_project/LokalData/server/Stats for MA/pickplace 14/RPPO'

# include_* substrings must all occur in a file path, exclude_* substrings must not.
include_ac = ['stats']
exclude_ac = ['optimize']

include_PPOGail = ['learner']
exclude_bl = []

include_TQC = ['learner', 'lr_1e-07']
exclude_bl_TQC = []

include_RPPO = ['learner']
exclude_RPPO = ['id_2']

# Defined but not used below: the call passes save_path = None.
save_path = '/home/hendrik/Documents/master_project/LokalData/server/Stats for MA/PickPlace_14/'


# paths, includes, excludes and names are aligned by position.
make_plot(
    paths=[AC_path, PPOGail, TQCPath, RPPOpath], 
    includes=[include_ac, include_PPOGail, include_TQC, include_RPPO], 
    excludes=[exclude_ac, exclude_bl, exclude_bl_TQC, exclude_RPPO],
    names=['AC', 'PPO + GAIL', 'TQC + GAIL', 'RPPO + BC'],
    plot_name='Pick and Place Environment',
    save_path = None,
    plot_closest=True
    )

In [None]:
AC_path = '/home/hendrik/Documents/master_project/LokalData/server/AC/AC_var_2023-02-23'
bl_path = '/home/hendrik/Documents/master_project/LokalData/server/baselines/Baselines_Stats_GAIL_2023-02-22/'
bl_RPPO_batch = '/home/hendrik/Documents/master_project/LokalData/server/baselines/Baselines_Stats_GAIL_2023-03-21'

# Active-critic runs.
include_ac = ['push demonstrations: 10, im_ph:False, training_episodes: 10, min critic: 5e-05, wd: 0.01, val_every: 5000', 'stats']
exclude_ac = ['optimized']

# Baseline runs; all three share the same (empty) exclude list.
include_bl_PPO = ['demonstrations_10', 'learner', 'push', 'PPO', '0.0001']
include_bl_TQC = ['TQC_GAIL_push_lr_1e-07_demonstrations_10', 'learner']
include_bl_RPPO = ['RPPO_push_lr_1e-06_demonstrations_10_id', 'learner']
exclude_bl = []

save_path = '/home/hendrik/Documents/master_project/LokalData/server/Stats for MA/Push 6/'

make_plot(
    paths=[AC_path, bl_RPPO_batch, bl_path, bl_path],
    includes=[include_ac, include_bl_RPPO, include_bl_PPO, include_bl_TQC],
    excludes=[exclude_ac, exclude_bl, exclude_bl, exclude_bl],
    names=['AC', 'RPPO', 'PPO + GAIL', 'TQC + GAIL'],
    plot_name='Push Environment',
    save_path=None,
    plot_closest=True,
)

In [None]:
AC_Path = '/home/hendrik/Documents/master_project/LokalData/server/AC/AC_var_2023-03-01/'

include_bl = ['pickplace demonstrations: 14, im_ph:False, training_episodes: 10, min critic: 5e-05, wd: 0.01, val_every: 2000', 'stats']
exclude_bl = ['20000']

save_path = '/home/hendrik/Documents/master_project/LokalData/server/Stats for MA/Push 6/'

# Only one path is given, so only the first include/exclude/name was ever used.
# The surplus entries are dropped, which also removes the dependency on
# include_ac_opt / exclude_ac_opt defined in another cell.
# NOTE(review): the filter selects 'pickplace' runs while the title says
# 'Push Environment' - TODO confirm the intended title.
make_plot(
    paths=[AC_Path],
    includes=[include_bl],
    excludes=[exclude_bl],
    names=['AC'],
    plot_name='Push Environment',
    save_path = None
    )

In [None]:
AC_path = '/home/hendrik/Documents/master_project/LokalData/server/AC/AC_var_2023-02-23'
bl_path = '/home/hendrik/Documents/master_project/LokalData/server/baselines/Baselines_Stats_GAIL_2023-02-22/'


include_ac = ['push', '20','True', 'stats', '2000']
exclude_ac = ['optimize']

include_ac2 = ['push', '20','False', 'stats', '5000']
exclude_ac2 = ['optimize']

include_bl = ['demonstrations_20', 'learner', 'push', 'PPO', '0.0001']
exclude_bl = []

save_path = '/home/hendrik/Documents/master_project/LokalData/server/Stats for MA/Push 6/'

# exclude_bl is passed explicitly (it was defined but replaced by a literal []),
# and the legend typo 'Reinfocement' is corrected.
make_plot(
    paths=[AC_path, AC_path, bl_path],
    includes=[include_ac, include_ac2, include_bl],
    excludes=[exclude_ac, exclude_ac2, exclude_bl],
    names=['AC Imitation', 'AC Reinforcement', 'PPO + GAIL'],
    plot_name='Push Environment',
    save_path = None
    )

In [None]:
from active_critic.analyze_stats.push_TQC_PPO_GAIL_5000_10000 import run_exp
run_exp()

In [None]:
# Root directories of the active-critic runs and of the baseline runs.
AC_path = '/home/hendrik/Documents/master_project/LokalData/server/AC/AC_var_2023-02-22_val_1000'
bl_path = '/home/hendrik/Documents/master_project/LokalData/server/baselines/Baselines_Stats_GAIL_2023-02-22/'

# include_* substrings must all occur in a file path, exclude_* substrings must not.
include_ac = ['push', '20','True', 'stats']
exclude_ac = ['optimize']

include_bl = ['demonstrations_20', 'learner', 'push', 'PPO', '0.0001']
exclude_bl = []


# Defined but not used below: the call passes save_path = None.
save_path = '/home/hendrik/Documents/master_project/LokalData/server/Stats for MA/Push 6/'

# paths, includes, excludes and names are aligned by position.
make_plot(
    paths=[AC_path, bl_path], 
    includes=[include_ac, include_bl], 
    excludes=[exclude_ac, exclude_bl],
    names=['AC', 'PPO + GAIL'],
    plot_name='Push Environment',
    save_path = None
    )

In [None]:
AC_path = '/home/hendrik/Documents/master_project/LokalData/server/AC/AC_var_2023-02-22/'
bl_path = '/home/hendrik/Documents/master_project/LokalData/server/baselines/Baselines_Stats_GAIL_2023-02-22/'

include_ac1 = ['push', '15','False', 'stats']
exclude_ac1 = ['optimize']

include_ac2 = ['push', '15','True', 'stats']
exclude_ac2 = ['optimize']

save_path = '/home/hendrik/Documents/master_project/LokalData/server/Stats for MA/Push 6/'

# exclude_ac2 was defined but a literal [] was passed for the second experiment,
# so its curve also averaged over 'optimize' files while the first one did not.
# NOTE(review): revert to [] if mixing in the optimised runs was intentional.
make_plot(
    paths=[AC_path, AC_path],
    includes=[include_ac1, include_ac2],
    excludes=[exclude_ac1, exclude_ac2],
    names=['AC', 'AC Imitation'],
    plot_name='Push Environment',
    save_path = None
    )

In [None]:
AC_path = '/home/hendrik/Documents/master_project/LokalData/server/AC/AC_var_test_2023-02-21'
bl_path = '/home/hendrik/Documents/master_project/LokalData/server/baselines/Baselines_Stats_GAIL_2023-02-21'

# Active-critic filters; assigned here but not part of the plot below.
include_ac = ['pickplace demonstrations: 6, im_ph:False', 'stats']
exclude_ac = ['optimize']

# Baseline filters, one per number of demonstrations; none of them excludes anything.
include_bl14 = ['demonstrations_14', 'learner', 'push', 'PPO']
include_bl10 = ['demonstrations_10', 'learner', 'push', 'PPO']
include_bl6 = ['demonstrations_6', 'learner', 'push', 'TQC', 'id_0']
exclude_bl = []

save_path = '/home/hendrik/Documents/master_project/LokalData/server/Stats for MA/Push 6/'

make_plot(
    paths=[bl_path] * 3,
    includes=[include_bl14, include_bl10, include_bl6],
    excludes=[exclude_bl] * 3,
    names=['14', '10', '6'],
    plot_name='Push Environment',
    save_path=save_path,
)

In [None]:
AC_path = '/home/hendrik/Documents/master_project/LokalData/server/Stats for MA/pickplace 14/AC/'
PPOGail = '/home/hendrik/Documents/master_project/LokalData/server/Stats for MA/pickplace 14/PPO GAIL'
TQCPath = '/home/hendrik/Documents/master_project/LokalData/server/Stats for MA/pickplace 14/TQC GAIL'

include_ac = ['stats']
exclude_ac = ['optimize']

include_PPOGail = ['learner']
include_TQC = ['learner']
exclude_bl = []

save_path = '/home/hendrik/Documents/master_project/LokalData/server/Stats for MA/PickPlace_14/'

# make_plot has no `common_steps` parameter (the call raised a TypeError);
# resampling onto a common timestep grid is what `plot_closest` does.
make_plot(
    paths=[AC_path, PPOGail, TQCPath],
    includes=[include_ac, include_PPOGail, include_TQC],
    excludes=[exclude_ac, exclude_bl, exclude_bl],
    names=['AC', 'PPO + GAIL', 'TQC + GAIL'],
    plot_name='Pick and Place Environment',
    save_path = None,
    plot_closest=True
    )

In [None]:
AC_path = '/home/hendrik/Documents/master_project/LokalData/server/AC/AC_var_test_2023-02-21'

include_ac = ['stats', 'window', '6']
exclude_ac = ['optimize']


save_path = '/home/hendrik/Documents/master_project/LokalData/server/Stats for MA/PickPlace_14/'

# One experiment needs one exclude list; the surplus exclude_bl entries were
# never read and made this cell depend on a variable from another cell.
make_plot(
    paths=[AC_path],
    includes=[include_ac],
    excludes=[exclude_ac],
    names=['AC'],
    plot_name='Reach Environment',
    save_path = None
    )