# Overview

In this notebook, we walk through all the necessary components of running experiments on LIBERO, and some common usage such as defining your own algorithm and policy architectures in the codebase.

1. Dataset preparation for your algorithms
2. Write your own algorithm
    - Subclassing from `Sequential` base class
3. Write your own model
4. Write your training loop
5. Visualize results

## 1. Experiments

In [1]:
from hydra import compose, initialize

from libero.libero import benchmark, get_libero_path
import hydra
import pprint
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ['PYOPENGL_PLATFORM'] = 'egl'
from omegaconf import OmegaConf
import yaml
from easydict import EasyDict
from libero.libero.benchmark import get_benchmark
from libero.lifelong.datasets import (GroupedTaskDataset, SequenceVLDataset, get_dataset)
from libero.lifelong.utils import (get_task_embs, safe_device, create_experiment_dir)
hydra.core.global_hydra.GlobalHydra.instance().clear()

### load the default hydra config
initialize(config_path="../libero/configs")
hydra_cfg = compose(config_name="config")
yaml_config = OmegaConf.to_yaml(hydra_cfg)
cfg = EasyDict(yaml.safe_load(yaml_config))

pp = pprint.PrettyPrinter(indent=2)
pp.pprint(cfg.policy)

# prepare lifelong learning
cfg.folder = get_libero_path("datasets")
cfg.bddl_folder = get_libero_path("bddl_files")
cfg.init_states_folder = get_libero_path("init_states")
cfg.eval.num_procs = 1
cfg.eval.n_eval = 5

cfg.train.n_epochs = 25

pp.pprint(f"Note that the number of epochs used in this example is intentionally reduced to 5.")

task_order = cfg.data.task_order_index # can be from {0 .. 21}, default to 0, which is [task 0, 1, 2 ...]
cfg.benchmark_name = "libero_object" # can be from {"libero_spatial", "libero_object", "libero_goal", "libero_10"}
benchmark = get_benchmark(cfg.benchmark_name)(task_order)

# prepare datasets from the benchmark
datasets = []
descriptions = []
shape_meta = None
n_tasks = benchmark.n_tasks

for i in range(n_tasks):
    # currently we assume tasks from same benchmark have the same shape_meta
    task_i_dataset, shape_meta = get_dataset(
            dataset_path=os.path.join(cfg.folder, benchmark.get_task_demonstration(i)),
            obs_modality=cfg.data.obs.modality,
            initialize_obs_utils=(i==0),
            seq_len=cfg.data.seq_len,
    )
    # add language to the vision dataset, hence we call vl_dataset
    descriptions.append(benchmark.get_task(i).language)
    datasets.append(task_i_dataset)

task_embs = get_task_embs(cfg, descriptions)
benchmark.set_task_embs(task_embs)

datasets = [SequenceVLDataset(ds, emb) for (ds, emb) in zip(datasets, task_embs)]
n_demos = [data.n_demos for data in datasets]
n_sequences = [data.total_num_sequences for data in datasets]

  from .autonotebook import tqdm as notebook_tqdm
The version_base parameter is not specified.
Please specify a compatability version level, or None.
Will assume defaults for version 1.1
  initialize(config_path="../libero/configs")


{ 'color_aug': { 'network': 'BatchWiseImgColorJitterAug',
                 'network_kwargs': { 'brightness': 0.3,
                                     'contrast': 0.3,
                                     'epsilon': 0.1,
                                     'hue': 0.3,
                                     'input_shape': None,
                                     'saturation': 0.3}},
  'embed_size': 64,
  'extra_hidden_size': 128,
  'extra_num_layers': 0,
  'image_encoder': { 'network': 'ResnetEncoder',
                     'network_kwargs': { 'freeze': False,
                                         'language_fusion': 'film',
                                         'no_stride': False,
                                         'pretrained': False,
                                         'remove_layer_num': 4}},
  'language_encoder': { 'network': 'MLPEncoder',
                        'network_kwargs': { 'hidden_size': 128,
                                            'input_size': 768,
 

Downloading tokenizer_config.json: 100%|██████████| 49.0/49.0 [00:00<00:00, 247kB/s]
Downloading config.json: 100%|██████████| 570/570 [00:00<00:00, 1.73MB/s]
Downloading vocab.txt: 100%|██████████| 208k/208k [00:00<00:00, 11.1MB/s]
Downloading tokenizer.json: 100%|██████████| 426k/426k [00:00<00:00, 13.2MB/s]
Downloading pytorch_model.bin: 100%|██████████| 416M/416M [00:03<00:00, 111MB/s]  


## 2. Write your own policy architecture

In [2]:
import robomimic.utils.tensor_utils as TensorUtils
import torch
import torch.nn as nn

from einops import rearrange, repeat
from libero.lifelong.models.modules.rgb_modules import *
from libero.lifelong.models.modules.language_modules import *
from libero.lifelong.models.base_policy import BasePolicy
from libero.lifelong.models.policy_head import *
from libero.lifelong.models.modules.transformer_modules import *

###############################################################################
#
# A model handling extra input modalities besides images at time t.
#
###############################################################################

class ExtraModalityTokens(nn.Module):
    def __init__(
        self,
        use_joint=False,
        use_gripper=False,
        use_ee=False,
        extra_num_layers=0,
        extra_hidden_size=64,
        extra_embedding_size=32,
    ):
        """
        This is a class that maps all extra modality inputs into tokens of the same size
        """
        super().__init__()
        self.use_joint = use_joint
        self.use_gripper = use_gripper
        self.use_ee = use_ee
        self.extra_embedding_size = extra_embedding_size

        joint_states_dim = 7
        gripper_states_dim = 2
        ee_dim = 3

        self.num_extra = int(use_joint) + int(use_gripper) + int(use_ee)

        extra_low_level_feature_dim = (
            int(use_joint) * joint_states_dim
            + int(use_gripper) * gripper_states_dim
            + int(use_ee) * ee_dim
        )

        assert extra_low_level_feature_dim > 0, "[error] no extra information"

        self.extra_encoders = {}

        def generate_proprio_mlp_fn(modality_name, extra_low_level_feature_dim):
            assert extra_low_level_feature_dim > 0  # we indeed have extra information
            if extra_num_layers > 0:
                layers = [nn.Linear(extra_low_level_feature_dim, extra_hidden_size)]
                for i in range(1, extra_num_layers):
                    layers += [
                        nn.Linear(extra_hidden_size, extra_hidden_size),
                        nn.ReLU(inplace=True),
                    ]
                layers += [nn.Linear(extra_hidden_size, extra_embedding_size)]
            else:
                layers = [nn.Linear(extra_low_level_feature_dim, extra_embedding_size)]

            self.proprio_mlp = nn.Sequential(*layers)
            self.extra_encoders[modality_name] = {"encoder": self.proprio_mlp}

        for (proprio_dim, use_modality, modality_name) in [
            (joint_states_dim, self.use_joint, "joint_states"),
            (gripper_states_dim, self.use_gripper, "gripper_states"),
            (ee_dim, self.use_ee, "ee_states"),
        ]:

            if use_modality:
                generate_proprio_mlp_fn(modality_name, proprio_dim)

        self.encoders = nn.ModuleList(
            [x["encoder"] for x in self.extra_encoders.values()]
        )

    def forward(self, obs_dict):
        """
        obs_dict: {
            (optional) joint_stats: (B, T, 7),
            (optional) gripper_states: (B, T, 2),
            (optional) ee: (B, T, 3)
        }
        map above to a latent vector of shape (B, T, H)
        """
        tensor_list = []

        for (use_modality, modality_name) in [
            (self.use_joint, "joint_states"),
            (self.use_gripper, "gripper_states"),
            (self.use_ee, "ee_states"),
        ]:

            if use_modality:
                tensor_list.append(
                    self.extra_encoders[modality_name]["encoder"](
                        obs_dict[modality_name]
                    )
                )

        x = torch.stack(tensor_list, dim=-2)
        return x

###############################################################################
#
# A Transformer policy
#
###############################################################################


class MyTransformerPolicy(BasePolicy):
    """
    Input: (o_{t-H}, ... , o_t)
    Output: a_t or distribution of a_t
    """

    def __init__(self, cfg, shape_meta):
        super().__init__(cfg, shape_meta)
        policy_cfg = cfg.policy

        ### 1. encode image
        embed_size = policy_cfg.embed_size
        transformer_input_sizes = []
        self.image_encoders = {}
        for name in shape_meta["all_shapes"].keys():
            if "rgb" in name or "depth" in name:
                kwargs = policy_cfg.image_encoder.network_kwargs
                kwargs.input_shape = shape_meta["all_shapes"][name]
                kwargs.output_size = embed_size
                kwargs.language_dim = (
                    policy_cfg.language_encoder.network_kwargs.input_size
                )
                self.image_encoders[name] = {
                    "input_shape": shape_meta["all_shapes"][name],
                    "encoder": eval(policy_cfg.image_encoder.network)(**kwargs),
                }

        self.encoders = nn.ModuleList(
            [x["encoder"] for x in self.image_encoders.values()]
        )

        ### 2. encode language
        policy_cfg.language_encoder.network_kwargs.output_size = embed_size
        self.language_encoder = eval(policy_cfg.language_encoder.network)(
            **policy_cfg.language_encoder.network_kwargs
        )

        ### 3. encode extra information (e.g. gripper, joint_state)
        self.extra_encoder = ExtraModalityTokens(
            use_joint=cfg.data.use_joint,
            use_gripper=cfg.data.use_gripper,
            use_ee=cfg.data.use_ee,
            extra_num_layers=policy_cfg.extra_num_layers,
            extra_hidden_size=policy_cfg.extra_hidden_size,
            extra_embedding_size=embed_size,
        )

        ### 4. define temporal transformer
        policy_cfg.temporal_position_encoding.network_kwargs.input_size = embed_size
        self.temporal_position_encoding_fn = eval(
            policy_cfg.temporal_position_encoding.network
        )(**policy_cfg.temporal_position_encoding.network_kwargs)

        self.temporal_transformer = TransformerDecoder(
            input_size=embed_size,
            num_layers=policy_cfg.transformer_num_layers,
            num_heads=policy_cfg.transformer_num_heads,
            head_output_size=policy_cfg.transformer_head_output_size,
            mlp_hidden_size=policy_cfg.transformer_mlp_hidden_size,
            dropout=policy_cfg.transformer_dropout,
        )

        policy_head_kwargs = policy_cfg.policy_head.network_kwargs
        policy_head_kwargs.input_size = embed_size
        policy_head_kwargs.output_size = shape_meta["ac_dim"]

        self.policy_head = eval(policy_cfg.policy_head.network)(
            **policy_cfg.policy_head.loss_kwargs,
            **policy_cfg.policy_head.network_kwargs
        )

        self.latent_queue = []
        self.max_seq_len = policy_cfg.transformer_max_seq_len

    def temporal_encode(self, x):
        pos_emb = self.temporal_position_encoding_fn(x)
        x = x + pos_emb.unsqueeze(1)  # (B, T, num_modality, E)
        sh = x.shape
        self.temporal_transformer.compute_mask(x.shape)

        x = TensorUtils.join_dimensions(x, 1, 2)  # (B, T*num_modality, E)
        x = self.temporal_transformer(x)
        x = x.reshape(*sh)
        return x[:, :, 0]  # (B, T, E)

    def spatial_encode(self, data):
        # 1. encode extra
        extra = self.extra_encoder(data["obs"])  # (B, T, num_extra, E)

        # 2. encode language, treat it as action token
        B, T = extra.shape[:2]
        text_encoded = self.language_encoder(data)  # (B, E)
        text_encoded = text_encoded.view(B, 1, 1, -1).expand(
            -1, T, -1, -1
        )  # (B, T, 1, E)
        encoded = [text_encoded, extra]

        # 3. encode image
        for img_name in self.image_encoders.keys():
            x = data["obs"][img_name]
            B, T, C, H, W = x.shape
            img_encoded = self.image_encoders[img_name]["encoder"](
                x.reshape(B * T, C, H, W),
                langs=data["task_emb"]
                .reshape(B, 1, -1)
                .repeat(1, T, 1)
                .reshape(B * T, -1),
            ).view(B, T, 1, -1)
            encoded.append(img_encoded)
        encoded = torch.cat(encoded, -2)  # (B, T, num_modalities, E)
        return encoded

    def forward(self, data):
        x = self.spatial_encode(data)
        x = self.temporal_encode(x)
        dist = self.policy_head(x)
        return dist

    def get_action(self, data):
        self.eval()
        with torch.no_grad():
            data = self.preprocess_input(data, train_mode=False)
            x = self.spatial_encode(data)
            self.latent_queue.append(x)
            if len(self.latent_queue) > self.max_seq_len:
                self.latent_queue.pop(0)
            x = torch.cat(self.latent_queue, dim=1)  # (B, T, H_all)
            x = self.temporal_encode(x)
            dist = self.policy_head(x[:, -1])
        action = dist.sample().detach().cpu()
        return action.view(action.shape[0], -1).numpy()

    def reset(self):
        self.latent_queue = []

## 3. Write your own lifelong learning algorithm

In [3]:
from libero.lifelong.algos.base import Sequential

### All lifelong learning algorithm should inherit the Sequential algorithm super class

class MyLifelongAlgo(Sequential):
    """
    The experience replay policy.
    """
    def __init__(self,
                 n_tasks,
                 cfg,
                 **policy_kwargs):
        super().__init__(n_tasks=n_tasks, cfg=cfg, **policy_kwargs)
        # define the learning policy
        self.datasets = []
        self.policy = eval(cfg.policy.policy_type)(cfg, cfg.shape_meta)

    def start_task(self, task):
        # what to do at the beginning of a new task
        super().start_task(task)

    def end_task(self, dataset, task_id, benchmark):
        # what to do when finish learning a new task
        self.datasets.append(dataset)

    def observe(self, data):
        # how the algorithm observes a data and returns a loss to be optimized
        loss = super().observe(data)
        return loss



# 4. Write your training script

In [4]:
cfg.policy.policy_type = "MyTransformerPolicy"
cfg.lifelong.algo = "MyLifelongAlgo"

create_experiment_dir(cfg)
cfg.shape_meta = shape_meta

import numpy as np
from tqdm import trange
from libero.lifelong.metric import evaluate_loss, evaluate_success

print("experiment directory is: ", cfg.experiment_dir)
algo = safe_device(MyLifelongAlgo(n_tasks, cfg), cfg.device)

result_summary = {
    'L_conf_mat': np.zeros((n_tasks, n_tasks)),   # loss confusion matrix
    'S_conf_mat': np.zeros((n_tasks, n_tasks)),   # success confusion matrix
    'L_fwd'     : np.zeros((n_tasks,)),           # loss AUC, how fast the agent learns
    'S_fwd'     : np.zeros((n_tasks,)),           # success AUC, how fast the agent succeeds
}

gsz = cfg.data.task_group_size

if (cfg.train.n_epochs < 50):
    print("NOTE: the number of epochs used in this example is intentionally reduced to 30 for simplicity.")
if (cfg.eval.n_eval < 20):
    print("NOTE: the number of evaluation episodes used in this example is intentionally reduced to 5 for simplicity.")

for i in trange(n_tasks):
    algo.train()
    s_fwd, l_fwd = algo.learn_one_task(datasets[i], i, benchmark, result_summary)
    # s_fwd is success rate AUC, when the agent learns the {0, e, 2e, ...} epochs
    # l_fwd is BC loss AUC, similar to s_fwd
    result_summary["S_fwd"][i] = s_fwd
    result_summary["L_fwd"][i] = l_fwd

    if cfg.eval.eval:
        algo.eval()
        # we only evaluate on the past tasks: 0 .. i
        L = evaluate_loss(cfg, algo, benchmark, datasets[:i+1]) # (i+1,)
        S = evaluate_success(cfg, algo, benchmark, list(range((i+1)*gsz))) # (i+1,)
        result_summary["L_conf_mat"][i][:i+1] = L
        result_summary["S_conf_mat"][i][:i+1] = S

        torch.save(result_summary, os.path.join(cfg.experiment_dir, f'result.pt'))

experiment directory is:  ./experiments/libero_object/MyLifelongAlgo/MyTransformerPolicy_seed10000/run_001


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


NOTE: the number of epochs used in this example is intentionally reduced to 30 for simplicity.
NOTE: the number of evaluation episodes used in this example is intentionally reduced to 5 for simplicity.


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)


[info] Epoch:   0 | train loss:  5.44 | time: 0.35
[info] evaluate task 0 takes 41.9 seconds
[info] Epoch:   0 | succ: 0.00 ± 0.00 | best succ: 0.0 | succ. AoC 0.00 | time: 0.70
[info] Epoch:   1 | train loss: -0.91 | time: 0.61
[info] Epoch:   2 | train loss: -4.01 | time: 0.60
[info] Epoch:   3 | train loss: -10.94 | time: 0.60
[info] Epoch:   4 | train loss: -13.92 | time: 0.60
[info] Epoch:   5 | train loss: -14.58 | time: 0.60
[info] evaluate task 0 takes 44.0 seconds
[info] Epoch:   5 | succ: 0.00 ± 0.00 | best succ: 0.0 | succ. AoC 0.00 | time: 0.73
[info] Epoch:   6 | train loss: -14.61 | time: 0.60
[info] Epoch:   7 | train loss: -15.07 | time: 0.60
[info] Epoch:   8 | train loss: -15.58 | time: 0.60
[info] Epoch:   9 | train loss: -15.83 | time: 0.60
[info] Epoch:  10 | train loss: -15.93 | time: 0.60
[info] evaluate task 0 takes 46.0 seconds
[info] Epoch:  10 | succ: 0.00 ± 0.00 | best succ: 0.0 | succ. AoC 0.00 | time: 0.77
[info] Epoch:  11 | train loss: -16.19 | time: 0.6

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
 10%|█         | 1/10 [19:56<2:59:25, 1196.21s/it]

[info] evaluate task 0 takes 15.6 seconds


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)


[info] Epoch:   0 | train loss: -11.62 | time: 0.30
[info] evaluate task 1 takes 42.5 seconds
[info] Epoch:   0 | succ: 0.00 ± 0.00 | best succ: 0.0 | succ. AoC 0.00 | time: 0.71
[info] Epoch:   1 | train loss: -15.65 | time: 0.56
[info] Epoch:   2 | train loss: -16.21 | time: 0.56
[info] Epoch:   3 | train loss: -16.38 | time: 0.56
[info] Epoch:   4 | train loss: -16.51 | time: 0.55
[info] Epoch:   5 | train loss: -16.62 | time: 0.55
[info] evaluate task 1 takes 35.9 seconds
[info] Epoch:   5 | succ: 0.20 ± 0.35 | best succ: 0.2 | succ. AoC 0.10 | time: 0.60
[info] Epoch:   6 | train loss: -16.82 | time: 0.55
[info] Epoch:   7 | train loss: -16.97 | time: 0.55
[info] Epoch:   8 | train loss: -17.12 | time: 0.55
[info] Epoch:   9 | train loss: -17.25 | time: 0.56
[info] Epoch:  10 | train loss: -17.38 | time: 0.56
[info] evaluate task 1 takes 12.7 seconds
[info] Epoch:  10 | succ: 1.00 ± 0.00 | best succ: 1.0 | succ. AoC 0.40 | time: 0.21
[info] Epoch:  11 | train loss: -17.53 | time: 

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].as

[info] evaluate task 0 takes 43.7 seconds


 20%|██        | 2/10 [38:07<2:31:16, 1134.61s/it]

[info] evaluate task 1 takes 19.4 seconds


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)


[info] Epoch:   0 | train loss: -13.28 | time: 0.28
[info] evaluate task 2 takes 36.8 seconds
[info] Epoch:   0 | succ: 0.40 ± 0.43 | best succ: 0.4 | succ. AoC 0.40 | time: 0.61
[info] Epoch:   1 | train loss: -16.89 | time: 0.51
[info] Epoch:   2 | train loss: -17.40 | time: 0.51
[info] Epoch:   3 | train loss: -17.53 | time: 0.51
[info] Epoch:   4 | train loss: -17.61 | time: 0.51
[info] Epoch:   5 | train loss: -17.72 | time: 0.51
[info] evaluate task 2 takes 11.4 seconds
[info] Epoch:   5 | succ: 1.00 ± 0.00 | best succ: 1.0 | succ. AoC 0.70 | time: 0.19
[info] Epoch:   6 | train loss: -17.85 | time: 0.51
[info] Epoch:   7 | train loss: -18.03 | time: 0.51
[info] Epoch:   8 | train loss: -18.18 | time: 0.51
[info] Epoch:   9 | train loss: -18.35 | time: 0.51
[info] Epoch:  10 | train loss: -18.41 | time: 0.51
[info] evaluate task 2 takes 19.6 seconds
[info] Epoch:  10 | succ: 0.80 ± 0.35 | best succ: 1.0 | succ. AoC 0.80 | time: 0.33
[info] Epoch:  11 | train loss: -18.52 | time: 

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].as

[info] evaluate task 0 takes 42.9 seconds
[info] evaluate task 1 takes 41.7 seconds


 30%|███       | 3/10 [55:26<2:07:15, 1090.84s/it]

[info] evaluate task 2 takes 17.7 seconds


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)


[info] Epoch:   0 | train loss: -15.36 | time: 0.31
[info] evaluate task 3 takes 41.5 seconds
[info] Epoch:   0 | succ: 0.00 ± 0.00 | best succ: 0.0 | succ. AoC 0.00 | time: 0.69
[info] Epoch:   1 | train loss: -16.95 | time: 0.56
[info] Epoch:   2 | train loss: -17.29 | time: 0.56
[info] Epoch:   3 | train loss: -17.48 | time: 0.56
[info] Epoch:   4 | train loss: -17.61 | time: 0.57
[info] Epoch:   5 | train loss: -17.75 | time: 0.57
[info] evaluate task 3 takes 18.2 seconds
[info] Epoch:   5 | succ: 0.80 ± 0.35 | best succ: 0.8 | succ. AoC 0.40 | time: 0.30
[info] Epoch:   6 | train loss: -17.82 | time: 0.57
[info] Epoch:   7 | train loss: -17.94 | time: 0.57
[info] Epoch:   8 | train loss: -17.97 | time: 0.57
[info] Epoch:   9 | train loss: -18.26 | time: 0.57
[info] Epoch:  10 | train loss: -18.35 | time: 0.57
[info] evaluate task 3 takes 13.2 seconds
[info] Epoch:  10 | succ: 1.00 ± 0.00 | best succ: 1.0 | succ. AoC 0.60 | time: 0.22
[info] Epoch:  11 | train loss: -18.48 | time: 

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].as

[info] evaluate task 0 takes 42.8 seconds
[info] evaluate task 1 takes 41.6 seconds
[info] evaluate task 2 takes 21.9 seconds


 40%|████      | 4/10 [1:15:20<1:53:10, 1131.74s/it]

[info] evaluate task 3 takes 29.9 seconds


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)


[info] Epoch:   0 | train loss: -6.32 | time: 0.34
[info] evaluate task 4 takes 42.2 seconds
[info] Epoch:   0 | succ: 0.00 ± 0.00 | best succ: 0.0 | succ. AoC 0.00 | time: 0.70
[info] Epoch:   1 | train loss: -16.72 | time: 0.62
[info] Epoch:   2 | train loss: -17.34 | time: 0.62
[info] Epoch:   3 | train loss: -17.61 | time: 0.62
[info] Epoch:   4 | train loss: -17.76 | time: 0.62
[info] Epoch:   5 | train loss: -18.04 | time: 0.62
[info] evaluate task 4 takes 35.5 seconds
[info] Epoch:   5 | succ: 0.20 ± 0.35 | best succ: 0.2 | succ. AoC 0.10 | time: 0.59
[info] Epoch:   6 | train loss: -18.19 | time: 0.62
[info] Epoch:   7 | train loss: -18.34 | time: 0.62
[info] Epoch:   8 | train loss: -18.49 | time: 0.62
[info] Epoch:   9 | train loss: -18.71 | time: 0.63
[info] Epoch:  10 | train loss: -18.86 | time: 0.62
[info] evaluate task 4 takes 14.2 seconds
[info] Epoch:  10 | succ: 1.00 ± 0.00 | best succ: 1.0 | succ. AoC 0.40 | time: 0.24
[info] Epoch:  11 | train loss: -19.07 | time: 0

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].as

[info] evaluate task 0 takes 37.2 seconds
[info] evaluate task 1 takes 42.9 seconds
[info] evaluate task 2 takes 42.4 seconds
[info] evaluate task 3 takes 41.1 seconds


 50%|█████     | 5/10 [1:37:59<1:41:07, 1213.55s/it]

[info] evaluate task 4 takes 25.4 seconds


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)


[info] Epoch:   0 | train loss:  1.12 | time: 0.31
[info] evaluate task 5 takes 42.0 seconds
[info] Epoch:   0 | succ: 0.00 ± 0.00 | best succ: 0.0 | succ. AoC 0.00 | time: 0.70
[info] Epoch:   1 | train loss: -16.61 | time: 0.57
[info] Epoch:   2 | train loss: -17.56 | time: 0.57
[info] Epoch:   3 | train loss: -17.71 | time: 0.57
[info] Epoch:   4 | train loss: -18.01 | time: 0.57
[info] Epoch:   5 | train loss: -18.02 | time: 0.57
[info] evaluate task 5 takes 38.6 seconds
[info] Epoch:   5 | succ: 0.20 ± 0.35 | best succ: 0.2 | succ. AoC 0.10 | time: 0.64
[info] Epoch:   6 | train loss: -18.17 | time: 0.57
[info] Epoch:   7 | train loss: -18.41 | time: 0.57
[info] Epoch:   8 | train loss: -18.55 | time: 0.57
[info] Epoch:   9 | train loss: -18.75 | time: 0.57
[info] Epoch:  10 | train loss: -18.85 | time: 0.57
[info] evaluate task 5 takes 41.6 seconds
[info] Epoch:  10 | succ: 0.20 ± 0.35 | best succ: 0.2 | succ. AoC 0.13 | time: 0.69
[info] Epoch:  11 | train loss: -19.07 | time: 0

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].as

[info] evaluate task 0 takes 44.7 seconds
[info] evaluate task 1 takes 45.0 seconds
[info] evaluate task 2 takes 43.7 seconds
[info] evaluate task 3 takes 24.9 seconds
[info] evaluate task 4 takes 43.5 seconds


 60%|██████    | 6/10 [2:01:21<1:25:11, 1277.77s/it]

[info] evaluate task 5 takes 26.1 seconds


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)


[info] Epoch:   0 | train loss:  1.88 | time: 0.33
[info] evaluate task 6 takes 43.2 seconds
[info] Epoch:   0 | succ: 0.00 ± 0.00 | best succ: 0.0 | succ. AoC 0.00 | time: 0.72
[info] Epoch:   1 | train loss: -16.81 | time: 0.61
[info] Epoch:   2 | train loss: -17.67 | time: 0.61
[info] Epoch:   3 | train loss: -17.90 | time: 0.61
[info] Epoch:   4 | train loss: -18.19 | time: 0.61
[info] Epoch:   5 | train loss: -18.33 | time: 0.61
[info] evaluate task 6 takes 13.4 seconds
[info] Epoch:   5 | succ: 1.00 ± 0.00 | best succ: 1.0 | succ. AoC 0.50 | time: 0.22
[info] Epoch:   6 | train loss: -18.45 | time: 0.61
[info] Epoch:   7 | train loss: -18.64 | time: 0.61
[info] Epoch:   8 | train loss: -18.89 | time: 0.61
[info] Epoch:   9 | train loss: -19.02 | time: 0.61
[info] Epoch:  10 | train loss: -19.21 | time: 0.61
[info] evaluate task 6 takes 13.1 seconds
[info] Epoch:  10 | succ: 1.00 ± 0.00 | best succ: 1.0 | succ. AoC 0.67 | time: 0.22
[info] Epoch:  11 | train loss: -19.47 | time: 0

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].as

[info] evaluate task 0 takes 42.2 seconds
[info] evaluate task 1 takes 43.1 seconds
[info] evaluate task 2 takes 42.4 seconds
[info] evaluate task 3 takes 41.6 seconds
[info] evaluate task 4 takes 41.8 seconds
[info] evaluate task 5 takes 42.1 seconds


 70%|███████   | 7/10 [2:24:55<1:06:06, 1322.10s/it]

[info] evaluate task 6 takes 13.3 seconds


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)


[info] Epoch:   0 | train loss: -9.45 | time: 0.31
[info] evaluate task 7 takes 41.6 seconds
[info] Epoch:   0 | succ: 0.00 ± 0.00 | best succ: 0.0 | succ. AoC 0.00 | time: 0.69
[info] Epoch:   1 | train loss: -16.83 | time: 0.56
[info] Epoch:   2 | train loss: -17.57 | time: 0.56
[info] Epoch:   3 | train loss: -17.80 | time: 0.56
[info] Epoch:   4 | train loss: -18.10 | time: 0.56
[info] Epoch:   5 | train loss: -18.28 | time: 0.56
[info] evaluate task 7 takes 40.9 seconds
[info] Epoch:   5 | succ: 0.00 ± 0.00 | best succ: 0.0 | succ. AoC 0.00 | time: 0.68
[info] Epoch:   6 | train loss: -18.55 | time: 0.56
[info] Epoch:   7 | train loss: -18.73 | time: 0.56
[info] Epoch:   8 | train loss: -19.01 | time: 0.56
[info] Epoch:   9 | train loss: -19.26 | time: 0.56
[info] Epoch:  10 | train loss: -19.40 | time: 0.56
[info] evaluate task 7 takes 40.9 seconds
[info] Epoch:  10 | succ: 0.00 ± 0.00 | best succ: 0.0 | succ. AoC 0.00 | time: 0.68
[info] Epoch:  11 | train loss: -19.67 | time: 0

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].as

[info] evaluate task 0 takes 42.2 seconds
[info] evaluate task 1 takes 41.9 seconds
[info] evaluate task 2 takes 43.2 seconds
[info] evaluate task 3 takes 41.7 seconds
[info] evaluate task 4 takes 31.4 seconds
[info] evaluate task 5 takes 41.7 seconds
[info] evaluate task 6 takes 13.7 seconds


 80%|████████  | 8/10 [2:50:23<46:15, 1387.69s/it]  

[info] evaluate task 7 takes 41.7 seconds


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)


[info] Epoch:   0 | train loss: -16.60 | time: 0.33
[info] evaluate task 8 takes 13.5 seconds
[info] Epoch:   0 | succ: 1.00 ± 0.00 | best succ: 1.0 | succ. AoC 1.00 | time: 0.23
[info] Epoch:   1 | train loss: -18.03 | time: 0.62
[info] Epoch:   2 | train loss: -18.42 | time: 0.63
[info] Epoch:   3 | train loss: -18.51 | time: 0.63
[info] Epoch:   4 | train loss: -18.81 | time: 0.63
[info] Epoch:   5 | train loss: -18.92 | time: 0.63
[info] evaluate task 8 takes 19.7 seconds
[info] Epoch:   5 | succ: 0.80 ± 0.35 | best succ: 1.0 | succ. AoC 1.00 | time: 0.33
[info] Epoch:   6 | train loss: -19.15 | time: 0.62
[info] Epoch:   7 | train loss: -19.43 | time: 0.62
[info] Epoch:   8 | train loss: -19.37 | time: 0.62
[info] Epoch:   9 | train loss: -19.67 | time: 0.62
[info] Epoch:  10 | train loss: -19.81 | time: 0.62
[info] evaluate task 8 takes 13.5 seconds
[info] Epoch:  10 | succ: 1.00 ± 0.00 | best succ: 1.0 | succ. AoC 1.00 | time: 0.23
[info] Epoch:  11 | train loss: -19.89 | time: 

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].as

[info] evaluate task 0 takes 36.9 seconds
[info] evaluate task 1 takes 42.3 seconds
[info] evaluate task 2 takes 42.3 seconds
[info] evaluate task 3 takes 41.9 seconds
[info] evaluate task 4 takes 42.1 seconds
[info] evaluate task 5 takes 42.0 seconds
[info] evaluate task 6 takes 13.7 seconds
[info] evaluate task 7 takes 36.9 seconds


 90%|█████████ | 9/10 [3:15:13<23:39, 1419.79s/it]

[info] evaluate task 8 takes 12.9 seconds


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)


[info] Epoch:   0 | train loss: -5.89 | time: 0.29
[info] evaluate task 9 takes 41.7 seconds
[info] Epoch:   0 | succ: 0.00 ± 0.00 | best succ: 0.0 | succ. AoC 0.00 | time: 0.70
[info] Epoch:   1 | train loss: -17.51 | time: 0.54
[info] Epoch:   2 | train loss: -18.11 | time: 0.54
[info] Epoch:   3 | train loss: -18.45 | time: 0.54
[info] Epoch:   4 | train loss: -18.72 | time: 0.54
[info] Epoch:   5 | train loss: -18.85 | time: 0.54
[info] evaluate task 9 takes 12.3 seconds
[info] Epoch:   5 | succ: 1.00 ± 0.00 | best succ: 1.0 | succ. AoC 0.50 | time: 0.21
[info] Epoch:   6 | train loss: -19.10 | time: 0.54
[info] Epoch:   7 | train loss: -19.30 | time: 0.54
[info] Epoch:   8 | train loss: -19.43 | time: 0.54
[info] Epoch:   9 | train loss: -19.64 | time: 0.54
[info] Epoch:  10 | train loss: -19.82 | time: 0.54
[info] evaluate task 9 takes 12.2 seconds
[info] Epoch:  10 | succ: 1.00 ± 0.00 | best succ: 1.0 | succ. AoC 0.67 | time: 0.20
[info] Epoch:  11 | train loss: -20.03 | time: 0

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].as

[info] evaluate task 0 takes 43.0 seconds
[info] evaluate task 1 takes 43.0 seconds
[info] evaluate task 2 takes 13.7 seconds
[info] evaluate task 3 takes 41.5 seconds
[info] evaluate task 4 takes 41.9 seconds
[info] evaluate task 5 takes 36.2 seconds
[info] evaluate task 6 takes 41.9 seconds
[info] evaluate task 7 takes 42.3 seconds
[info] evaluate task 8 takes 42.4 seconds


100%|██████████| 10/10 [3:39:13<00:00, 1315.31s/it]

[info] evaluate task 9 takes 14.5 seconds





# 5. Visualize the results

### 5.1 Load results

In [5]:
result_summary = torch.load(os.path.join(cfg.experiment_dir, f'result.pt'))
print(result_summary["S_conf_mat"])
print(result_summary["S_fwd"])

[[1.  0.  0.  0.  0.  0.  0.  0.  0.  0. ]
 [0.  0.8 0.  0.  0.  0.  0.  0.  0.  0. ]
 [0.  0.  0.8 0.  0.  0.  0.  0.  0.  0. ]
 [0.  0.  0.8 0.4 0.  0.  0.  0.  0.  0. ]
 [0.2 0.  0.  0.  0.6 0.  0.  0.  0.  0. ]
 [0.  0.  0.2 0.6 0.  0.6 0.  0.  0.  0. ]
 [0.  0.  0.  0.  0.  0.  1.  0.  0.  0. ]
 [0.  0.  0.  0.  0.4 0.  1.  0.  0.  0. ]
 [0.2 0.  0.  0.  0.  0.  1.  0.2 1.  0. ]
 [0.  0.  1.  0.  0.  0.2 0.  0.  0.  1. ]]
[0.1        0.7        0.9        0.8        0.7        0.36666667
 0.83333333 0.         1.         0.83333333]


### 5.2 Compute FWT, BWT, and AUC of the experiments

In [6]:
import torch
import numpy as np
from pathlib import Path

benchmark_map = {
    "libero_10"     : "LIBERO_10",
    "libero_90"     : "LIBERO_90",
    "libero_spatial": "LIBERO_SPATIAL",
    "libero_object" : "LIBERO_OBJECT",
    "libero_goal"   : "LIBERO_GOAL",
}

algo_map = {
    "base"     : "Sequential",
    "er"       : "ER",
    "ewc"      : "EWC",
    "packnet"  : "PackNet",
    "multitask": "Multitask",
    "custom_algo"   : "MyLifelongAlgo",
}

policy_map = {
    "bc_rnn_policy"        : "BCRNNPolicy",
    "bc_transformer_policy": "BCTransformerPolicy",
    "bc_vilt_policy"       : "BCViLTPolicy",
    "custom_policy"        : "MyTransformerPolicy",
}

seeds = [10000]
N_SEEDS = len(seeds)
N_TASKS = 10

def get_auc(experiment_dir, bench, algo, policy):
    N_EP = cfg.train.n_epochs // cfg.eval.eval_every + 1
    fwds = np.zeros((N_TASKS, N_EP, N_SEEDS))

    for task in range(N_TASKS):
        counter = 0
        for k, seed in enumerate(seeds):
            name = f"{experiment_dir}/task{task}_auc.log"
            try:
                succ = torch.load(name)["success"] # (n_epochs)
                idx = succ.argmax()
                succ[idx:] = succ[idx]
                fwds[task, :, k] = succ
            except:
                print("Some errors when loading results")
                continue
    return fwds

def compute_metric(res):
    mat, fwts  = res # fwds: (num_tasks, num_save_intervals, num_seeds)
    num_tasks, num_seeds = mat.shape[1:]
    ret = {}

    # compute fwt
    fwt = fwts.mean(axis=(0,1))
    ret["fwt"] = fwt
    # compute bwt
    bwts = []
    aucs = []
    for seed in range(num_seeds):
        bwt = 0.0
        auc = 0.0
        for k in range(num_tasks):
            bwt_k = 0.0
            auc_k = 0.0
            for tau in range(k+1, num_tasks):
                bwt_k += mat[k,k,seed] - mat[tau,k,seed]
                auc_k += mat[tau,k,seed]
            if k + 1 < num_tasks:
                bwt_k /= (num_tasks - k - 1)
            auc_k = (auc_k + fwts[k,:,seed].mean()) / (num_tasks - k)

            bwt += bwt_k
            auc += auc_k
        bwts.append(bwt / num_tasks)
        aucs.append(auc / num_tasks)
    bwts = np.array(bwts)
    aucs = np.array(aucs)
    ret["bwt"] = bwts
    ret["auc"] = aucs
    return ret

In [7]:
experiment_dir = "experiments"
benchmark_name = "libero_object"
algo_name = "custom_algo"
policy_name = "custom_policy"

fwds = get_auc(cfg.experiment_dir, benchmark_name, algo_name, policy_name)

conf_mat = result_summary["S_conf_mat"][..., np.newaxis]

metric = compute_metric((conf_mat, fwds))
print(metric)


{'fwt': array([0.62333333]), 'bwt': array([0.48731746]), 'auc': array([0.30952778])}


### 5.3 Visualize policy rollouts

 This is an example of how to use the trained model to do inference. We will take the policy from training on the first task as an example. More concrete example, please see `evaluate_one_task_success` in the file `lifelong/lifelong/metric.py`.

In [1]:
from IPython.display import HTML
from base64 import b64encode
import imageio

from libero.libero.envs import OffScreenRenderEnv, DummyVectorEnv
from libero.lifelong.metric import raw_obs_to_tensor_obs

# You can turn on subprocess
env_num = 1
action_dim = 7


# If it's packnet, the weights need to be processed first
task_id = 9
task = benchmark.get_task(task_id)
task_emb = benchmark.get_task_emb(task_id)

if cfg.lifelong.algo == "PackNet":
    algo = algo.get_eval_algo(task_id)

algo.eval()
env_args = {
    "bddl_file_name": os.path.join(
        cfg.bddl_folder, task.problem_folder, task.bddl_file
    ),
    "camera_heights": cfg.data.img_h,
    "camera_widths": cfg.data.img_w,
}

env = DummyVectorEnv(
            [lambda: OffScreenRenderEnv(**env_args) for _ in range(env_num)]
)

init_states_path = os.path.join(
    cfg.init_states_folder, task.problem_folder, task.init_states_file
)
init_states = torch.load(init_states_path)

env.reset()

init_state = init_states[0:1]
dones = [False]

algo.reset()

obs = env.set_init_state(init_state)


# Make sure the gripepr is open to make it consistent with the provided demos.
dummy_actions = np.zeros((env_num, action_dim))
for _ in range(5):
    obs, _, _, _ = env.step(dummy_actions)

steps = 0

obs_tensors = [[]] * env_num
while steps < cfg.eval.max_steps:
    steps += 1
    data = raw_obs_to_tensor_obs(obs, task_emb, cfg)
    action = algo.policy.get_action(data)

    obs, reward, done, info = env.step(action)

    for k in range(env_num):
        dones[k] = dones[k] or done[k]
        obs_tensors[k].append(obs[k]["agentview_image"])
    if all(dones):
        break

# visualize video
# obs_tensor: (env_num, T, H, W, C)

images = [img[::-1] for img in obs_tensors[0]]
fps = 30
writer  = imageio.get_writer('tmp_video.mp4', fps=fps)
for image in images:
    writer.append_data(image)
writer.close()

video_data = open("tmp_video.mp4", "rb").read()
video_tag = f'<video controls alt="test" src="data:video/mp4;base64,{b64encode(video_data).decode()}">'
HTML(data=video_tag)



  if LooseVersion(torch.__version__) < LooseVersion("1.0.0"):
  from .autonotebook import tqdm as notebook_tqdm


NameError: name 'benchmark' is not defined