# Overview

In this notebook, we walk through all the components needed to run experiments on LIBERO, along with some common use cases such as defining your own algorithm and policy architecture in the codebase.

1. Dataset preparation for your algorithms
2. Write your own model (policy architecture)
3. Write your own algorithm
    - Subclassing from `Sequential` base class
4. Write your training loop
5. Visualize results

## 1. Experiments

In [1]:
from hydra import compose, initialize

from libero.libero import benchmark, get_libero_path
import hydra
import pprint
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ['PYOPENGL_PLATFORM'] = 'egl'
from omegaconf import OmegaConf
import yaml
from easydict import EasyDict
from libero.libero.benchmark import get_benchmark
from libero.lifelong.datasets import (GroupedTaskDataset, SequenceVLDataset, get_dataset)
from libero.lifelong.utils import (get_task_embs, safe_device, create_experiment_dir)
# Drop any Hydra state left over from a previous run of this cell so that
# initialize() below can be called again in the same kernel.
hydra.core.global_hydra.GlobalHydra.instance().clear()

### load the default hydra config
# version_base is pinned explicitly: when it is omitted Hydra (>= 1.2) warns and
# falls back to the 1.1 defaults, so "1.1" keeps the same behavior without the warning.
initialize(version_base="1.1", config_path="../libero/configs")
hydra_cfg = compose(config_name="config")
# Round-trip through YAML to turn the OmegaConf object into a plain EasyDict
# that supports attribute access and in-place edits.
yaml_config = OmegaConf.to_yaml(hydra_cfg)
cfg = EasyDict(yaml.safe_load(yaml_config))

pp = pprint.PrettyPrinter(indent=2)
pp.pprint(cfg.policy)

# prepare lifelong learning
cfg.folder = get_libero_path("datasets")
cfg.bddl_folder = get_libero_path("bddl_files")
cfg.init_states_folder = get_libero_path("init_states")
# keep evaluation small for this walkthrough
cfg.eval.num_procs = 1
cfg.eval.n_eval = 1

task_order = cfg.data.task_order_index # can be from {0 .. 21}, default to 0, which is [task 0, 1, 2 ...]
cfg.benchmark_name = "libero_spatial" # can be from {"libero_spatial", "libero_object", "libero_goal", "libero_10"}
# NOTE: this rebinds `benchmark` from the imported module to a benchmark instance;
# the later cells rely on the instance.
benchmark = get_benchmark(cfg.benchmark_name)(task_order)

# prepare datasets from the benchmark
datasets = []
descriptions = []
shape_meta = None
n_tasks = benchmark.n_tasks

for i in range(n_tasks):
    # currently we assume tasks from same benchmark have the same shape_meta
    task_i_dataset, shape_meta = get_dataset(
            dataset_path=os.path.join(cfg.folder, benchmark.get_task_demonstration(i)),
            obs_modality=cfg.data.obs.modality,
            initialize_obs_utils=(i==0),
            seq_len=cfg.data.seq_len,
    )
    # remember the task's language instruction; it is embedded below and paired
    # with the vision dataset (hence the "VL" dataset)
    descriptions.append(benchmark.get_task(i).language)
    datasets.append(task_i_dataset)

task_embs = get_task_embs(cfg, descriptions)
benchmark.set_task_embs(task_embs)

# add language to the vision datasets: one task embedding per task dataset
datasets = [SequenceVLDataset(ds, emb) for (ds, emb) in zip(datasets, task_embs)]
n_demos = [data.n_demos for data in datasets]
n_sequences = [data.total_num_sequences for data in datasets]

  from .autonotebook import tqdm as notebook_tqdm
The version_base parameter is not specified.
Please specify a compatability version level, or None.
Will assume defaults for version 1.1
  initialize(config_path="../libero/configs")


{ 'color_aug': { 'network': 'BatchWiseImgColorJitterAug',
                 'network_kwargs': { 'brightness': 0.3,
                                     'contrast': 0.3,
                                     'epsilon': 0.1,
                                     'hue': 0.3,
                                     'input_shape': None,
                                     'saturation': 0.3}},
  'embed_size': 64,
  'extra_hidden_size': 128,
  'extra_num_layers': 0,
  'image_encoder': { 'network': 'ResnetEncoder',
                     'network_kwargs': { 'freeze': False,
                                         'language_fusion': 'film',
                                         'no_stride': False,
                                         'pretrained': False,
                                         'remove_layer_num': 4}},
  'language_encoder': { 'network': 'MLPEncoder',
                        'network_kwargs': { 'hidden_size': 128,
                                            'input_size': 768,
 

## 2. Write your own policy architecture

In [2]:
import robomimic.utils.tensor_utils as TensorUtils
import torch
import torch.nn as nn

from einops import rearrange, repeat
from libero.lifelong.models.modules.rgb_modules import *
from libero.lifelong.models.modules.language_modules import *
from libero.lifelong.models.base_policy import BasePolicy
from libero.lifelong.models.policy_head import *


###############################################################################
#
# A model handling extra input modalities besides images at time t.
#
###############################################################################

class ExtraModalities:
    """
    Gathers the enabled low-dimensional observations (joint, gripper and
    end-effector states) and concatenates them along the last dimension.

    No learnable parameters are involved: the selected tensors are passed
    through unchanged and simply stacked side by side.
    """

    def __init__(self,
                 use_joint=False,
                 use_gripper=False,
                 use_ee=False,
                 extra_hidden_size=64,
                 extra_embedding_size=32):
        """
        use_joint / use_gripper / use_ee: which modalities to include.
        extra_hidden_size: accepted but not used by this class.
        extra_embedding_size: stored on the instance, not otherwise used here.

        Raises AssertionError when no modality is enabled.
        """
        self.use_joint = use_joint
        self.use_gripper = use_gripper
        self.use_ee = use_ee
        self.extra_embedding_size = extra_embedding_size

        # (enabled?, feature width) for joint, gripper and end-effector states
        modality_widths = (
            (use_joint, 7),
            (use_gripper, 2),
            (use_ee, 6),
        )
        self.extra_low_level_feature_dim = sum(
            int(enabled) * width for enabled, width in modality_widths
        )
        assert self.extra_low_level_feature_dim > 0, "[error] no extra information"

    def __call__(self, obs_dict):
        """
        obs_dict: {
            (optional) joint_states: (..., 7),
            (optional) gripper_states: (..., 2),
            (optional) ee_states: (..., 6)
        }
        Only the keys of the enabled modalities are read. Returns their
        concatenation along the last dimension, in the order joint, gripper, ee.
        """
        selection = (
            (self.use_joint, "joint_states"),
            (self.use_gripper, "gripper_states"),
            (self.use_ee, "ee_states"),
        )
        picked = [obs_dict[key] for enabled, key in selection if enabled]
        return torch.cat(picked, dim=-1)

    def output_shape(self, input_shape, shape_meta):
        """Return the concatenated feature width as a 1-tuple; both arguments are ignored."""
        width = self.extra_low_level_feature_dim
        return (width,)


###############################################################################
#
# An RNN policy
#
###############################################################################

class MyRNNPolicy(BasePolicy):
    """
    A recurrent (LSTM) policy.

    Input: (o_{t-H}, ... , o_t)
    Output: a_t or distribution of a_t

    For every time step the LSTM input is the concatenation of one embedding
    per image observation, the low-dimensional states selected by
    ``ExtraModalities`` and an embedding of the task language. The LSTM output
    is passed to a ``GMMHead``.
    """
    def __init__(self,
                 cfg, 
                 shape_meta):
        """
        cfg: experiment config; reads cfg.policy.image_encoder,
            cfg.policy.language_encoder and cfg.data.use_{joint,gripper,ee}.
        shape_meta: dict with "all_shapes" (observation name -> shape) and "ac_dim".
        """
        super().__init__(cfg, shape_meta)
        policy_cfg = cfg.policy

        ### 1. encode image
        rnn_input_size = 0
        image_embed_size = 64
        self.image_encoders = {}
        for name in shape_meta["all_shapes"].keys():
            if "rgb" in name or "depth" in name:
                # NOTE: kwargs is the config node itself, so the assignments
                # below are written back into cfg.
                kwargs = policy_cfg.image_encoder.network_kwargs
                kwargs.input_shape = shape_meta["all_shapes"][name]
                kwargs.output_size = image_embed_size 
                kwargs.language_dim = policy_cfg.language_encoder.network_kwargs.input_size
                self.image_encoders[name] = {
                    "input_shape": shape_meta["all_shapes"][name],
                    # NOTE(review): eval() resolves the encoder class from a config
                    # string; only use with trusted configs.
                    "encoder": eval(policy_cfg.image_encoder.network)(**kwargs)
                }
                rnn_input_size += image_embed_size
        # register the encoders (held in a plain dict above) as sub-modules
        self.encoders = nn.ModuleList([x["encoder"] for x in self.image_encoders.values()])

        ### 2. encode language
        text_embed_size = 32
        policy_cfg.language_encoder.network_kwargs.output_size = text_embed_size
        # NOTE(review): same eval()-on-config-string caveat as for the image encoder.
        self.language_encoder = eval(policy_cfg.language_encoder.network)(
                **policy_cfg.language_encoder.network_kwargs)
        rnn_input_size += text_embed_size

        ### 3. encode extra information (e.g. gripper, joint_state)
        self.extra_encoder = ExtraModalities(
                 use_joint=cfg.data.use_joint,
                 use_gripper=cfg.data.use_gripper,
                 use_ee=cfg.data.use_ee)
        rnn_input_size += self.extra_encoder.extra_low_level_feature_dim

        # LSTM geometry, kept on the instance so the initial hidden states
        # built in forward() always agree with the LSTM built here.
        self.rnn_hidden_size = 1024
        self.rnn_num_layers = 2
        bidirectional = False
        self.rnn = nn.LSTM(input_size=rnn_input_size,
                           hidden_size=self.rnn_hidden_size,
                           num_layers=self.rnn_num_layers,
                           batch_first=True,
                           dropout=0.0,
                           bidirectional=bidirectional)

        ### 4. use policy head to output action
        self.D = 2 if bidirectional else 1
        self.policy_head = GMMHead(
                # the LSTM output width is D * hidden_size (1024 while D == 1)
                input_size=self.D * self.rnn_hidden_size,
                loss_coef=2.0,
                hidden_size=1024,
                num_layers=2,
                min_std=0.0001,
                num_modes=5,
                activation="softplus",
                output_size=shape_meta["ac_dim"])
        # recurrent state carried across get_action() calls; None = start fresh
        self.eval_h0 = None
        self.eval_c0 = None

    def _initial_state(self, batch_size):
        """Return zero-filled (h0, c0) tensors for ``batch_size`` sequences."""
        shape = (self.D * self.rnn_num_layers, batch_size, self.rnn_hidden_size)
        h0 = torch.zeros(*shape).to(self.device)
        c0 = torch.zeros(*shape).to(self.device)
        return h0, c0

    def forward(self, data, train_mode=True):
        """
        data: dict with "obs" (observation name -> tensor; image entries are
            unpacked as (B, T, C, H, W)) and "task_emb".
        train_mode: if True the LSTM starts from a zero state on every call;
            if False the state stored in eval_h0 / eval_c0 is used and updated,
            so consecutive calls continue the same sequence until reset().

        Returns the output of the policy head applied to the LSTM outputs.
        """
        # 1. encode image
        encoded = []
        for img_name in self.image_encoders.keys():
            x = data["obs"][img_name]
            B, T, C, H, W = x.shape
            # fold time into the batch dimension for the image encoder and
            # repeat the task embedding once per time step to match
            e = self.image_encoders[img_name]["encoder"](
                    x.reshape(B*T, C, H, W),
                    langs=data["task_emb"].reshape(B,1,-1).repeat(1,T,1).reshape(B*T, -1)
            ).view(B,T,-1)
            encoded.append(e)

        # 2. add joint states, gripper info, etc.
        encoded.append(self.extra_encoder(data["obs"])) # add (B, T, H_extra)
        encoded = torch.cat(encoded, -1) # (B, T, H_all)

        # 3. language encoding
        lang_h = self.language_encoder(data) # (B, H)
        encoded = torch.cat([encoded,
            lang_h.unsqueeze(1).expand(-1, encoded.shape[1], -1)], dim=-1)

        # 4. apply temporal rnn
        if train_mode:
            h0, c0 = self._initial_state(encoded.shape[0])
            output, _ = self.rnn(encoded, (h0, c0))
        else:
            if self.eval_h0 is None:
                self.eval_h0, self.eval_c0 = self._initial_state(encoded.shape[0])
            output, (h1, c1) = self.rnn(encoded, (self.eval_h0, self.eval_c0))
            # keep the state for the next step, detached from any graph
            self.eval_h0 = h1.detach()
            self.eval_c0 = c1.detach()

        dist = self.policy_head(output)
        return dist
    
    def get_action(self, data):
        """
        Sample an action for the current observation and return it as a numpy
        array with one flattened row per batch element.
        """
        self.eval()
        data = self.preprocess_input(data, train_mode=False)
        with torch.no_grad():
            # train_mode=False so the recurrent state persists between calls;
            # with the default (True) the eval_h0 / eval_c0 branch and reset()
            # would never take effect and the LSTM would restart every step.
            dist = self.forward(data, train_mode=False)
        action = dist.sample().detach().cpu()
        return action.view(action.shape[0], -1).numpy()

    def reset(self):
        """Forget the recurrent state so the next get_action() starts from zeros."""
        self.eval_h0 = None
        self.eval_c0 = None

## 3. Write your own lifelong learning algorithm

In [3]:
from libero.lifelong.algos.base import Sequential

### All lifelong learning algorithms should inherit from the Sequential base class

class MyLifelongAlgo(Sequential):
    """
    A minimal custom lifelong learning algorithm.

    It trains ``MyRNNPolicy`` with the update rule inherited from
    ``Sequential`` and keeps the dataset of every finished task in
    ``self.datasets`` (the bookkeeping an experience-replay style algorithm
    would build on). The hooks below are the extension points to override.
    """
    def __init__(self,
                 n_tasks,
                 cfg,
                 **policy_kwargs):
        super().__init__(n_tasks=n_tasks, cfg=cfg, **policy_kwargs)
        # end_task() appends to self.datasets, so make sure the list exists
        # even if the base class does not create it.
        if not hasattr(self, "datasets"):
            self.datasets = []
        # define the learning policy
        self.policy = MyRNNPolicy(cfg, cfg.shape_meta)

    def start_task(self, task):
        # what to do at the beginning of a new task
        super().start_task(task)

    def end_task(self, dataset, task_id, benchmark):
        # what to do when finish learning a new task:
        # remember the dataset of the task that was just learned
        self.datasets.append(dataset)

    def observe(self, data):
        # how the algorithm observes a data and returns a loss to be optimized
        loss = super().observe(data)
        return loss



## 4. Write your training script

In [4]:
create_experiment_dir(cfg)
cfg.shape_meta = shape_meta

import numpy as np
# evaluation helpers used in the loop below; they are not imported anywhere else
# in this notebook
from libero.lifelong.metric import evaluate_loss, evaluate_success

print("experiment directory is: ", cfg.experiment_dir)
algo = safe_device(MyLifelongAlgo(n_tasks, cfg), cfg.device)

# Row i of each confusion matrix holds the metrics on tasks 0..i measured
# right after task i has been learned.
result_summary = {
    'L_conf_mat': np.zeros((n_tasks, n_tasks)),   # loss confusion matrix
    'S_conf_mat': np.zeros((n_tasks, n_tasks)),   # success confusion matrix
    'L_fwd'     : np.zeros((n_tasks,)),           # loss AUC, how fast the agent learns
    'S_fwd'     : np.zeros((n_tasks,)),           # success AUC, how fast the agent succeeds
}

for i in range(n_tasks):
    algo.train()
    s_fwd, l_fwd = algo.learn_one_task(datasets[i], i, benchmark, result_summary)
    # s_fwd is success rate AUC, when the agent learns the {0, e, 2e, ...} epochs
    # l_fwd is BC loss AUC, similar to s_fwd
    result_summary["S_fwd"][i] = s_fwd
    result_summary["L_fwd"][i] = l_fwd

    if cfg.eval.eval:
        algo.eval()
        # we only evaluate on the past tasks: 0 .. i
        # (datasets holds one entry per task here, so the task ids are simply
        # 0..i; the previously referenced `gsz` was never defined)
        L = evaluate_loss(cfg, algo, benchmark, datasets[:i+1]) # (i+1,)
        S = evaluate_success(cfg, algo, benchmark, list(range(i+1))) # (i+1,)
        result_summary["L_conf_mat"][i][:i+1] = L
        result_summary["S_conf_mat"][i][:i+1] = S

        # overwrite the summary after every evaluated task so partial results survive
        torch.save(result_summary, os.path.join(cfg.experiment_dir, 'result.pt'))

experiment directory is:  ./experiments/libero_spatial/Sequential/BCTransformerPolicy_seed10000/run_024


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pad_mask = pad_mask[:, None].astype(np.bool)


[info] Epoch:   0 | train loss: 11.42 | time: 0.40


TypeError: (): incompatible function arguments. The following argument types are supported:
    1. (arg0: mujoco._structs.MjData, arg1: float) -> None

Invoked with: <mujoco._structs.MjData object at 0x7f8845f43430>, array([ 0.00000000e+00, -1.38453941e-04, -1.76734475e-01, -3.65575975e-02,
       -2.45711298e+00,  5.93502672e-03,  2.23521997e+00,  7.99383438e-01,
        2.08330000e-02, -2.08330000e-02, -6.34828177e-02,  2.02062562e-01,
        9.70000000e-01,  7.07106781e-01,  0.00000000e+00,  0.00000000e+00,
        7.07106781e-01, -1.88730494e-01,  3.20384503e-01,  9.70000000e-01,
        7.07106781e-01,  0.00000000e+00,  0.00000000e+00,  7.07106781e-01,
        5.78595875e-02,  2.64329413e-02,  9.70000000e-01,  7.07106781e-01,
        0.00000000e+00,  0.00000000e+00,  7.07106781e-01, -1.97384170e-01,
        1.89135261e-01,  9.70000000e-01,  7.07106781e-01,  0.00000000e+00,
        0.00000000e+00,  7.07106781e-01,  5.34091668e-02,  2.05182337e-01,
        9.70000000e-01,  7.07106781e-01,  0.00000000e+00,  0.00000000e+00,
        7.07106781e-01,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00])

## 5. Visualize the results

### 5.1 Load results

### 5.2 Visualize loss and success rate curves on datasets

### 5.3 Visualize policy rollouts