# Multi-Task Live Selection 1 vs N

To use this notebook, you should already have run the following notebooks:
1. E4_0_Record_and_Segment.ipynb
2. E4_1_Recombination_Selection.ipynb

Summary:
1. This notebook uses already computed scores stored as npz files
2. For each part, compute the front scores live (current_rgb vs. the first frame of the part)
3. Compute full trajectory from current state to the goal state

We compute rewards for the shapes: {'trapeze'} using scoring functions {'sum', 'prod'}

In [1]:
import os
import copy
import time
import json
import shutil
import unittest
import subprocess
from pathlib import Path
import numpy as np

from scipy.spatial.transform import Rotation as R

from gym_grasping.envs.robot_sim_env import RobotSimEnv
from flow_control.demo.demo_episode_recorder import record_sim
from flow_control.runner import evaluate_control
from flow_control.servoing.module import ServoingModule
from flow_control.servoing.playback_env_servo import PlaybackEnvServo
import matplotlib.pyplot as plt
from ipywidgets import widgets, interact, Layout
import seaborn as sns
from tqdm import tqdm
import getpass


%matplotlib inline

# Name of the experiment; joined onto the data directory below to form root_dir.
experiment = "multi_task_1_vs_n"
# NOTE(review): `goal` is not referenced anywhere else in the visible notebook.
goal = "multi_task_goal_1_vs_n"

# Renderer name handed to RobotSimEnv when environments are created further down.
renderer = 'debug'
# Key ('rP' or 'rR') used to pick the object_rot_range value when building environments.
task_variant = 'rP'

def get_data_dir():
    """Return the base directory that holds the experiment data.

    Resolution order:

    1. the ``FLOW_DATA_DIR`` environment variable, when set and non-empty;
    2. the built-in per-user defaults for the known users.

    Returns:
        str: path of the data directory.

    Raises:
        RuntimeError: if the current user has no default and no override is
            set. Previously the function silently returned ``None`` in that
            case, which only surfaced later as a ``TypeError`` inside
            ``os.path.join``.
    """
    override = os.environ.get("FLOW_DATA_DIR")
    if override:
        return override

    user_dirs = {
        "argusm": "/tmp/flow_experiments3",
        "nayakab": "../tmp",
    }
    username = getpass.getuser()
    if username in user_dirs:
        return user_dirs[username]

    raise RuntimeError(
        f"No data directory configured for user '{username}'. "
        "Set the FLOW_DATA_DIR environment variable or add an entry to get_data_dir()."
    )

# Base data directory for the current user.
data_dir = get_data_dir()

# Directory of this experiment: recordings, score files and the segmentation JSON are read from here.
root_dir = os.path.join(data_dir, experiment)

pybullet build time: May 20 2022 19:44:17


Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


## Load all Demonstrations

In [2]:
def get_recordings(directory):
    """Return the sorted paths of all immediate sub-directories of `directory`.

    Regular files inside `directory` are ignored; each returned path is
    `directory` joined with the sub-directory name.
    """
    candidates = (os.path.join(directory, entry) for entry in os.listdir(directory))
    return sorted(path for path in candidates if os.path.isdir(path))

# Sorted list of recording directories; list positions are used as demonstration indices below.
recordings = get_recordings(root_dir)

In [3]:
# Load the demonstration episodes (one PlaybackEnvServo per recording directory,
# in the same order as `recordings`)
playbacks = [PlaybackEnvServo(rec) for rec in recordings[:]]

# Plot the demonstrations
%matplotlib notebook
# Single-axes figure used by the interactive demonstration viewer below.
fig, ax = plt.subplots(1,figsize=(8, 6))
fig.suptitle("Demonstration Frames")
ax.set_axis_off()
# Image handle initialised with the first demonstration's camera image;
# `update` swaps its data instead of re-creating the plot.
image_h = ax.imshow(playbacks[0].cam.get_image()[0])

def update(demo_index, frame_index):
    """Show frame `frame_index` of demonstration `demo_index` in the viewer.

    Replaces the data of the module-level image handle, requests a redraw,
    and prints the frame's waypoint name and, when a foreground mask is
    available, the percentage of foreground pixels.
    """
    demo = playbacks[demo_index]
    frame = demo[frame_index]

    image_h.set_data(frame.cam.get_image()[0])
    fig.canvas.draw_idle()

    print("wp_name:", frame.get_info()["wp_name"])

    mask = demo.get_fg_mask()
    if mask is None:
        return
    print("percent fg:", np.mean(mask)*100)
    
# Slider selecting the demonstration (one position per loaded playback).
slider_w = widgets.IntSlider(min=0, max=len(playbacks)-1, step=1, value=0,
                             layout=Layout(width='70%'))
# Slider selecting the frame inside the demonstration.
# NOTE(review): the upper bound is hard-coded to 199; presumably demonstrations
# never exceed 200 frames -- confirm, shorter demos may fail for large indices.
slider_i = widgets.IntSlider(min=0, max=200-1, step=1, value=0,
                             layout=Layout(width='70%'))

# Wire both sliders to the `update` callback.
interact(update, demo_index=slider_w, frame_index=slider_i)

<IPython.core.display.Javascript object>

interactive(children=(IntSlider(value=0, description='demo_index', layout=Layout(width='70%'), max=70), IntSli…

<function __main__.update(demo_index, frame_index)>

# Read scores (errors) from the stored files

In [None]:
# Stored errors are converted to scores as (1 - error).
error_matrix = np.load(f"{root_dir}/error_matrix.npz")['arr_0']
score_matrix = 1 - error_matrix

# Errors wrt goal images
# One array per task short name, read from errors_rear_<task>.npz and
# converted to scores in the same way.
scores_rear = {}
tasks = ['pnp', 'ss']
for task in tasks:
    scores_rear[task] = 1 - np.load(f"{root_dir}/errors_rear_{task}.npz")['arr_0']

In [None]:
# Get good demonstrations and demonstration part information
def filter_demo(pb):
    """Decide whether a demonstration is usable.

    A demonstration is kept when the reward stored in its last step is
    positive and its foreground mask covers more than 0.5% of the entries
    (mean of the mask > 0.005).

    `get_fg_mask()` may return None (the viewer callback in this notebook
    guards against that); such demonstrations are treated as not usable
    instead of raising a TypeError from `np.mean(None)`.

    Args:
        pb: playback object supporting `pb[-1].data['rew']` and `pb.get_fg_mask()`.

    Returns:
        bool: True if the demonstration passes both checks.
    """
    # Keep the original short-circuit: the mask is only queried for rewarded demos.
    if not pb[-1].data['rew'] > 0:
        return False

    fg_mask = pb.get_fg_mask()
    if fg_mask is None:
        return False

    return bool(np.mean(fg_mask) > 0.005)

# Boolean flag per playback, in the same order as `playbacks`.
demo_good = [filter_demo(pb) for pb in playbacks]
# Indices of the playbacks that passed the filter.
good_demonstrations = np.where(demo_good)[0]

# Convert numpy integers to plain ints (the indices are later turned into
# string keys via str(...) when looking up demo_parts).
good_demonstrations = [int(x) for x in good_demonstrations]
# NOTE: live_seeds is overwritten in the next cell from the segmentation file keys.
live_seeds = good_demonstrations
print(good_demonstrations)

In [None]:
# Load demo segmentation file
demo_seg_file = f'{root_dir}/demo_parts_manual2.json'
# Use a context manager so the file handle is closed as soon as the JSON is
# parsed (the handle was previously left open for the kernel's lifetime).
with open(demo_seg_file) as fp:
    demo_parts = json.load(fp)
# The JSON keys are demonstration indices stored as strings.
live_seeds = [int(key) for key in demo_parts.keys()]
demo_keys = demo_parts.keys()

## Compute live scores 
1. We compute live scores of the current state of the gripper wrt start frames of the part.
2. The part is identified by traj_idx in the function
3. demo_parts contains all the part information and the keyframes relating to each part

In [None]:
from sklearn.preprocessing import minmax_scale

# Load Servoing Module
from flow_control.servoing.module import ServoingModule
# Control configuration shared by every ServoingModule created in this notebook.
control_config = dict(mode="pointcloud-abs-rotz", threshold=0.40)
# Module-level servoing module built from the first recording; its flow_module
# is what similarity_from_reprojection uses to compute and warp optical flow.
servo_module = ServoingModule(recordings[0], control_config=control_config,
                              start_paused=False)

def compute_current_scores(playbacks, current_rgb, demo_parts, demo_good, traj_idx=0, live_seed=0):
    """Score every good demonstration against the current live image.

    For each demonstration flagged in `demo_good`, the first frame of part
    `traj_idx` is compared with `current_rgb` via `similarity_from_reprojection`.
    The resulting errors and mean flows are normalised with `normalize_errors`
    and turned into scores as (1 - normalised error).

    The demonstrations to visit are derived from the `demo_good` argument.
    The previous version looped over the module-level `good_demonstrations`
    list, so the loop and the normalisation could silently disagree whenever
    a caller passed a different mask.

    Args:
        playbacks: sequence of demonstration playbacks, indexable by demo index.
        current_rgb: live image to compare against the demonstrations.
        demo_parts: mapping from str(demo index) to a list of part dicts that
            each contain a 'start' frame index.
        demo_good: per-demonstration boolean flags, same length as `playbacks`.
        traj_idx: index of the part whose start frame is used.
        live_seed: unused; kept for interface compatibility.

    Returns:
        numpy array with one score per playback (higher is better).
    """
    sim_errors = np.ones(len(playbacks)) # lower is better
    mean_flows = np.zeros(len(playbacks))

    for demo_seed in np.where(demo_good)[0]:
        # Plain int so that str(demo_seed) matches the JSON keys of demo_parts.
        demo_seed = int(demo_seed)
        playback = playbacks[demo_seed]
        start_idx = demo_parts[str(demo_seed)][traj_idx]['start']
        demo_rgb = playback[start_idx].cam.get_image()[0]
        demo_mask = playback.fg_masks[start_idx]
        error, mean_flow = similarity_from_reprojection(current_rgb, demo_rgb, demo_mask)
        sim_errors[demo_seed] = error
        mean_flows[demo_seed] = mean_flow

    errors_norm = normalize_errors(sim_errors, mean_flows, demo_good)
    scores = 1 - errors_norm

    return scores

def similarity_from_reprojection(live_rgb, demo_rgb, demo_mask, return_images=False):
    """Evaluate the similarity of two images via the flow reprojection error.

    The flow between the demo and live image is computed with the module-level
    `servo_module.flow_module`, the live image is warped with that flow, and
    the per-pixel difference to the demo image is measured inside `demo_mask`.

    Args:
        live_rgb: live image; divided by 255 before warping.
        demo_rgb: demonstration image; divided by 255 before comparison.
        demo_mask: foreground mask of the demonstration image. It is used both
            as a multiplicative weight and as an index into the flow --
            presumably a boolean (H, W) array; TODO confirm.
        return_images: when True, also return the flow and the warped image.

    Returns:
        (error, mean_flow), or (error, mean_flow, flow, warped) when
        `return_images` is True. `error` is the masked sum of per-pixel
        difference norms divided by the mask sum; `mean_flow` is the mean
        flow magnitude over the masked entries.
    """
    flow = servo_module.flow_module.step(demo_rgb, live_rgb)
    warped = servo_module.flow_module.warp_image(live_rgb / 255.0, flow)

    # Compute the difference once and reuse it (it used to be computed twice,
    # with the first result discarded).
    diff = warped - (demo_rgb / 255.0)
    error = np.linalg.norm(diff, axis=2) * demo_mask
    error = error.sum() / demo_mask.sum()
    mean_flow = np.linalg.norm(flow[demo_mask],axis=1).mean()
    if return_images:
        return error, mean_flow, flow, warped
    return error, mean_flow


def normalize_errors(errors, flows, demo_good):
    """Combine reprojection errors and mean flows into one normalised error.

    Only the entries selected by `demo_good` are considered: both quantities
    are min-max scaled over those entries and blended with weight 1 for the
    errors and `w` for the flows. All other entries keep the value 1.

    NOTE(review): the blend takes np.mean over the two terms (i.e. divides by
    2) and then additionally divides by (1 + w), so the result appears to be
    bounded by 0.5 rather than 1 -- confirm whether this scaling is intended.
    """
    w = .5
    scaled_errors = minmax_scale(errors[demo_good])
    scaled_flows = minmax_scale(flows[demo_good])
    blended = np.mean((1*scaled_errors, w*scaled_flows),axis=0)/(1+w)

    errors_norm = np.ones(errors.shape)
    errors_norm[demo_good] = blended
    return errors_norm

In [None]:
# Arrange keypoint information for all parts in an easy-to-use data structure
def split_keypoints(pb, demo_part):
    """Group the keyframe indices of a demonstration by part name.

    Args:
        pb: playback whose `keep_dict` keys are the keyframe indices.
        demo_part: list of part dicts with the keys "name", "start" and "end".

    Returns:
        dict mapping each part name to the sorted keyframe indices that lie in
        (start, end]; a part starting at frame 0 also includes frame 0. The
        first keyframe of the 'insert' part is additionally appended to the
        'locate' part.
    """
    keyframes = sorted(pb.keep_dict.keys())

    keep_parts = {}
    for part in demo_part:
        # Lower bound is exclusive, so shift it to -1 when the part starts at
        # frame 0; otherwise frame 0 would be dropped.
        lower = -1 if part["start"] == 0 else part["start"]
        keep_parts[part["name"]] = [kf for kf in keyframes if lower < kf and part["end"] >= kf]

    # Need to also add the first frame of the 'insert' part to the 'locate' part
    first_insert_frame = keep_parts['insert'][0]
    keep_parts['locate'].append(first_insert_frame)
    return keep_parts

# Per good demonstration: mapping from part name to its keyframe indices.
keypoint_info = {}
for demo_seed in good_demonstrations:
    # demo_parts is keyed by the demonstration index as a string.
    keypoint_info[demo_seed] = split_keypoints(playbacks[demo_seed], demo_parts[str(demo_seed)])

# Display the resulting structure as the cell output.
keypoint_info

## Search for best trajectory 

For this experiment, we only consider the 'sum' and 'prod' scoring functions.

The following function computes the cumulative scores starting at each part all the way to the goal state.

For example:

live_state ---- score_matrix_1 ---- score_matrix_2 ---- goal_state 

simplifies to 

live_scores ---- cscore[0] ---- cscore[1] ---- cscore[2]

1. cscore[2] just contains the goal scores
2. cscore[i] is an array of size n ** (num_parts - i)
3. score_matrices = [score_matrix_1, score_matrix_2, and so on]
4. Once the live scores are available, they can be used along with the corresponding cumulative score to find the best demonstration index

In [None]:
# Compute cumulative scores starting from the goal state
# score_matrices should contain all the computed score matrices in order from live state to goal state

def get_cumulative_scores(scores_rear, score_matrices, score_fn='sum'):
    """Accumulate part-to-part scores backwards from the goal state.

    Args:
        scores_rear: 1-D array of goal scores, one per demonstration (n entries).
        score_matrices: sequence of 2-D score matrices ordered from the live
            state towards the goal state. The sequence is NOT modified (the
            previous implementation reversed the caller's list in place, so a
            second call with the same list saw the matrices in the wrong order).
        score_fn: 'sum' to add scores along a trajectory, 'prod' to multiply them.

    Returns:
        list of arrays ordered in the direction of traversal; for n
        demonstrations and k matrices the lengths are
        [n ** (k + 1), ..., n ** 2, n]. The last entry is `scores_rear` itself.

    Raises:
        ValueError: if `score_fn` is neither 'sum' nor 'prod'.
    """
    if score_fn == 'sum':
        combine = np.add
    elif score_fn == 'prod':
        combine = np.multiply
    else:
        raise ValueError(f"Unknown score_fn '{score_fn}', expected 'sum' or 'prod'")

    # Start from goal state, add goal scores to the list
    cumulative_scores = [scores_rear]

    num_demos = scores_rear.shape[0]

    # Cumulative scores are built from the goal state backwards, so walk the
    # matrices in reverse order without touching the caller's sequence.
    for idx, sm in enumerate(reversed(score_matrices)):
        # Repeat every column so that it lines up with the flattened
        # cumulative scores of all continuations computed so far.
        sm = sm.repeat(num_demos ** idx, axis=1)
        new_scores = combine(sm, cumulative_scores[-1]).reshape(-1)
        cumulative_scores.append(new_scores)

    # Need to reverse this list so that scores are arranged in the direction of traversal
    # This will now be of the format [n ** 3, n**2, n] (3 part case) for n demonstrations
    cumulative_scores.reverse()

    return cumulative_scores

# Only one part-to-part score matrix is used in this experiment.
score_matrices = [score_matrix]

# Cumulative scores per scoring function, using column 0 of the 'pnp' goal scores.
cumulative_scores = {}
cumulative_scores['sum'] = get_cumulative_scores(scores_rear['pnp'][:, 0], score_matrices, score_fn='sum')
cumulative_scores['prod'] = get_cumulative_scores(scores_rear['pnp'][:, 0], score_matrices, score_fn='prod')

In [None]:
# Sanity check: print the length of every cumulative score array per scoring function
for fn_name in ('sum', 'prod'):
    print(list(map(len, cumulative_scores[fn_name])))

## Compute best demonstration index using live score and cumulative_scores

1. 'score_front' is computed live
2. 'cm_score' contains the cumulative score (flattened) at the corresponding part index
3. 'factor' helps determine the correct index of the demonstration
4. 'score_front' is repeated 'factor' times to enable combining it with the cumulative score.
5. Then, an argmax operation helps find the correct index

In [None]:
def get_best_part(score_front, cm_score, factor, step_value=None, score_fn='sum'):
    """Return the index of the demonstration that maximises the combined score.

    Args:
        score_front: 1-D array of live scores, one per demonstration.
        cm_score: flattened cumulative scores for the current part; its length
            must equal len(score_front) * factor.
        factor: number of cumulative entries per demonstration; each live
            score is repeated this many times to line up with `cm_score`.
        step_value: when given, only the first `step_value` demonstrations are
            considered. None (the default) considers all of them; previously
            the default crashed with a TypeError on `factor * None`.
        score_fn: 'sum' to add the scores, 'prod' to multiply them.

    Returns:
        Index of the best demonstration (argmax position divided by `factor`).

    Raises:
        ValueError: if `score_fn` is neither 'sum' nor 'prod'.
    """
    sf = score_front.repeat(factor)
    if score_fn == 'sum':
        combined = sf + cm_score
    elif score_fn == 'prod':
        combined = sf * cm_score
    else:
        raise ValueError(f"Unknown score_fn '{score_fn}', expected 'sum' or 'prod'")

    # Based on the current step value, take argmax on a subset of the indices
    if step_value is not None:
        combined = combined[0:factor * step_value]
    return np.argmax(combined) // factor

## Evaluation Function

In [None]:
# Evaluation function
from math import pi
from flow_control.servoing.module import ServoingModule
from gym_grasping.envs.robot_sim_env import RobotSimEnv
from flow_control.runner import evaluate_control
import ipdb
import cv2


# Bookkeeping of the recordings chosen per run configuration.
# Keys have the form '<task>_<score_fn>_<steps>'; eval_cmb fills each inner
# dict with live_seed -> list of selected recordings.
selected_recordings = {}
for task in ['pick_n_place']:
    for score_fn in ['sum', 'prod']:
        for steps in [1, 5, 10, 15, 20]:
            selected_recordings[f'{task}_{score_fn}_{steps}'] = {}
        
def eval_cmb(playbacks, demo_good, live_seed, demo_parts, keypoint_info, task, step_val, score_fn, sf):
    """Run one two-part ('locate' then 'insert') servoing episode and return its reward.

    For the first part the demonstration is chosen with `get_best_part` from
    the pre-computed front scores `sf` and the module-level cumulative scores.
    The second part is hard-coded to demonstration 0 for a faster experiment.
    The recordings chosen for both parts are stored in the module-level
    `selected_recordings`; only the second part is saved to disk, in a fresh
    directory below `<data_dir>/multi_task_run_1vsN/<task>/<score_fn>/<step_val>`.

    Args:
        playbacks, demo_good, demo_parts: unused; kept for interface compatibility.
        live_seed: seed of the simulation environment.
        keypoint_info: mapping demo index -> {part name -> keyframe indices}.
        task: task name passed to the environment ('pick_n_place' or 'shape_sorting').
        step_val: number of leading demonstrations that may be selected.
        score_fn: 'sum' or 'prod'.
        sf: pre-computed front scores for `live_seed`.

    Returns:
        Reward returned by `evaluate_control` for the second part.

    Raises:
        ValueError: if `sf` is None. The previous code only printed a message
            in that case and then crashed on an unbound `best_idx`.
    """
    if sf is None:
        raise ValueError("eval_cmb requires pre-computed front scores (sf) to select the first part")

    save_root = f'{data_dir}/multi_task_run_1vsN/{task}/{score_fn}/{step_val}'
    cscores = cumulative_scores[score_fn]
    run_key = f'{task}_{score_fn}_{step_val}'

    # Instantiate env
    env = RobotSimEnv(task='recombination', renderer=renderer, act_type='continuous',
                      initial_pose='close', max_steps=500, control='absolute-full',
                      img_size=(256, 256), param_randomize=("geom",),
                      param_info={'object_selected': 'trapeze', 'task_selected': task},
                      task_info=dict(object_rot_range={"rP":pi/2.,"rR":pi/6.}[task_variant]),
                      seed=int(live_seed))

    # Mapping part to index
    traj_map = {0: 'locate', 1: 'insert'}

    # Mapping task name to a short form. These goal scores were stored using these keys
    task_map = {'pick_n_place': 'pnp', "shape_sorting": "ss"}

    save_dir = None

    # Total number of demonstrations that are used
    num_demos = scores_rear[task_map[task]].shape[0]

    for idx in range(2):
        # Step once without an action before each part, as before. The
        # returned observation is no longer read: front scores are
        # pre-computed and passed in through `sf`.
        env.step(None)

        if sf is None:
            # Second part: `sf` was cleared after the first part. Hard-coded to
            # demonstration 0 for a faster experiment.
            best_idx = 0
        else:
            # Compute a trajectory, get the demonstration (part) index that maximizes the score
            best_idx = get_best_part(sf, cscores[idx], num_demos ** (1 - idx),
                                     score_fn=score_fn, step_value=step_val)
        best_demo = recordings[best_idx]

        # Keypoints for the demonstration part
        kps = keypoint_info[best_idx][traj_map[idx]]

        if idx == 0:
            selected_recordings[run_key][live_seed] = [best_demo]

        if idx == 1:
            # Pick a save directory that does not exist yet by appending a counter.
            save_dir = f"{save_root}/run_pnp_{live_seed}_{best_idx}"
            folder_idx = 1
            updated_dir = save_dir
            while os.path.isdir(updated_dir):
                updated_dir = f"{save_dir}_{folder_idx}"
                folder_idx += 1

            save_dir = updated_dir
            selected_recordings[run_key][live_seed].append(best_demo)

        servo_module = ServoingModule(best_demo, control_config=control_config,
                                      start_paused=False, plot=False, plot_save_dir=None,
                                      load='select', selected_kp=kps)
        # Only the first part performs the initial alignment.
        _, reward, _, _ = evaluate_control(env, servo_module, max_steps=130, save_dir=save_dir,
                                           initial_align=(idx == 0))

        # Front scores only apply to the first part.
        sf = None

    del env
    del servo_module
    return reward

In [None]:
# Pre-compute scores_front for a faster experiment.
# The same front scores are needed for every step value that is evaluated,
# so they are computed once, stored in a file, and loaded when required.

def get_scores_front(seeds, task, demo_parts, demo_good):
    """Compute the front scores of the initial state for every seed.

    For each seed a fresh simulation environment is created, stepped once
    without an action, and the resulting 'rgb_gripper' image is scored
    against the start frame of the first part (traj_idx=0) of the
    demonstrations via `compute_current_scores`.

    Returns:
        list with one score array per seed, in the order of `seeds`.
    """
    def _make_env(seed):
        # Same environment configuration as used for evaluation.
        return RobotSimEnv(task='recombination', renderer=renderer, act_type='continuous',
                           initial_pose='close', max_steps=500, control='absolute-full',
                           img_size=(256, 256), param_randomize=("geom",),
                           param_info={'object_selected': 'trapeze', 'task_selected': task},
                           task_info=dict(object_rot_range={"rP":pi/2.,"rR":pi/6.}[task_variant]),
                           seed=int(seed))

    scores_front = []
    for seed in seeds:
        env = _make_env(seed)

        # Current gripper camera image after a single no-action step
        state, _, _, _ = env.step(None)
        current_rgb = state['rgb_gripper']

        # Live scores wrt the first image of the first part
        scores_front.append(
            compute_current_scores(playbacks, current_rgb, demo_parts, demo_good,
                                   traj_idx=0, live_seed=seed)
        )

        del env

    return scores_front

In [None]:
# Number of live seeds (0 .. num_live_seeds - 1) to evaluate.
num_live_seeds = 20

sel_recordings = {}
seeds = range(num_live_seeds)

# Only one task is listed, so scores_front holds the scores of 'pick_n_place'.
for task in ['pick_n_place']:    
    scores_front = get_scores_front(seeds, task, demo_parts, demo_good)

# Save scores_front in a file
np.save(f'{root_dir}/scores_front.npy', scores_front, allow_pickle=True)

# Load scores_front from file
# scores_front = np.load(f'{root_dir}/scores_front.npy', allow_pickle=True)

In [None]:
# Evaluate

# Rewards per run configuration, keyed by '<task>_<score_fn>_<step_val>'.
all_rewards = {}

for task in ['pick_n_place']:    
    for score_fn in ['sum', 'prod']:
        for step_idx, step_val in enumerate([1, 5, 10, 15, 20]):
            rewards = []
            # live_idx indexes the pre-computed scores_front; it equals
            # live_seed here because the seeds are range(num_live_seeds).
            for live_idx, live_seed in enumerate(range(num_live_seeds)):
                rew = eval_cmb(playbacks, demo_good, live_seed, demo_parts, keypoint_info, task, score_fn=score_fn, step_val=step_val, sf=scores_front[live_idx])
                rewards.append(rew)
            all_rewards[f"{task}_{score_fn}_{step_val}"] = rewards

In [4]:
# Results

object_uids = {'trapeze': 2, 'oval': 6}
def filter_run(pb, uid):
    """Return 1 if a run was rewarded and used the expected object, else 0.

    A run counts when the reward stored in its last step is positive and the
    'object_selected' entry of that step's info equals `uid`. The info is
    only inspected for rewarded runs.
    """
    if not pb[-1].data['rew'] > 0:
        return 0
    # np.atleast_1d lets the info be read the same way whether it is stored
    # bare or wrapped in an array.
    info = np.atleast_1d(pb[-1].data['info'])[0]
    if info['object_selected'] == uid:
        return 1
    return 0

# Filtered rewards per run configuration, keyed by '<task>_<score_fn>_<step_val>'.
new_rewards = {}

# Get filtered rewards from the run directory
for task in ['pick_n_place']:
    for score_fn in ['sum', 'prod']:
        for step_idx, step_val in enumerate([1, 5, 10, 15, 20]):
            run_dir = f'{data_dir}/multi_task_run_1vsN/{task}/{score_fn}/{step_val}'
            
            rewards = []
            # Every entry of the run directory is loaded as a saved run; the
            # order of os.listdir is arbitrary, which is fine because only the
            # mean of the rewards is used afterwards.
            for run in os.listdir(run_dir):            
                pb = PlaybackEnvServo(os.path.join(run_dir, run))
                rew = filter_run(pb, object_uids['trapeze'])
                rewards.append(rew)
            new_rewards[f"{task}_{score_fn}_{step_val}"] = rewards









In [33]:
# Reward Plot
import math
from matplotlib.ticker import MaxNLocator

import seaborn as sns
sns.set_theme(style="darkgrid")

# Step values evaluated above; they form the x axis of the plot.
steps = [1, 5, 10, 15, 20]
fig, ax = plt.subplots()
# ax = plt.figure().gca()
def format_fn(tick_val, tick_pos):
    """Tick formatter: label only ticks that coincide with an evaluated step value.

    Currently unused -- the call that would install it is commented out below.
    """
    if int(tick_val) in steps:
        return int(tick_val)
    else:
        return ''

# A FuncFormatter is created automatically.
# ax.xaxis.set_major_formatter(format_fn)
#ax.xaxis.set_major_locator(MaxNLocator(integer=True))

# Baseline mean/std plotted as the "one P-n-P demo." reference.
# NOTE(review): hard-coded values; their origin is not shown in this notebook.
mean_rP, std_rP = 0.38, 0.19


for task in ["pick_n_place"]:
    mean_rew = {}
    for score_fn in ['sum', 'prod']:
        rew = []
        for step_idx, step_val in enumerate(steps):
            key = f"{task}_{score_fn}_{step_val}"
            rew.append(np.mean(new_rewards[key]))
        mean_rew[score_fn] = rew
        # String x values make the axis categorical; "0" is prepended for the
        # baseline value so each curve starts at mean_rP.
        str_x = [str(x) for x in [0,]+steps]
        ax.plot(str_x, [mean_rP,]+rew, '.-', label=f'score fn: {score_fn}')
        
#ax.set_ylim(ymin=0)
ax.set_ylim(0.15,1.0)

# Dashed baseline line with a +/- one std band behind it.
ax.plot(["0", "20"], [mean_rP, mean_rP], "k--", label="one P-n-P demo.")
ax.axhspan(mean_rP - std_rP, mean_rP + std_rP, facecolor='gray', alpha=0.4)
ax.legend(loc="center right")

ax.set_title("Multi-Task Experiment")
ax.set_xlabel("Num. of Additional Shape-Sorting Demonstrations")
ax.set_ylabel("Mean Pick-n-Place Rewards")
# The figure is written to the current working directory in two formats.
fig.savefig('multi_task.jpg', dpi=600)
fig.savefig('multi_task.pdf', dpi=600)

<IPython.core.display.Javascript object>