# Demonstration View

1. Compute Keep Steps
    1. Trajectory Analysis
    2. Compute Keep Steps
    3. Verify
2. Compute Masks
    1. Compute Mask
    2. Verify



View a demonstration by sliding through the frames and generate foreground segmentation.

This script creates the files that flowcontrol requires, e.g. `episode_0_keep.json` and `episode_0_mask.npz`.

In [None]:
import os
import numpy as np
from tqdm import tqdm

def isnotebook():
    """Report whether the code runs inside a Jupyter notebook or qtconsole.

    Returns:
        True only when the active IPython shell class is named
        ``ZMQInteractiveShell``. False for a terminal IPython session, any
        other shell type, or when ``get_ipython`` is not defined (probably
        a standard Python interpreter).
    """
    try:
        shell_name = get_ipython().__class__.__name__
    except NameError:
        # get_ipython is not available outside of IPython
        return False
    # every shell other than the ZMQ one counts as non-notebook
    return shell_name == 'ZMQInteractiveShell'

# Decide once whether we run inside a notebook; the plotting/widget cells
# further down are all guarded by this flag.
interactive = isnotebook()  # becomes overwritten
if interactive:
    # switch matplotlib to the interactive notebook backend
    get_ipython().run_line_magic('matplotlib', 'notebook')
    # presumably the source of widgets, Layout and interact used by the plot cells
    from ipywidgets import *
    import matplotlib.pyplot as plt

In [None]:
import json
# default parameter values
segment_height = False
segment_labels = False  # this seems deprecated
segment_imgheight = False
segment_center = False
segment_erode = False
keep_frames_method = "sparse"
demonstration_type = None  # leave as None, set automatically

# `conf` holds one entry per segment of the demonstration; each entry is a
# sequence of segmentation options that get_mask applies in order.
if interactive:
    # set parameters here
    # NOTE: the second recording/conf pair below overrides the first one,
    # comment out whichever is not wanted.
    recording, episode_num = "./tmp_test/pick_n_place/", 0
    conf = [ (dict(name="color", color=(0, 0, 1), threshold=.65), dict(name="center")),
             (dict(name="color", color=(1, 0, 0), threshold=.90), dict(name="center")),
             (dict(name="color", color=(1, 0, 0), threshold=.90), dict(name="center"))]

    recording, episode_num = "/media/kuka/Seagate Expansion Drive/kuka_recordings/flow/multi2", 1
    conf = [ (dict(name="color", color=(1, 1, 0), threshold=.90), dict(name="center")),
             (dict(name="color", color="bw", threshold=.90), dict(name="center")) ]
    #conf = [ (dict(name="color", color=(200/255, 230/255, 220/255), threshold=.90), dict(name="center")),
    #         (dict(name="color", color="bw", threshold=.90), dict(name="center")) ]

else:
    # expect commandline input
    import sys
    if len(sys.argv) != 3:
        print("Usage: Demonstration_Viewer.py <episode_dir> <episode_num>")
        if len(sys.argv) < 3:
            # without both arguments the lookups below would fail with an
            # IndexError, so stop right after showing the usage message
            sys.exit(1)
    recording = sys.argv[1]
    episode_num = int(sys.argv[2])

    # the segmentation configuration is stored next to the recording
    with open(os.path.join(recording, "segment_conf.json"), "r") as f_obj:
        conf = json.load(f_obj)
    print(conf)


# Load the recorded episode archive; the cells below read these arrays.
recording_fn = os.path.join(recording, "episode_{}.npz".format(episode_num))
recording_dict = np.load(recording_fn)
state_recording = recording_dict["robot_state_full"]
actions = recording_dict["actions"]
# the first three state columns are taken as the end-effector position
ee_positions = state_recording[:, :3]
video_recording = recording_dict["rgb_unscaled"]
# seg_masks is optional, it is only needed for the segmentation test at the end
try:
    seg_masks = recording_dict["seg_masks"]
except (KeyError,ValueError):
    seg_masks = None

num_frames = video_recording.shape[0]
max_frame = num_frames-1  # index of the last frame
print("loaded.")

# 1. Compute Keep Steps 

Decide which frames to keep, saved as per-frame boolean array.

Various options are possible; the current choice is to find frames where movement is minimal.

Then look at gripper motion and keep only those where gripper is stable.

In [None]:
GRIPPER_OPEN, GRIPPER_CLOSE = 1.0, -1.0  # assume normalized actions

# use actions here instead of state position recordings as these
# are more direct and reliable
gr_actions = actions[:, -1]
# keysteps holds every index i where the gripper action differs between
# frame i and frame i+1
keysteps = np.where(np.diff(gr_actions))[0].tolist()
keystep_names = [""] * len(keysteps)

# divide sequence into steps, defined by gripper action
segment_steps = np.zeros(num_frames)
# an explicit integer dtype keeps the index array valid when keysteps is
# empty (an empty default array is float and cannot be used as an index)
segment_steps[np.array(keysteps, dtype=int)+1] = 1
# running count of gripper changes == segment number of every frame
segment_steps = np.cumsum(segment_steps).astype(int)

# the number of gripper changes determines the kind of demonstration;
# more than two changes are not covered and raise an IndexError here
demonstration_type_auto = ["navigate", "grasp", "grasp_insert"][len(keysteps)]
if demonstration_type is None:
    demonstration_type = demonstration_type_auto
else:
    # a manually set type has to agree with what the recording shows
    assert demonstration_type == demonstration_type_auto

def check_gripper_opens(idx):
    """Assert the gripper action goes GRIPPER_OPEN -> GRIPPER_CLOSE at idx.

    Also asserts that the segment counter increases by exactly one between
    frame idx and frame idx+1.

    NOTE(review): the asserted transition is open -> close although the
    function is called "opens"; confirm the naming with the callers.
    """
    following = idx + 1
    assert gr_actions[idx] == GRIPPER_OPEN
    assert gr_actions[following] == GRIPPER_CLOSE
    # the following frame has to start the next segment
    assert segment_steps[following] - segment_steps[idx] == 1

def check_gripper_closes(idx):
    """Assert the gripper action goes GRIPPER_CLOSE -> GRIPPER_OPEN at idx.

    Also asserts that the segment counter increases by exactly one between
    frame idx and frame idx+1.

    NOTE(review): the asserted transition is close -> open although the
    function is called "closes"; confirm the naming with the callers.
    """
    following = idx + 1
    assert gr_actions[idx] == GRIPPER_CLOSE
    assert gr_actions[following] == GRIPPER_OPEN
    # the following frame has to start the next segment
    assert segment_steps[following] - segment_steps[idx] == 1

# run some checks
# each supported demonstration type has a fixed number of gripper changes;
# verify their direction and give every keystep a name
if demonstration_type == "grasp":
    assert(len(keysteps) == 1)
    check_gripper_opens(keysteps[0])
    keystep_names[0] = "gripper_open"    
elif demonstration_type == "grasp_insert":
    assert(len(keysteps) == 2)
    check_gripper_opens(keysteps[0])
    check_gripper_closes(keysteps[1])
    keystep_names[0] = "gripper_open"
    keystep_names[1] = "gripper_close"
else:
    # e.g. "navigate" (no gripper change) is not handled yet
    raise NotImplementedError

print("demonstration type:", demonstration_type)
for kn, kidx in zip(keystep_names, keysteps):
    print(kn, "@", kidx)

### Gripper Transitions

Gripper motion makes servoing difficult, filter out those frames where it moves.

In [None]:
def get_gripper_transitions(gripper_pos, diff_t=.0005, time_t=5):
    """Find where the gripper comes to rest after it has been moving.

    Args:
        gripper_pos: sequence of gripper position values, one per frame
        diff_t: a step counts as still when the absolute change between
            consecutive values is below this threshold
        time_t: number of consecutive steps, starting at the current one,
            that all have to be still for the step to count as stable
            (the window is cut off at the end of the sequence)

    Returns:
        grip_ends: list of indices i-1 for every step i at which the
            stability flag switches from False to True
        grip_stable: list with one stability flag per step
    """
    is_still = np.abs(np.diff(gripper_pos)) < diff_t
    n_steps = len(is_still)

    # look-ahead window: stable only if all of the next time_t steps are still
    grip_stable = [np.all(is_still[k:min(k + time_t, n_steps)])
                   for k in range(n_steps)]

    # a rising edge of the stability flag marks the end of a gripper motion
    grip_ends = [k - 1 for k in range(1, n_steps)
                 if grip_stable[k] and not grip_stable[k - 1]]

    return grip_ends, grip_stable

# the second-to-last state column is used as the gripper position
grip_ends, grip_stable = get_gripper_transitions(state_recording[:, -2])

# fix edge case: the gripper doesn't stop moving before the demo ends
if len(grip_ends) < len(keysteps):
    grip_ends.append(max_frame)    
assert len(keysteps) == len(grip_ends)

# (start, end) pairs: gripper action change and the frame where it settles
grip_unstable = list(zip(keysteps, grip_ends))

print("grip_unstable", grip_unstable)

### Movement Transitions

Slow robot motion indicates motion to a stable position, which we want to follow.

In [None]:
vel_threshold = .002  # [m/s]?

def get_stable_points(pos_vec):
    """Find the slowest frame of every slow-motion region of a trajectory.

    The per-step displacement norm of pos_vec is compared against the
    module-level vel_threshold. Consecutive steps below the threshold form
    a region and the step with the smallest displacement of each region is
    reported.

    Args:
        pos_vec: 2d array of positions, one row per frame

    Returns:
        vel_stable: list of int indices, one per region; the step index is
            shifted by one, presumably to refer to the frame reached by
            the slowest step
        vel_scl: array of displacement norms, one entry less than frames
    """
    vel_vec = np.diff(pos_vec, axis=0)
    vel_scl = np.linalg.norm(vel_vec, axis=1)

    # This first loop gets minimal regions as (start, stop), both inclusive
    min_regions = []
    start, stop = None, None
    for i, vel in enumerate(vel_scl):
        if vel < vel_threshold:
            if start is None:
                start = i
            stop = i
        elif start is not None:
            min_regions.append((start, stop))
            start, stop = None, None
    # NOTE: a region that is still open when the trajectory ends is not
    # reported (unchanged behaviour).

    # This second loop gets minimal value
    vel_stable = []
    for start, stop in min_regions:
        # stop is inclusive, so the slice is never empty, not even for a
        # region that consists of a single step
        min_idx = int(start + 1 + np.argmin(vel_scl[start:stop + 1]))
        if not vel_stable or vel_stable[-1] != min_idx:
            vel_stable.append(min_idx)
    return vel_stable, vel_scl

# the first three state columns are used as the position of the trajectory
vel_stable, vel_scl = get_stable_points(state_recording[:, :3])
print("vel_stable", vel_stable)

In [None]:
# Option 1: trust grip unstable, filter out vel_stable
# per-frame flag that is cleared while the gripper is in motion
grip_stable_arr = np.ones(num_frames)
for start, stop in grip_unstable:
    grip_stable_arr[start:stop] = False
# only keep slow-motion frames at which the gripper is at rest
vel_stable_filtered = [frame for frame in vel_stable if grip_stable_arr[frame]]
vel_stable = vel_stable_filtered
print("vel_stable", vel_stable, "(filtered with grip unstable)")

# Option 2: trust vel_stable, override grip_stable
# this is probably a bit more reasonable.

In [None]:
def get_keep_dict_sparse():
    """Build the sparse dict of frames to keep from the module-level results.

    Entries are written in this order, later ones replace earlier ones that
    share a frame index: frame 0 ("demo_start"), the gripper keysteps with
    their names, the frames in vel_stable ("vel_stable") and finally
    max_frame ("demo_end").

    Returns:
        dict mapping frame index to a dict with a "name" entry
    """
    keep_dict = {0: dict(name="demo_start")}
    keep_dict.update(
        (k_idx, dict(name=k_name))
        for k_idx, k_name in zip(keysteps, keystep_names)
    )
    keep_dict.update(
        (int(vel_s), dict(name="vel_stable")) for vel_s in vel_stable
    )
    keep_dict[int(max_frame)] = dict(name="demo_end")
    return keep_dict

# decide which frames to keep, after gripping mask a few steps
# only the "sparse" method is implemented, the other two exist as sketches
if keep_frames_method == "all":
    raise NotImplementedError
    #keep_array = np.ones(num_frames, dtype=bool)
elif keep_frames_method == "dense":
    raise NotImplementedError
    #keep_array = np.ones(num_frames, dtype=bool)
    #grip_step = keysteps[0]
    #gripper_close_steps = 30
    #keep_array[grip_step:grip_step+gripper_close_steps] = False
elif keep_frames_method == "sparse":
    keep_dict = get_keep_dict_sparse()
else:
    raise ValueError("unknown keep_frames_method: {}".format(keep_frames_method))

# sort dictionary
# (rebuilt in ascending frame order so that later loops visit frames in time order)
keep_dict = {k: keep_dict[k] for k in sorted(keep_dict)}
print("keep_dict", list(keep_dict.keys()))

In [None]:
from scipy.spatial.transform import Rotation as R

dist_threshold = 0.020

def get_rel_motion(start, finish):
    """Compute the motion between two recorded states.

    Args:
        start: state array, entries 0:3 are read as position and entries
            3:7 as a quaternion
        finish: state array with the same layout

    Returns:
        list of 7 floats: the position difference (finish - start) followed
        by the quaternion of inverse(finish rotation) * (start rotation)
    """
    # position
    translation = finish[:3] - start[:3]
    # orientation
    rot_finish = R.from_quat(finish[3:7])
    rot_start = R.from_quat(start[3:7])
    rotation = rot_finish.inv() * rot_start
    #assert rotation.magnitude() < .35, rotation.magnitude() # for now
    return translation.tolist() + rotation.as_quat().tolist()


# Thin out keep frames: walk over the keep frames in order and mark the
# earlier frame of a pair for removal when both frames are closer than
# dist_threshold (translation only) and belong to the same segment.
remove_keys = []
prior_key = None
for key in keep_dict:
    if prior_key is None:
        prior_key = key
        continue
    rel_motion = get_rel_motion(state_recording[prior_key],
                                state_recording[key])
    # only the first three entries (position difference) enter the distance
    rel_dist = np.linalg.norm(rel_motion[0:3])
    same_step = segment_steps[prior_key] == segment_steps[key]
    if rel_dist < dist_threshold and same_step:
        print("{} -> {}: {:.4f}".format(prior_key, key, float(rel_dist)),"(removing)")
        # NOTE(review): prior_key is not advanced after a removal, so the
        # next frame is compared against the removed frame again - confirm
        # that this is intended.
        remove_keys.append(prior_key)
    else:
        prior_key = key

# the same frame can be marked several times, see above
remove_keys = list(set(remove_keys))

for key in remove_keys:
    del keep_dict[key]
    
print("keep_dict", list(keep_dict.keys()), "(filtered with dist)")

In [None]:
# Iterate backward and save dist to grasp
# grip_dist counts the keep frames up to the next gripper keystep, it is 0
# at the keystep itself and capped at max_dist
max_dist = 10
step_since_grasp = max_dist
for key in reversed(sorted(keep_dict)):
    name = keep_dict[key]["name"]
    if name.startswith("gripper_"):
        step_since_grasp = 0
    else:
        step_since_grasp = min(step_since_grasp+1, max_dist)
    keep_dict[key]["grip_dist"] = step_since_grasp
    
# Give every keep frame except the first a "pre" dict that is computed
# from the previous keep frame
prior_key = None
for key in sorted(keep_dict):
    if prior_key is None:
        prior_key = key
        continue
    pre_dict = {}
    
    same_segment = segment_steps[key] == segment_steps[prior_key]
    if not same_segment:
        # the segment changed since the previous keep frame, store the
        # gripper action of this frame
        pre_dict["grip"] = gr_actions[key]
    
    # relative motion if the previous keep frame is at or right before a
    # gripper keystep, absolute state otherwise
    if keep_dict[prior_key]["grip_dist"] < 2:
        rel_motion = get_rel_motion(state_recording[prior_key],
                                    state_recording[key])
        pre_dict["rel"] = rel_motion
    else:
        abs_motion = state_recording[key].tolist()
        pre_dict["abs"] = abs_motion
        
    keep_dict[key]["pre"] = pre_dict
    prior_key = key

    
# double check that we retain all keep steps
assert(np.all([k in keep_dict.keys() for k in keysteps]))

# written next to the recording, e.g. episode_0.npz -> episode_0_keep.json
keep_fn = recording_fn.replace(".npz", "_keep.json")
with open(keep_fn, 'w') as outfile:
    json.dump(keep_dict, outfile)
print("Saved to", keep_fn)

## 1. C. Verify keep frames

In [None]:
# Interactive check of the keep frames: camera image on top, gripper,
# segment and keep signals below, a slider selects the frame.
if interactive:
    # per-frame indicator that is set at every keep frame
    keep_array = np.zeros(segment_steps.shape)
    keep_array[sorted(keep_dict.keys())] = True
    fig, (ax, ax2) = plt.subplots(2, 1)
    line = ax.imshow(video_recording[0])
    ax.set_axis_off()
    # the signals are rescaled for display only
    ax2.plot(state_recording[:, -2]*10, label="grip raw")
    ax2.plot(segment_steps/10, label="steps")
    ax2.plot(keep_array, label="keep")
    ax2.plot((gr_actions+1)/2, label="gripper action")
    ax2.set_ylabel("value")
    ax2.set_xlabel("frame number")
    # vertical marker for the selected frame, moved by update below
    vline = ax2.axvline(x=2, color="k")
    ax2.legend()

    # slider callback: move the marker, show frame w and print its keep entry
    def update(w):
        vline.set_data([w, w], [0, 1])
        line.set_data(video_recording[w])
        fig.canvas.draw_idle()
        if w in keep_dict:
            print(keep_dict[w])
            print()
    slider_w = widgets.IntSlider(min=0, max=max_frame, step=1, value=0,
                                 layout=Layout(width='70%'))
    interact(update, w=slider_w)

    print("What I want to know: do I servo y/n, do I translate?")
    # Convert this keep_array stuff into a dict
    # then do one iteration of look ahead to set a servoing flag.

### Show Gripper Motion
Show when the gripping is done, depending on gripper motion.

In [None]:
# Plot gripper
# gripper action and stability flag on the left axis, the raw gripper
# position on a second (right) axis, a slider selects the frame
if interactive:
    val, label = state_recording[:, -2], "gripper_pos"
    fig, (ax, ax2) = plt.subplots(2, 1)
    line = ax.imshow(video_recording[0])
    ax.set_axis_off()
    line1 = ax2.plot((gr_actions+1)/2, label="gripper action", color="r")
    line2 = ax2.plot(grip_stable, label="grip stable")
    ax2.set_ylabel("value")
    ax2.set_xlabel("frame number")
    ax2r = ax2.twinx()
    line3 = ax2r.plot(val, label=label, color="b")
    vline = ax2.axvline(x=2, color="k")
    # collect the lines of both axes so that they share one legend
    lns = line1+line2+line3
    labs = [l.get_label() for l in lns]
    ax2.legend(lns, labs)

    # slider callback: print the value at frame w, move the marker, show the frame
    def update(w):
        print("{} @ {} is {}".format(label, w, val[w]))
        vline.set_data([w, w], [0, 1])
        line.set_data(video_recording[w])
        fig.canvas.draw_idle()

    slider_w = widgets.IntSlider(min=0, max=max_frame, step=1, value=0,
                                 layout=Layout(width='70%'))
    interact(update, w=slider_w)
    

### Show Velocities
Look at the end effector motion.

In [None]:
# Interactive view of the end effector motion: x/y/z on the left axis,
# the velocity together with vel_threshold on a second (right) axis.
if interactive:
    val, label =  vel_scl, "velocity"
    fig, (ax, ax2) = plt.subplots(2, 1)
    line = ax.imshow(video_recording[0])
    ax.set_axis_off()
    line1 = ax2.plot(state_recording[:,0], label="x")
    line2 = ax2.plot(state_recording[:,1], label="y")
    line3 = ax2.plot(state_recording[:,2], label="z")
    ax2.set_ylabel("value")
    ax2.set_xlabel("frame number")
    ax2r = ax2.twinx()
    line4 = ax2r.plot(val, label=label, color="b")
    # dashed line marks the threshold used by get_stable_points
    ax2r.axhline(y=vel_threshold, linestyle="--", color="k")
    vline = ax2.axvline(x=2, color="k")
    # collect the lines of both axes so that they share one legend
    lns = line1+line2+line3+line4
    labs = [l.get_label() for l in lns]
    ax2.legend(lns, labs)

    #ax2.legend()

    # slider callback: print the value at index w, move the marker, show the frame
    # NOTE(review): vel_scl has one entry less than there are frames, so
    # val[w] presumably fails for w == max_frame - confirm.
    def update(w):
        print("{} @ {} is {}".format(label, w, val[w]))
        vline.set_data([w, w], [0, 1])
        line.set_data(video_recording[w])
        fig.canvas.draw_idle()

    slider_w = widgets.IntSlider(min=0, max=max_frame, step=1, value=0,
                                 layout=Layout(width='70%'))
    interact(update, w=slider_w)

# 2. Compute Mask

Mask out the foreground object so that foreground specific flow can be calculated.

In [None]:
from skimage import measure
from scipy import ndimage
from demo_segment_util import mask_color, erode_mask, label_mask, mask_center

# create a segmentation mask
def get_mask(frame, i=None, threshold=0):
    """
    create segmentation mask for single frame

    The segmentation options configured for the segment of frame i
    (conf[segment_steps[i]]) are applied in order. Only the "color" option
    creates a mask, all other options refine the mask of the previous
    option, so "color" has to come first. Options with an unknown name are
    skipped.

    Args:
        frame: input frame w x h x 3 [0,255] array
        i: index of frame, for indexing parameters
        threshold: threshold for color

    Returns:
        mask: binary numpy array, with True == keep

    Raises:
        NotImplementedError: for the options "height" and "labels"
    """
    image = frame.copy()
    step_conf = conf[segment_steps[i]]

    for seg_option in step_conf:
        name = seg_option["name"]

        if name == "color":
            mask = mask_color(image, color_choice=seg_option["color"],
                              threshold=threshold)

        elif name == "erode":
            mask = erode_mask(mask)

        elif name in ("height", "labels"):
            # Both options were only sketched: "height" was meant to add a
            # depth based mask, "labels" a binary closing followed by
            # label_mask. The sketches used names that are not defined in
            # this file and were never reached, so they are left out.
            raise NotImplementedError

        elif name == "imgheight":
            # discard all image rows above the configured height
            mask[:seg_option["height"], :] = False

        elif name == "center":
            mask = mask_center(mask)

    return mask

# Plot
# Interactive tuning of the color threshold: slider i selects the frame,
# slider t the threshold in percent.
if interactive:
    print("Colored stuff is keept - mask==True")
    print("keysteps:", keysteps)
    print("segments: ", len(conf))

    fig, ax = plt.subplots(1, 1)
    line = ax.imshow(video_recording[25])
    ax.set_axis_off()
    # slider callback: mask frame i with threshold t/100 and show the result
    def update(i, t):
        # detect first frame of next step
        first_frame = i==0 or i-1 in keysteps
        if first_frame:
            # reset the threshold slider to the configured value of the step
            # (slider_t is created below, it exists once update is called)
            step = segment_steps[i]
            new_t = conf[step][0]["threshold"]
            print("setting t =", new_t, "because i =", i)
            slider_t.value = new_t*100

        image = video_recording[i].copy()
        mask = get_mask(image, i=i, threshold=t/100)    
        # paint everything outside of the mask white
        image[np.logical_not(mask)] = 255, 255, 255
        line.set_data(image)
        fig.canvas.draw_idle()

    slider_i = widgets.IntSlider(min=0, max=max_frame, step=1, value=0,
                                 layout=Layout(width='70%'))
    slider_t = widgets.IntSlider(min=0, max=100, step=1, value=conf[0][0]["threshold"]*100,
                                 layout=Layout(width='70%'))
    interact(update, i=slider_i, t=slider_t)

In [None]:
# Compute the foreground mask of every frame and save all of them.
# one boolean mask per frame, same height and width as the video
masks = np.zeros(video_recording.shape[:3], dtype=bool)
switch_frame = keysteps
print("switching at:", switch_frame)

# print the color and threshold of the first option of every segment
for seg_option in conf:
    c = seg_option[0]["color"]
    t = seg_option[0]["threshold"]
    print("c={}, t={}".format(c, t))

for i in tqdm(range(len(video_recording))):
    try:
        step = segment_steps[i]
        threshold = conf[step][0]["threshold"]
    except IndexError:
        # NOTE(review): stops silently when conf has no entry for this
        # segment, the masks of all remaining frames then stay all False
        break
    mask = get_mask(video_recording[i], i, threshold)
    masks[i] = mask

print(np.mean(masks) * 100, "% of pixels fg")
# written next to the recording, e.g. episode_0.npz -> episode_0_mask.npz
mask_fn = recording_fn.replace(".npz", "_mask.npz")
np.savez_compressed(mask_fn, mask=masks)
print("Saved to", mask_fn)

## 2. B. Verify Masking Results

In [None]:
# Interactive check of the saved masks, a slider selects the frame.
if interactive:
    fig, ax = plt.subplots(1)
    handle = ax.imshow(masks[25])
    ax.set_axis_off()

    # slider callback: show frame i with everything outside of its mask in white
    def update(i):
        image = video_recording[i].copy()
        mask = masks[i]
        print(round(np.mean(mask)*100),"%fg, mask shape", mask.shape)
        image[np.logical_not(mask)] = 255, 255, 255
        handle.set_data(image)
        fig.canvas.draw_idle()

    slider_i2 = widgets.IntSlider(min=0, max=max_frame, step=1, value=0,
                                 layout=Layout(width='70%'))
    interact(update, i=slider_i2)

In [None]:
# Optional test against the recorded segmentation masks: for every frame
# the computed foreground mask has to cover more than 90% of the pixels of
# the selected segment id.
if seg_masks is not None:
    for i in range(num_frames):
        image = video_recording[i].copy()
        mask = masks[i]
        # mask segmentation mask(gt) with fg mask (computed)
        ma = np.ma.array(seg_masks[i], mask=np.logical_not(masks[i]))
        ma_unique = np.unique(ma, return_counts=True)
        # unique is sorted by size, pick the biggest
        # NOTE(review): np.unique is documented to sort by value, not by
        # count, and the line below selects the first nonzero id - confirm
        # that this picks the intended segment.
        idx_largest = np.where(ma_unique[0])[-1][0]
        seg_id, mask_count = ma_unique[0][idx_largest], ma_unique[1][idx_largest]
        seg_count = np.sum(seg_masks[i] == seg_id)
        # test how much we segmented / how much there is
        score = mask_count / seg_count
        assert score > .9
        
    print("Segmentation test passed.")

# Masking based on Depth