# Knowing When to Put Your Foot Down

by Leslie Ikemoto, Okan Arikan, David Forsyth  
Association for Computing Machinery, 2006

Notebook by Jerome Eippers, 2024

In [None]:
%matplotlib widget
import ipyanimlab as lab
import numpy as np
from sklearn.neighbors import NearestNeighbors
from ipywidgets import widgets, interact
from ipycanvas import Canvas
from matplotlib import pyplot as plt

# 3D viewport widget shared by the render cells of this notebook.
viewer = lab.Viewer(
    move_speed=5,
    width=1280,
    height=720,
)

In [None]:
# Import the USD character asset through the viewer. `character` is the rig
# that the anim mapper, the bone lookups and the draw calls below refer to.
character = viewer.import_usd_asset('AnimLabSimpleMale.usd')

## Load a few animations

We will use a few animations to cover different locomotion types and different styles.  
At this point, we make sure that every range we keep can be sliced cleanly into clips of the default `clip_length` (200 frames).

In [None]:
# Number of frames taken on each side of a frame when building its feature
# window (a window therefore spans 1 + 2 * window_size frames).
window_size = 5
# Number of frames in one labelling clip.
clip_length = 200
# Number of neighbours the nearest-neighbour classifier is configured with.
n_neighbors = 10

In [None]:
# (start, end) frame ranges kept from each source BVH file.
ranges = {
    'walk1_subject5': [(60, 7060)],
    'walk3_subject1': [(100, 900), (1300, 2500), (4400, 5200), (7150, 7350)],
    'run1_subject2': [(100, 5100)],
    'dance1_subject2': [(100, 1900)],
    'aiming2_subject2': [(100, 4100), (4900, 6100), (8300, 8900)],
}

animations = []
for anim, clip_ranges in ranges.items():
    # A fresh mapper is created for every file that is imported.
    animmap = lab.AnimMapper(
        character,
        keep_translation=False,
        root_motion=True,
        match_effectors=True,
        local_offsets={'Hips': [0, 2, 0]},
    )
    animation = lab.import_bvh(
        f'../../resources/lafan1/bvh/{anim}.bvh',
        anim_mapper=animmap,
    )

    for start, end in clip_ranges:
        # Keep window_size extra frames on both sides so that every frame of
        # the range has a complete feature window.
        padded = slice(start - window_size, end + window_size)
        new_anim = lab.Anim(
            animation.quats[padded, :, :],
            animation.pos[padded, :, :],
            animation.offsets,
            animation.parents,
            animation.bones,
        )

        # Re-express bone 0 of every frame relative to bone 0 of the clip's
        # first frame (inverse of the first transform times each transform).
        first_root = (new_anim.quats[0:1, 0, :], new_anim.pos[0:1, 0, :])
        all_roots = (new_anim.quats[:, 0, :], new_anim.pos[:, 0, :])
        new_anim.quats[:, 0, :], new_anim.pos[:, 0, :] = lab.utils.qp_mul(
            lab.utils.qp_inv(first_root), all_roots
        )
        animations.append(new_anim)

In [None]:
def render(frame):
    """Draw every loaded clip at `frame`, all in the same scene.

    A clip that has fewer frames than `frame` is drawn on its last frame.
    """
    # The shadow pass is opened and closed without drawing anything into it.
    viewer.begin_shadow()
    viewer.end_shadow()

    viewer.begin_display()
    viewer.draw_ground()

    for clip in animations:
        last_frame = clip.pos.shape[0] - 1
        clip_frame = int(min(last_frame, frame))
        pose = lab.utils.quat_to_mat(clip.quats[clip_frame, ...], clip.pos[clip_frame, ...])
        viewer.draw(character, pose)

    viewer.end_display()
    viewer.execute_commands()
    
# Scrub through all the clips at once. The timeline ends on the last frame of
# the longest clip instead of a hard-coded 7000, so it covers every loaded
# frame and follows any change made to `ranges`.
longest_clip = max((clip.pos.shape[0] for clip in animations), default=1)
interact(
    render,
    frame=lab.Timeline(min=0, max=longest_clip - 1)
)
viewer

In [None]:
# Count the frames that own a complete feature window: every clip gives up
# window_size frames at each end.
total_frames = sum(clip.pos.shape[0] - 2 * window_size for clip in animations)

display(total_frames)
# Same count expressed in clips of clip_length frames.
display(total_frames / clip_length)

## Compute the feature vector

This vector holds the positions of the knees, ankles and toes over a window of time around each frame.

In [None]:
# Layout of one feature row (flattened in the order: window frame, bone, xyz):
# 6 : bones, leftleg, leftfoot, lefttoe, rightleg, rightfoot, righttoe
# 3 : xyz
# 1 + all the frames from the window before and all the frames from the window after
feature_vector = np.empty([total_frames, 6 * 3 * (1 + 2*window_size)], dtype=np.float32)
# For every feature row: (index of the clip in `animations`, frame inside that clip).
feature_vector_indices = np.empty([total_frames, 2], dtype=np.int32)

bone_indices = np.array([
    character.bone_index("LeftLeg"),
    character.bone_index("LeftFoot"),
    character.bone_index("LeftToe"),
    character.bone_index("RightLeg"),
    character.bone_index("RightFoot"),
    character.bone_index("RightToe")
], dtype=np.int32)


frame_in_vector = 0
for i, anim in enumerate(animations):
    # Use the clip's own hierarchy, not the `animation` variable that happens
    # to be left over from the loading cell.
    gquats, gpos = lab.utils.quat_fk(anim.quats, anim.pos, anim.parents)
    # Inverse of the global transform of bone 0, for every frame of the clip.
    rootq, rootp = lab.utils.qp_inv((gquats[:, 0, :], gpos[:, 0, :]))

    # Skip the padding frames: only frames with a full window on both sides.
    for frame in range(window_size, gpos.shape[0] - window_size):
        feature_positions = gpos[frame-window_size:frame+window_size+1, bone_indices, :]
        # Express the whole window in the root space of its centre frame.
        feature_positions = lab.utils.quat_mul_vec(rootq[frame][np.newaxis, np.newaxis, :], feature_positions)
        feature_positions += rootp[frame][np.newaxis, np.newaxis, :]

        feature_vector[frame_in_vector, :] = feature_positions.flatten()
        feature_vector_indices[frame_in_vector, :] = [i, frame]

        frame_in_vector += 1

In [None]:
def render(frame, mirror=False):
    """Draw the feature points stored in row `frame` of `feature_vector`.

    Args:
        frame: Row index into `feature_vector`.
        mirror: When true, the x coordinate of every point is negated before
            drawing.
    """
    # The shadow pass is opened and closed without drawing anything into it.
    viewer.begin_shadow()
    viewer.end_shadow()

    viewer.begin_display()
    viewer.draw_ground()
    viewer.end_display()

    # Depth testing is switched off before the markers are drawn.
    viewer.disable(depth_test=True)

    # Copy so that mirroring never writes back into `feature_vector`.
    positions = feature_vector[frame, :].reshape(-1, 3).copy()
    if mirror:
        positions[:, 0] *= -1

    # One 4x4 matrix per point. The count comes from the reshaped rows so it
    # is an integer (the previous `shape[1]/3` handed a float to `repeat`).
    point_matrices = np.eye(4, dtype=np.float32)[np.newaxis, ...].repeat(positions.shape[0], axis=0)
    point_matrices[:, :3, 3] = positions
    viewer.draw_axis(point_matrices, 2)

    viewer.execute_commands()
    
# Browse the feature rows one at a time.
feature_timeline = lab.Timeline(min=0, max=total_frames - 1)
interact(render, frame=feature_timeline)
viewer

## Train the oracle

We start from an empty set of trained feature vectors and labels, and we pick the first clip of the first animation to set its labels manually.

In [None]:
# Nothing has been labelled yet: both arrays start with zero rows. The
# features keep the column count of `feature_vector`; labels have 4 columns.
feature_columns = feature_vector.shape[1]
trained_feature_vector = np.empty((0, feature_columns), dtype=np.float32)
trained_label = np.empty((0, 4), dtype=np.int32)

In [None]:
# First clip of the first animation, skipping the leading padding frames so
# that animation frame k lines up with feature row k.
first_clip = slice(window_size, clip_length + window_size)
animation_q = animations[0].quats[first_clip, ...]
animation_p = animations[0].pos[first_clip, ...]
_, animation_gp = lab.utils.quat_fk(animation_q, animation_p, animations[0].parents)

# Matching feature rows, and an all-zero label table to be filled in by hand.
current_feature_vector = feature_vector[:clip_length]
current_label = np.zeros([clip_length, 4], dtype=np.int8)

In [None]:
# Canvas on which the four label tracks are drawn and edited with the mouse.
canvas = Canvas(width=1280, height=160)

# Index of the label track under the mouse cursor; stays at -1 until the
# mouse-move handler updates it.
mouse_over_index = -1

def update_canvas():
    """Redraw the four label tracks of the current clip on the canvas.

    Each frame gets one bar per track: 5 pixels tall when the label is 0 and
    35 pixels tall when it is 1.
    """
    canvas.clear()

    frame_count = animation_q.shape[0]
    bar_pitch = 1280.0 / frame_count
    xs = np.linspace(0, 1280 - bar_pitch, frame_count, dtype=np.int32)

    # (label column, top of the track in pixels, fill colour)
    tracks = (
        (0, 0, 'green'),
        (1, 40, 'green'),
        (2, 80, 'cyan'),
        (3, 120, 'cyan'),
    )
    for column, top, colour in tracks:
        canvas.fill_style = colour
        canvas.fill_rects(xs, top, 4, 5 + 30 * current_label[:, column])

    
# render function
def render(frame):
    """Draw the current clip at `frame` with its feature points and contacts.

    Draws the character (with shadow), one axis marker per feature point of
    row `frame` of `current_feature_vector`, a ground diamond under every bone
    whose label is set in `current_label`, and a smaller diamond under the
    bone of the track the mouse is hovering over.

    Args:
        frame: Frame index inside the current clip.
    """
    q = animation_q[frame, ...]
    p = animation_p[frame, ...]

    # Work on a copy: indexing one frame returns a view, and flattening it in
    # place would zero the heights stored in `animation_gp`, which
    # `on_generate_click` later passes to the contact extraction.
    ogp = animation_gp[frame, ...].copy()
    ogp[:, 1] = 0

    # Four line segments (pairs of points) forming a diamond on the ground.
    points = np.array([[-5,0,0], [0,0,5], [0,0,5], [5,0,0], [5,0,0], [0,0,-5], [0,0,-5], [-5,0,0]], dtype=np.float32) * 1.5

    a = lab.utils.quat_to_mat(q, p)
    viewer.set_shadow_poi(p[0])

    viewer.begin_shadow()
    viewer.draw(character, a)
    viewer.end_shadow()

    viewer.begin_display()
    viewer.draw_ground()
    viewer.draw(character, a)
    viewer.end_display()
    viewer.disable(depth_test=True)

    # Integer point count (the previous `shape[1]/3` handed a float to `repeat`).
    point_count = current_feature_vector.shape[1] // 3
    point_matrices = np.eye(4, dtype=np.float32)[np.newaxis, ...].repeat(point_count, axis=0)
    # Feature points are stored relative to bone 0; move them back onto the
    # character using bone 0 of this frame.
    positions = current_feature_vector[frame, :].reshape(-1, 3)
    positions = lab.utils.quat_mul_vec(q[0][np.newaxis, :], positions)
    positions += p[0][np.newaxis, :]

    point_matrices[:, :3, 3] = positions
    viewer.draw_axis(point_matrices, 2)

    # One entry per label column: (bone name, colour of its contact marker).
    markers = (
        ('LeftFoot', [0, .8, 0]),
        ('LeftToe', [0, .8, 0]),
        ('RightFoot', [0.2, 0., 1]),
        ('RightToe', [0.2, 0.1, 1]),
    )

    # Contact markers for every label that is set on this frame.
    for column, (bone, colour) in enumerate(markers):
        if current_label[frame, column]:
            pts = points + ogp[character.bone_index(bone)]
            pts[:, 1] = 0
            viewer.draw_lines(pts, color=np.array(colour, dtype=np.float32))

    # Smaller marker, in the default line colour, for the hovered track.
    if 0 <= mouse_over_index < len(markers):
        bone = markers[mouse_over_index][0]
        pts = points * .8 + ogp[character.bone_index(bone)]
        pts[:, 1] = 0
        viewer.draw_lines(pts)

    viewer.execute_commands()
    
    
# the timeline we use: one step per frame of the current clip
timeline = lab.Timeline(min=0, max=animation_q.shape[0]-1)
    
# the buttons of the interface ---------------------------------------
# A button that triggers automatic label generation, and the slider whose
# value is handed to the contact extraction when the button is clicked.
bt_auto = widgets.Button(description='generate using speed')
speed_spin = widgets.FloatSlider(min=0.01, max=1, step=0.01)
buttons = widgets.HBox([bt_auto, speed_spin])

def on_generate_click(event):
    """Overwrite all four label tracks with automatically extracted contacts.

    The global bone positions of the current clip, the left and right
    foot/toe bone indices and the slider value are passed to
    `lab.utils.extract_feet_contacts`; its first result fills the two left
    columns and its second result the two right columns.

    Args:
        event: Value passed by the button callback; not used.
    """
    left_bones = [character.bone_index('LeftFoot'), character.bone_index('LeftToe')]
    right_bones = [character.bone_index('RightFoot'), character.bone_index('RightToe')]
    left_contacts, right_contacts = lab.utils.extract_feet_contacts(
        animation_gp, left_bones, right_bones, speed_spin.value
    )
    current_label[:, :2] = left_contacts
    current_label[:, 2:] = right_contacts

    render(timeline.value)
    update_canvas()

bt_auto.on_click(on_generate_click)


# canvas interact
# Painting state shared by the handlers:
# [label column being painted (-1 while no button is held), value being painted]
do_set_values = [-1, 0]


def _canvas_to_cell(x, y):
    """Map canvas pixel coordinates to a valid (frame, label column) pair.

    Both results are clamped, so a pointer sitting exactly on the right or
    bottom edge (x == 1280 or y == 160) no longer produces an index one past
    the end of `current_label`.
    """
    frame_count = animation_q.shape[0]
    frame = int(x / 1280.0 * frame_count)
    index = int(y / 160.0 * 4.0)
    frame = min(max(frame, 0), frame_count - 1)
    index = min(max(index, 0), 3)
    return frame, index


def handle_mouse_move(x, y):
    """Follow the mouse: move the timeline, redraw, and paint while dragging."""
    global mouse_over_index
    frame, mouse_over_index = _canvas_to_cell(x, y)
    timeline.set_value(frame)
    render(frame)
    # While a button is held, keep writing the value chosen on mouse down
    # into the column chosen on mouse down.
    if do_set_values[0] > -1:
        current_label[frame, do_set_values[0]] = do_set_values[1]
        update_canvas()

canvas.on_mouse_move(handle_mouse_move)


def handle_mouse_down(x, y):
    """Toggle the label under the mouse and start painting that value."""
    frame, index = _canvas_to_cell(x, y)
    current_label[frame, index] = not current_label[frame, index]
    do_set_values[0] = index
    do_set_values[1] = current_label[frame, index]
    render(frame)
    update_canvas()

canvas.on_mouse_down(handle_mouse_down)


def handle_mouse_up(x, y):
    """Stop painting."""
    do_set_values[0] = -1

canvas.on_mouse_up(handle_mouse_up)

    
# interactive render: redraw the viewer whenever the timeline changes
interact(
    render, 
    frame=timeline
)

# Draw the label tracks once, then show the controls, the label canvas and
# the 3D viewer, in that order.
update_canvas()
display(buttons)
display(canvas)
display(viewer)

## Compute the Oracle

After training, we can append the newly trained vectors to the full set of trained vectors and create the classifier.

In [None]:
# Append the clip that was just labelled (rows are stacked along axis 0).
# Running this cell again appends the same clip a second time.
trained_feature_vector = np.concatenate(
    (trained_feature_vector, current_feature_vector),
    axis=0,
)
trained_label = np.concatenate(
    (trained_label, current_label),
    axis=0,
)

In [None]:
# create the classifier
# Mirror augmentation: every labelled sample is also added reflected through
# the x = 0 plane. A feature row is laid out as (window frame, side, bone, xyz)
# with side = left/right and three bones per side. A reflected pose therefore
# needs its x negated AND its left/right bone slots exchanged; negating x
# alone, as was done before, does not match the label swap applied below.
sample_count = trained_feature_vector.shape[0]
window_frames = trained_feature_vector.shape[1] // (2 * 3 * 3)
mirror_trained_feature_vector = trained_feature_vector.reshape(sample_count, window_frames, 2, 3, 3)[:, :, ::-1].copy()
mirror_trained_feature_vector[..., 0] *= -1
mirror_trained_feature_vector = mirror_trained_feature_vector.reshape(sample_count, -1)

classifier_feature_vector = np.concatenate([trained_feature_vector, mirror_trained_feature_vector])
# Label columns are (left foot, left toe, right foot, right toe); the mirrored
# samples take the labels of the opposite side.
classifier_label = np.concatenate([trained_label, trained_label[:, [2,3,0,1]]])

classifier = NearestNeighbors(n_neighbors=n_neighbors)
classifier.fit(classifier_feature_vector)

In [None]:
def predict(features):
    """Predict the four contact labels of every row of `features`.

    Each row is labelled by a vote among its nearest neighbours in the
    classifier: a label is 1 when at least 60% of the neighbours have it set
    and 0 when at most 40% do. In between, the label copies the label already
    decided for the previous row, which assumes that consecutive rows are
    consecutive frames.

    Args:
        features: 2D array of feature rows with the same columns as the rows
            the classifier was fitted on.

    Returns:
        An int32 array of zeros and ones with one row per input row and one
        column per label.
    """
    indices = classifier.kneighbors(features, return_distance=False)
    # Fraction of neighbours voting "contact". Averaging over the neighbour
    # axis stays correct for any `n_neighbors`, unlike dividing by 10.
    label_probability = classifier_label[indices].mean(axis=1)

    label = np.zeros(label_probability.shape, dtype=np.int32)
    label[label_probability >= 0.6] = 1

    undecided = np.logical_and(label_probability > 0.4, label_probability < 0.6)
    # argwhere lists rows in increasing order, so a label carried over from
    # the previous row is already final when the next row is visited.
    for f, i in np.argwhere(undecided):
        if f > 0 and label[f - 1, i]:
            label[f, i] = 1

    return label

## Iterate....

We can now go and pick the next clip to train on.

In [None]:
# get the first range, to see if it works
# The slice must end at clip_length*3 + window_size: feature row k belongs to
# animation frame k + window_size, so without the offset the animation is
# window_size frames shorter than the feature rows and labels below.
preview_frames = slice(window_size, clip_length*3 + window_size)
animation_q, animation_p = animations[0].quats[preview_frames, ...], animations[0].pos[preview_frames, ...]
_, animation_gp = lab.utils.quat_fk(animation_q, animation_p, animations[0].parents)
current_feature_vector = feature_vector[:clip_length*3]
current_label = predict(current_feature_vector)

In [None]:
# get the worst labeling
# The feature row that lies farthest from its nearest trained sample is the
# one the classifier knows least about; pick the clip that contains it.
distances, _ = classifier.kneighbors(feature_vector, n_neighbors=1, return_distance=True)
clip_start_frame = (int(distances.argmax()) // clip_length) * clip_length
# feature_vector_indices stores (clip index, frame inside that clip) per row.
anim_id, anim_frame = feature_vector_indices[clip_start_frame]

worst_clip = slice(anim_frame, anim_frame + clip_length)
animation_q, animation_p = animations[anim_id].quats[worst_clip, ...], animations[anim_id].pos[worst_clip, ...]
# Use the hierarchy of the selected clip rather than always clip 0.
_, animation_gp = lab.utils.quat_fk(animation_q, animation_p, animations[anim_id].parents)
current_feature_vector = feature_vector[clip_start_frame:clip_start_frame+clip_length]
current_label = predict(current_feature_vector)

## Load and Save

In [None]:
## commented out to avoid writing by mistake

# import pickle

# with open('foot_feature_vector.dat', 'wb') as f:
#     pickle.dump((trained_feature_vector, trained_label), f)

In [None]:
# Reload previously saved training data, replacing whatever is currently in
# `trained_feature_vector` / `trained_label`.
# NOTE(review): pickle.load can execute arbitrary code while unpickling --
# only open a 'foot_feature_vector.dat' that you produced yourself.
import pickle
with open('foot_feature_vector.dat', 'rb') as f:
    # The file holds one (feature rows, labels) tuple, as written by the
    # commented-out save cell.
    features, labels = pickle.load(f)
    trained_feature_vector = features
    trained_label = labels
    
# Number of labelled frames, and the same count in clips of clip_length frames.
display(trained_label.shape[0])
display(trained_label.shape[0]/clip_length)