# Real-Time Planning for Parameterized Human Motion

by Wan-Yen Lo and Matthias Zwicker
2008

Notebook by Jerome Eippers, 2025

In [None]:
%matplotlib widget
import pickle
from random import randrange
from random import uniform
import numpy as np
from ipywidgets import widgets, interact
from matplotlib import pyplot as plt
from matplotlib import colors as plt_color
from sklearn.ensemble import ExtraTreesRegressor
import ipyanimlab as lab

# Single viewer instance shared by every render cell of this notebook (1280x720).
viewer = lab.Viewer(move_speed=5, width=1280, height=720)

## Load

In [None]:
# load the character
character = viewer.import_usd_asset('AnimLabSimpleMale.usd')
# Add heel and ball helper bones under each foot. Their indices are gathered in
# `foottag_indices` and used further down to build the foot constraints of each clip.
# NOTE(review): the first array looks like an identity quaternion and the second
# like a local offset from the parent foot -- confirm against ipyanimlab's add_bone.
character.add_bone('LeftHeel', np.array([1,0,0,0]), np.array([9.2,0,-12]), 'LeftFoot')
character.add_bone('LeftBall', np.array([1,0,0,0]), np.array([14.5,0,8.22]), 'LeftFoot')
character.add_bone('RightHeel', np.array([1,0,0,0]), np.array([-9.2,0,-12]), 'RightFoot')
character.add_bone('RightBall', np.array([1,0,0,0]), np.array([-14.5,0,8.22]), 'RightFoot')

# Cache the bone indices that the rest of the notebook refers to.
left_heel = character.bone_index('LeftHeel')
left_ball = character.bone_index('LeftBall')
right_heel = character.bone_index('RightHeel')
right_ball = character.bone_index('RightBall')
left_foot = character.bone_index('LeftFoot')
right_foot = character.bone_index('RightFoot')
left_toe = character.bone_index('LeftToe')
right_toe = character.bone_index('RightToe')
# Order matters: [left heel, left ball, right heel, right ball].
# NOTE(review): int8 only holds indices up to 127 -- confirm the skeleton stays below that.
foottag_indices = np.asarray([left_heel, left_ball, right_heel, right_ball], dtype=np.int8)
print(foottag_indices)

In [None]:
# Mesh drawn at the character root to show the orientation requested with the gamepad
# (see the gamepad-driven render cell).
direction = viewer.import_usd_asset('../../meshes/displacement.usd')

In [None]:
# Small flat marker mesh drawn at the foot constraints of a clip.
# 11 vertices (10 on a ring at y=0.58 plus a centre vertex, index 10) and 5 triangles
# that all share the centre vertex.
# NOTE(review): each vertex row has 10 floats, presumably position (3), normal (3)
# and RGBA colour (4) -- confirm against ipyanimlab's create_asset.
target = viewer.create_asset(
    vertices = np.asarray([[9.37, 0.58, -6.81, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0], [3.58, 0.58, -11.01, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0], [-3.58, 0.58, -11.01, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0], [-9.37, 0.58, -6.81, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0], [-11.58, 0.58, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0], [-9.37, 0.58, 6.81, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0], [-3.58, 0.58, 11.01, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0], [3.58, 0.58, 11.01, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0], [9.37, 0.58, 6.81, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0], [11.58, 0.58, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0], [0.0, 0.58, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0]], dtype=np.float32),
    indices = np.asarray([[1, 2, 10], [3, 4, 10], [5, 6, 10], [7, 8, 10], [9, 0, 10]], dtype=np.int16)
)

## Animation Data Source
The animation data used in this notebook was generated using [Motorica.ai](https://motorica.ai).  
Please note that this data is provided under Motorica’s [Terms of Service](https://motorica2024.webflow.io/terms-of-service).  


In [None]:
# Load the raw animation data: a list of (matrices, left step ranges, right step ranges)
# entries plus the list of bone names the matrices refer to.
# NOTE: pickle.load can execute arbitrary code -- only open files from a trusted source.
with open('realtime_planning_animations_data.bin', 'rb') as f:
    animations_matrices_datas, anim_bones = pickle.load(f)

In [None]:
# Mapper that transfers each source animation onto `character`.
# NOTE(review): root_motion=True presumably makes the mapper generate a root-motion
# bone at index 0 -- confirm against ipyanimlab's AnimMapper.
animmap = lab.AnimMapper(character, root_motion=True, match_effectors=False)
animations = []

# The step ranges (left_clip / right_clip) are not needed here; they are read again
# when the clips are extracted.
for matrices_anim, left_clip, right_clip in animations_matrices_datas:
    # Convert 4x4 matrices to quaternions + positions, wrap them in an Anim and map
    # the result onto the character skeleton.
    q, p = lab.utils.m4x4_to_qp(matrices_anim)
    anim = lab.Anim(q, p, p[0], None, anim_bones)
    anim = animmap(anim)
    animations.append(anim)

# Skeleton description shared by all mapped animations.
bone_count = character.bone_count()
bones = animations[0].bones
parents = animations[0].parents

In [None]:
def render(frame, index=0):
    """Draw one frame of one mapped source animation.

    Args:
        frame: frame number coming from the timeline; clamped to the last frame
            of the selected animation.
        index: index into `animations`.
    """

    # The timeline can go beyond the length of the selected animation.
    frame = min(frame, animations[index].quats.shape[0] -1)
    q = (animations[index].quats[frame,...])
    p = (animations[index].pos[frame,...])
        
    a =  lab.utils.quat_to_mat(q, p)
    # Centre the shadow on bone 0.
    viewer.set_shadow_poi(p[0])
    
    # Shadow pass
    viewer.begin_shadow()
    viewer.draw(character, a)
    viewer.end_shadow()
    
    # Display pass
    viewer.begin_display()
    viewer.draw_ground()
    viewer.draw(character, a)
    
    viewer.end_display()

    # Skeleton overlay, drawn with the depth test disabled.
    viewer.disable(depth_test=True)
   
    viewer.draw_axis(character.world_skeleton_xforms(a), 5)
    viewer.draw_lines(character.world_skeleton_lines(a))
    
    viewer.execute_commands()
    
# Bind the timeline and an animation selector to the render function.
interact(
    render, 
    frame=lab.Timeline(max=100),
    index=widgets.IntSlider(max=len(animations)-1)
)
viewer

* 0 -> 9 - walks curved
* 10 -> 23 - turns
* 24 - walk straight  
* 25 - start  
* 26 - stop  
* 27 - stop  
* 28 - stop  
* 29 - walk real slow  
* 30 - walk slow  
* 31 -> 46 -  stop  

## Motion Model: Motion Clips

In this notebook, human motion is modeled as a collection of **short motion clips**, where **each clip represents a single walking step**.  
This design follows the approach in *Near-Optimal Character Animation with Continuous Control* (2007).

Each step is extracted from motion capture data and organized so that the clips can be **sequenced and blended** to form continuous locomotion.

### Foot Contact Alignment

To ensure **realistic and smooth transitions** between steps, each clip is **temporally aligned** at the foot contact frames.  
This means the end of one step naturally matches the beginning of the next, minimizing foot sliding or artifacts when clips are concatenated.

In [None]:
# Every clip is stored in a fixed-size buffer of MAX_STEP_LEN frames.
MAX_STEP_LEN = 64
# Each entry of animations_matrices_datas is (matrices, left step ranges, right step ranges);
# count the left and right steps over all animations.
LEFT_CLIP_COUNT = sum((len(left_clip) for matrices_anim, left_clip, right_clip in animations_matrices_datas))
RIGHT_CLIP_COUNT = sum((len(right_clip) for matrices_anim, left_clip, right_clip in animations_matrices_datas))
# Left clips occupy indices [0, LEFT_CLIP_COUNT), right clips the remaining ones.
CLIP_COUNT = LEFT_CLIP_COUNT + RIGHT_CLIP_COUNT
CLIP_COUNT

In [None]:
# Per-clip pose buffers, shape (CLIP_COUNT, MAX_STEP_LEN, bone_count, 4 or 3).
# Quaternions start as [1,0,0,0] and positions as zeros.
clips_q = np.array([1,0,0,0], dtype=np.float32)[np.newaxis,...].repeat(CLIP_COUNT * MAX_STEP_LEN * bone_count, axis=0).reshape(CLIP_COUNT, MAX_STEP_LEN, bone_count, 4)
clips_p = np.array([0,0,0], dtype=np.float32).repeat(CLIP_COUNT* MAX_STEP_LEN * bone_count).reshape(CLIP_COUNT, MAX_STEP_LEN, bone_count, 3)
# Columns 0..3: the step's key frames relative to the clip start; column 4: the clip's
# start frame inside its source animation (filled by the extraction loop).
clips_timings = np.zeros([CLIP_COUNT, 5], dtype=np.uint32)
# Index of the source animation each clip was cut from.
clips_sources = np.zeros([CLIP_COUNT], dtype=np.uint32)
# 0 for clips taken from the left ranges, 1 for clips taken from the right ranges.
clips_sides = np.zeros([CLIP_COUNT], dtype=np.uint8)

In [None]:
def compute_root(q, p):
    """Rebuild the root (bone 0) orientation from the hips and re-base the root track.

    The global orientation of the 'Hips' bone is used to derive a heading angle
    around the vertical axis. Bone 0 is given that pure yaw rotation, local
    transforms are recomputed so the global pose of the other bones is kept, and
    finally the root track is expressed relative to its own first frame.

    Args:
        q: local quaternions, indexed as [frame, bone, 4] (w, x, y, z).
        p: local positions, indexed as [frame, bone, 3].

    Returns:
        Tuple (q, p) of local quaternions and positions with the new root track.
    """
    skeleton_parents = animations[0].parents
    hips = character.bone_index('Hips')

    g_q, g_p = lab.utils.quat_fk(q, p, skeleton_parents)
    # Direction of the hips' local Y axis in world space.
    v = lab.utils.quat_mul_vec(g_q[:, hips, :], np.array([0,1,0]))
    #v = g_p[:, character.bone_index('RightHand'), :] - g_p[:, character.bone_index('Spine2'), :]
    # np.arctan2 (rather than the NumPy >= 2 only alias np.atan2) keeps this
    # consistent with compute_constraint_qp and works on older NumPy versions.
    angle = np.arctan2(v[:, 0], v[:, 2])

    # Write a complete yaw-only quaternion. The x and z components must be reset as
    # well, otherwise any residual value from the previous root orientation leaves a
    # non-unit quaternion that is no longer a pure rotation around Y.
    g_q[:, 0, 0] = np.cos(angle/2)
    g_q[:, 0, 1] = 0
    g_q[:, 0, 2] = np.sin(angle/2)
    g_q[:, 0, 3] = 0

    q, p = lab.utils.quat_ik(g_q, g_p, skeleton_parents)
    # Express the root relative to its first frame (frame 0 becomes identity).
    q[:, 0, :], p[:, 0, :] = lab.utils.qp_mul(lab.utils.qp_inv((q[0:1, 0, :], p[0:1, 0, :])), (q[:, 0, :], p[:, 0, :]))
    return q, p

def compute_clip(quats, pos, ranges):
    """Cut one step out of an animation and store it in a fixed-size clip buffer.

    Frames ranges[0]..ranges[4] (end excluded) are copied to the start of a
    MAX_STEP_LEN long buffer; the remaining frames keep identity quaternions and
    zero positions. The root track is then re-based on the first frame before the
    result is handed to compute_root.

    Args:
        quats: local quaternions of the source animation, [frame, bone, 4].
        pos: local positions of the source animation, [frame, bone, 3].
        ranges: the five frame markers of the step inside the source animation.

    Returns:
        Tuple (q, p) as returned by compute_root.
    """
    step_q = quats[ranges[0]:ranges[4], ...]
    step_p = pos[ranges[0]:ranges[4], ...]

    # Identity-filled buffers
    q = np.zeros((MAX_STEP_LEN, bone_count, 4), dtype=np.float32)
    q[..., 0] = 1
    p = np.zeros((MAX_STEP_LEN, bone_count, 3), dtype=np.float32)

    q[:step_q.shape[0], ...] = step_q
    p[:step_p.shape[0], ...] = step_p

    # Bring the root of the first frame to the origin
    inv_q, inv_p = lab.utils.qp_inv((q[0,0], p[0,0]))
    rebased = lab.utils.qp_mul(
        (inv_q[np.newaxis,...], inv_p[np.newaxis,...]),
        (q[:,0], p[:,0])
    )
    q[:,0], p[:,0] = rebased

    return compute_root(q, p)

In [None]:
# Running write cursors: left clips fill [0, LEFT_CLIP_COUNT), right clips are
# stored after them at LEFT_CLIP_COUNT + right_clip_count.
left_clip_count = 0
right_clip_count = 0

for i in range(len(animations)):
    quats, pos = animations[i].quats.copy(), animations[i].pos.copy()
    _, l_ranges, r_ranges = animations_matrices_datas[i]
    
    # Left steps of animation i
    for ranges in l_ranges:
        ranges = np.array(ranges, dtype=np.uint32)
        left_anim = compute_clip(quats, pos, ranges)
        clips_q[left_clip_count, ...], clips_p[left_clip_count, ...] = left_anim
        # Column 4 keeps the start frame in the source animation; columns 0..3 keep the
        # remaining four markers relative to that start.
        clips_timings[left_clip_count, 4] = ranges[0]
        clips_timings[left_clip_count, :4] = ranges[1:] - ranges[0]
        clips_sources[left_clip_count] = i
        clips_sides[left_clip_count] = 0
        left_clip_count += 1
           
    # Right steps of animation i, same layout but offset by LEFT_CLIP_COUNT
    for ranges in r_ranges:
        ranges = np.array(ranges, dtype=np.uint32)
        right_anim = compute_clip(quats, pos, ranges)
        clips_q[right_clip_count + LEFT_CLIP_COUNT, ...], clips_p[right_clip_count + LEFT_CLIP_COUNT, ...] = right_anim
        clips_timings[right_clip_count + LEFT_CLIP_COUNT, 4] = ranges[0]
        clips_timings[right_clip_count + LEFT_CLIP_COUNT, :4] = ranges[1:] - ranges[0]
        clips_sources[right_clip_count + LEFT_CLIP_COUNT] = i
        clips_sides[right_clip_count + LEFT_CLIP_COUNT] = 1
        right_clip_count += 1

# NOTE(review): presumably flips quaternion signs so consecutive values stay on the same
# hemisphere -- confirm which axis ipyanimlab treats as time for a 4-D input.
clips_q = lab.utils.remove_quat_discontinuities(clips_q)

In [None]:
def compute_constraint_qp(gpos, frame, foot_id, toe_id):
    """Build the ground-plane transform of a foot at a given frame.

    Args:
        gpos: global bone positions, indexed as [frame, bone, 3].
        frame: frame at which the foot is sampled.
        foot_id: bone giving the position of the constraint.
        toe_id: bone the constraint points toward.

    Returns:
        Tuple (q, p): a float32 quaternion [cos(a/2), 0, sin(a/2), 0] where `a` is
        the heading of the foot->toe vector in the XZ plane, and a float32 position
        holding the X and Z of the foot bone with Y left at 0.
    """
    foot = gpos[frame, foot_id, :]
    heading = gpos[frame, toe_id, :] - foot
    half_angle = np.arctan2(heading[0], heading[2])/2

    q = np.array([np.cos(half_angle), 0, np.sin(half_angle), 0], dtype=np.float32)
    p = np.array([foot[0], 0, foot[2]], dtype=np.float32)
    return q, p

In [None]:
class MotionClip:
    """One walking step together with its timings and its two foot constraints.

    Attributes set by the constructor:
        _id: index of the clip in the clip buffers.
        timings: the four key frames of the step, relative to the clip start.
        quats / pos: local pose data of the clip (views into clips_q / clips_p).
        side: 0 for a left clip, 1 for a right clip.
        constraints_q / constraints_p: foot transforms; slot 0 is sampled at
            timings[0] on the clip's own side, slot 1 at timings[2] on the other side.
        delta_theta: yaw of the root at frame timings[1].
    """
    def __init__(self, clip_id):
        self._id = clip_id
        self.timings = clips_timings[clip_id, :4]
        self.quats = clips_q[clip_id, ...]
        self.pos = clips_p[clip_id, ...]
        self.side = clips_sides[clip_id]

        self.constraints_q = np.zeros([2, 4], dtype=np.float32)
        self.constraints_p = np.zeros([2, 3], dtype=np.float32)

        # Yaw extracted from the w and y components of the root quaternion
        root_w = self.quats[self.timings[1], 0, 0]
        root_y = self.quats[self.timings[1], 0, 2]
        self.delta_theta = np.atan2(
            2 * root_w * root_y,
            1.0 - (2 * root_y * root_y)
        )

        _, gpos = lab.utils.quat_fk(self.quats, self.pos, animations[0].parents)

        # (heel, ball) helper bones of each foot
        left_tags = (foottag_indices[0], foottag_indices[1])
        right_tags = (foottag_indices[2], foottag_indices[3])
        if self.side == 0:
            contact_order = (left_tags, right_tags)
        else:
            contact_order = (right_tags, left_tags)

        # slot 0 -> timings[0], slot 1 -> timings[2]
        for slot, (timing_index, (heel, ball)) in enumerate(zip((0, 2), contact_order)):
            self.constraints_q[slot, :], self.constraints_p[slot, :] = compute_constraint_qp(
                gpos,
                self.timings[timing_index],
                heel,
                ball
            )

In [None]:
# One MotionClip per extracted step; list index == clip id.
motion_clips = [MotionClip(i) for i in range(CLIP_COUNT)]

In [None]:
def render(frame, clip_id=0):
    """Draw one frame of a motion clip together with its two foot constraints.

    Args:
        frame: frame inside the clip buffer (0 .. MAX_STEP_LEN-1).
        clip_id: index into `motion_clips`.
    """

    q = motion_clips[clip_id].quats[frame].copy()
    p = motion_clips[clip_id].pos[frame].copy()
    
    a = lab.utils.quat_to_mat(q, p)
    # Centre the shadow on bone 0.
    viewer.set_shadow_poi(p[0])
    
    # Shadow pass
    viewer.begin_shadow()
    viewer.draw(character, a)
    viewer.end_shadow()
    
    # Display pass
    viewer.begin_display()
    viewer.draw_ground()
    viewer.draw(character, a)

    # One matrix per foot constraint. The identity placeholder that used to be
    # assigned here was overwritten immediately and has been removed.
    contacts_matrices = lab.utils.quat_to_mat( motion_clips[clip_id].constraints_q,  motion_clips[clip_id].constraints_p )

    viewer.draw(target, contacts_matrices)
    
    viewer.end_display()

    # Overlays, drawn with the depth test disabled.
    viewer.disable(depth_test=True)

    viewer.draw_axis(contacts_matrices, 20)
    viewer.draw_axis(character.world_skeleton_xforms(a), 5)
    viewer.draw_lines(character.world_skeleton_lines(a))
    
    viewer.execute_commands()
    display(motion_clips[clip_id].timings)
    
# Bind the timeline and a clip selector to the render function.
interact(
    render, 
    frame=lab.Timeline(max=MAX_STEP_LEN-1),
    clip_id=widgets.IntSlider(max=CLIP_COUNT-1)
)
viewer

## Player

Let's have a player that can play animations and blend clips together.

In [None]:
class ClipPlayer:
    """Playback cursor over one MotionClip.

    Holds its own copy of the clip's pose data so that the root track can be moved
    in place (see align_to_out) without touching the shared clip buffers.
    """
    def __init__(self, motion):
        self.motion = motion
        # -1 so that the first tick() lands on frame 0.
        self.frame = -1
        # Frame of the outgoing clip at which this clip starts to blend in.
        self.start_at_out_frame = 0
        # Frame of this clip that is played when the blend starts.
        self.start_clip_frame = 0
        # Length of the blend, in frames.
        self.blend_in_frame_count = 0
        self.quaternions = motion.quats.copy()
        self.positions = motion.pos.copy()      
        
    def align_to_out(self, out_clip):
        """Schedule the blend from `out_clip` and move this clip onto its second constraint.

        The blend window is centred on the contact: it covers frames before the
        outgoing clip's timings[2] / this clip's timings[0], and frames after them,
        each side limited by what both clips have available.

        Args:
            out_clip: the ClipPlayer currently playing (the clip being left).
        """
        pre_contact_blend_time = min(out_clip.motion.timings[ 2] - out_clip.motion.timings[ 1], self.motion.timings[ 0])
        post_contact_blend_time = min(out_clip.motion.timings[ 3] - out_clip.motion.timings[ 2], self.motion.timings[ 1] - self.motion.timings[ 0])
        
        self.start_at_out_frame = out_clip.motion.timings[ 2] - pre_contact_blend_time
        self.start_clip_frame = self.motion.timings[ 0] - pre_contact_blend_time
        self.blend_in_frame_count = pre_contact_blend_time + post_contact_blend_time
        self.frame = self.start_clip_frame

        #align motion
        # 1) express this clip's root track relative to its own first constraint
        iq, ip = lab.utils.qp_inv((self.motion.constraints_q[ 0], self.motion.constraints_p[ 0]))
        self.quaternions[:,0], self.positions[:,0] = lab.utils.qp_mul(
            (iq[np.newaxis,...], ip[np.newaxis,...]),
            (self.quaternions[:,0],self.positions[:,0])
        )
        # 2) second constraint of the outgoing clip, composed with the outgoing clip's
        #    root at frame 0
        q, p = lab.utils.qp_mul(
            (out_clip.quaternions[0, 0], out_clip.positions[0, 0]),
            (out_clip.motion.constraints_q[ 1], out_clip.motion.constraints_p[ 1]),
        )
        # 3) move this clip's root track onto that transform
        self.quaternions[:,0], self.positions[:,0] = lab.utils.qp_mul(
            (q[np.newaxis,...], p[np.newaxis,...]),
            (self.quaternions[:,0],self.positions[:,0])
        )
        

    def tick(self, forced_frame=None):
        """Advance the cursor by one frame, or jump to `forced_frame` when given.

        Without a forced frame the cursor stops advancing at timings[3]-1.
        """
        if forced_frame is not None:
            self.frame = forced_frame
        elif self.frame < self.motion.timings[ 3]-1:
            self.frame += 1

        
class Player:
    """Plays a sequence of motion clips, blending each new clip over the current one.

    After every tick(), `quaternions` and `positions` hold the pose to display.
    """
    def __init__(self, motion_list):
        self.motion_list = motion_list
        self.current_clip = None
        self.next_clip = None
        # Output pose: identity quaternions and zero positions until the first tick.
        self.quaternions = np.array([1,0,0,0], dtype=np.float32)[np.newaxis,...].repeat(bone_count, axis=0)
        self.positions = np.zeros([bone_count, 3], dtype=np.float32)
        self.last_clip_position = np.zeros([3], dtype=np.float32)

    def set_next_clip(self, clip_id):
        """Queue clip `clip_id`.

        The very first clip becomes the current clip as is; any later clip is
        aligned to the current one and stored as the pending next clip.
        """
        if self.current_clip is None:
            self.current_clip = ClipPlayer(self.motion_list[clip_id])
        else:
            self.next_clip = ClipPlayer(self.motion_list[clip_id])
            self.next_clip.align_to_out(self.current_clip)
        # Position of bone 0 at the time the clip was queued.
        self.last_clip_position = self.positions[0, :]

    def tick(self):
        """Advance playback by one frame and update the output pose."""
        if self.current_clip is not None:
            self.current_clip.tick()

            self.quaternions = self.current_clip.quaternions[self.current_clip.frame]
            self.positions = self.current_clip.positions[self.current_clip.frame]

            if self.next_clip is not None:
                # The blend only starts once the current clip reaches the scheduled frame.
                if self.current_clip.frame >= self.next_clip.start_at_out_frame:
                    # Keep the next clip in lockstep with the current one.
                    tick_frame = self.next_clip.start_clip_frame + self.current_clip.frame - self.next_clip.start_at_out_frame
                    self.next_clip.tick(tick_frame)

                    # Blend weight: 0 at the start of the window, 1 at its end.
                    t = float(self.current_clip.frame - self.next_clip.start_at_out_frame) / float(self.next_clip.blend_in_frame_count)
                    self.quaternions = lab.utils.quat_slerp(self.current_clip.quaternions[self.current_clip.frame], self.next_clip.quaternions[self.next_clip.frame], t)
                    self.positions = (1.0-t) * self.current_clip.positions[self.current_clip.frame] + (t) * self.next_clip.positions[self.next_clip.frame]
                    
                    # Hand over when the blend is done, when the next clip reached its
                    # timings[1], or when the current clip has no frame left.
                    if t >= .99 or self.next_clip.frame >= self.next_clip.motion.timings[ 1] or self.current_clip.frame >= self.current_clip.motion.timings[ 3] - 1 :
                        self.current_clip = self.next_clip
                        self.next_clip = None                 
                    

In [None]:
# Start playback on clip 0.
player = Player(motion_clips)
player.set_next_clip(0)

def render(frame):
    """Advance the player by one frame and draw it, chaining random steps.

    `frame` comes from the timeline and is only used to trigger the call.
    """

    # Whenever no clip is queued, pick a random clip of the opposite side:
    # right clips live in [LEFT_CLIP_COUNT, CLIP_COUNT), left clips in [0, LEFT_CLIP_COUNT).
    if player.next_clip is None:
        if player.current_clip.motion.side == 0:
            player.set_next_clip(randrange(LEFT_CLIP_COUNT, CLIP_COUNT))
        else:
            player.set_next_clip(randrange(0, LEFT_CLIP_COUNT))
    
    player.tick()
    
    q = player.quaternions
    p = player.positions
 
    a = lab.utils.quat_to_mat(q, p)
    viewer.set_shadow_poi(p[0])
    
    # Shadow pass
    viewer.begin_shadow()
    viewer.draw(character, a)
    viewer.end_shadow()
    
    # Display pass
    viewer.begin_display()
    viewer.draw_ground()
    viewer.draw(character, a)
    viewer.end_display()

    viewer.disable(depth_test=True)

    # Debug overlay: skeleton of the current clip (second argument is presumably an
    # RGB colour, here red) ...
    a = lab.utils.quat_to_mat(player.current_clip.quaternions[player.current_clip.frame], player.current_clip.positions[player.current_clip.frame])
    viewer.draw_lines(character.world_skeleton_lines(a), np.array([1,0,0], dtype=np.float32))
    viewer.draw_axis(character.world_skeleton_xforms(a), 5)
    # ... and of the pending next clip (presumably green).
    if player.next_clip is not None:
        f = max(player.next_clip.frame, 0)
        a = lab.utils.quat_to_mat(player.next_clip.quaternions[f], player.next_clip.positions[f])
        viewer.draw_lines(character.world_skeleton_lines(a), np.array([0,1,0], dtype=np.float32))
        viewer.draw_axis(character.world_skeleton_xforms(a), 5)
    
    viewer.execute_commands()
    
interact(
    render, 
    frame=lab.Timeline(max=MAX_STEP_LEN-1)
)
viewer

## Transition Cost: Ensuring Physical Continuity Between Clips

To generate smooth, physically plausible motion, we need to ensure that transitions between motion clips do not produce visible artifacts such as foot sliding, abrupt changes in joint positions, or unrealistic discontinuities in momentum.

The cost is set prohibitively high for transitions that would blend onto the wrong foot, i.e. between two clips of the same side.


In [None]:
%%time

# Output area that receives the progress messages of the long-running cells.
progress_output = widgets.Output(layout={'border': '1px solid black'})
display(progress_output)

# Bones compared when measuring how far apart two clips are during a blend:
# hips, spine chain and both legs down to the feet.
bone_filter = [bones.index("Hips"), bones.index("Spine"), bones.index("Spine1"), bones.index("Spine2"), bones.index("LeftUpLeg"), bones.index("LeftLeg"), bones.index("LeftFoot"), bones.index("RightUpLeg"), bones.index("RightLeg"), bones.index("RightFoot")]

def pre_compute_transitions_costs(motion_clips, file_name=None):
    """Pre-compute, for every ordered pair of clips, the blend cost and the root displacement.

    For each clip i the root is re-based so that it is at the origin at frame
    timings[1]. Every clip j of the opposite side is then aligned to i and:

    * physics_costs[i, j] is the distance between the two poses, summed over the
      bones of `bone_filter` and averaged over the frames of the blend window;
    * delta_x / delta_z / delta_theta [i, j] are the root position and yaw of j at
      its own timings[1], i.e. the displacement produced by playing j after i.

    Costs are divided by the mean valid cost. Pairs of the same side, and pairs
    whose blend window is empty, get a cost of 10000.

    Args:
        motion_clips: list of MotionClip.
        file_name: optional path; when given the four arrays are pickled to it.

    Returns:
        Tuple (physics_costs, delta_theta, delta_x, delta_z) of (count, count) arrays.
    """

    count = len(motion_clips)
    
    # -1 marks "no valid transition"; it is only replaced by a real cost below.
    physics_costs = np.ones([count, count]) * -1
    delta_theta = np.zeros([count, count])
    delta_x = np.zeros([count, count])
    delta_z = np.zeros([count, count])

    for i in range(len(motion_clips)):
        
        progress_output.clear_output()
        display(f"clip {i} / {len(motion_clips)}")

        # Outgoing clip, re-based so that its root at timings[1] is the origin.
        a = ClipPlayer(motion_clips[i])
        a.quaternions[:, 0], a.positions[:, 0] = lab.utils.qp_mul(
            lab.utils.qp_inv((a.quaternions[motion_clips[i].timings[1], 0][np.newaxis,...], a.positions[motion_clips[i].timings[1], 0][np.newaxis,...])),
            (a.quaternions[:, 0], a.positions[:, 0])
        )
        aq, ap = lab.utils.quat_fk(a.quaternions, a.positions, parents)

        for j in range(len(motion_clips)):
            # A step can only be followed by a step of the other side.
            if motion_clips[i].side == motion_clips[j].side:
                continue

            b = ClipPlayer(motion_clips[j])
            b.align_to_out(a)

            # compute cost
            bq, bp = lab.utils.quat_fk(b.quaternions, b.positions, parents)
            blend_frames = int(b.blend_in_frame_count)
            if blend_frames > 0:
                # Accumulate in a local starting at 0. Accumulating directly into
                # physics_costs[i, j] would start from the -1 sentinel and bias
                # every cost (a very small cost could even stay negative and be
                # mistaken for an impossible transition).
                cost = 0.0
                for k in range(blend_frames):
                    v = ap[b.start_at_out_frame+k, bone_filter, ...] - bp[b.start_clip_frame+k, bone_filter, ...]
                    cost += np.sum(np.sqrt(np.sum(v*v, axis=1)))
                physics_costs[i, j] = cost / blend_frames
            # else: nothing to compare, keep the sentinel so the pair is discarded.

            # compute deltas
            end_frame = motion_clips[j].timings[1]
            delta_x[i, j] = bp[end_frame, 0, 0]
            delta_z[i, j] = bp[end_frame, 0, 2]
            # Yaw from the w and y components of the root quaternion. np.arctan2 is
            # used instead of np.atan2, which only exists in NumPy >= 2.
            delta_theta[i, j] = np.arctan2(
                2 * bq[end_frame, 0, 0] * bq[end_frame, 0, 2],
                1.0 - (2 * bq[end_frame, 0, 2] * bq[end_frame, 0, 2])
            )

    # normalize
    physics_costs[:,:] /= physics_costs[physics_costs>=0].mean()

    # discard the impossible transitions between feet
    physics_costs[physics_costs<0] = 10000

    if file_name is not None:
        with open(file_name, 'wb') as f:
            pickle.dump((physics_costs, delta_theta, delta_x, delta_z), f)

    return physics_costs, delta_theta, delta_x, delta_z

# Uncomment to recompute the transition tables and overwrite the cache file
# (one ClipPlayer alignment per pair of clips, so this is slow).
# with progress_output:
#     physics_costs, delta_theta, delta_x, delta_z = pre_compute_transitions_costs(motion_clips, 'realtime_planning_animations_costs.dat')

# Default path: load the cached tables.
# NOTE: pickle.load can execute arbitrary code -- only open files from a trusted source.
with open('realtime_planning_animations_costs.dat', 'rb') as f:
    physics_costs, delta_theta, delta_x, delta_z = pickle.load(f)

## Optimal policy

#### Value-Function Learning Pipeline

1. **Initialize**
   - Set $V^{(0)}(s) = 0$ for all $s$.
   - Create empty training sets $\mathcal{T}_a$ for every clip $a \in \mathcal{A}$.

2. **Repeat until convergence**
   1. **Sampling**
      - For $k = 1 \dots K$ trajectories:
        - Pick random start state $s_0$.
        - While not terminal:
          - Choose action $a_t = \operatorname{argmax}_{a'} \bigl[R(s_t,a') + \gamma V^{(i)}(f(s_t,a'))\bigr]$.
          - Store tuple $(s_t, v_t)$ with  
            $v_t = R(s_t,a_t) + \gamma V^{(i)}(f(s_t,a_t))$.
          - Step to $s_{t+1}=f(s_t,a_t)$.

   2. **Regression**
      - For every clip $a$:
        - Build **Extra-Tree** on $\mathcal{T}_a$:
          - I used the scikit-learn implementation (`ExtraTreesRegressor`).

   3. **Update**
      - Construct new approximation $V^{(i+1)}$ from the forests.
      - Re-evaluate all stored $(s,v)$ using $V^{(i+1)}$.

   4. **Check convergence**
      - Compute **Bellman residual**  
        $\displaystyle \text{Res} = \frac{1}{|\mathcal{T}|}\sum_{(s,v)\in\mathcal{T}} \Bigl| v - \max_{a'}\bigl[R(s,a') + \gamma V^{(i+1)}(f(s,a'))\bigr]\Bigr|^2$.


In [None]:
# Sample angles, from -pi to pi, at which each clip's value function is tabulated.
pre_compute_table = np.linspace(-np.pi, np.pi, 300)
# Ids of all clips, used to evaluate every candidate next clip at once.
clip_arange = np.arange(CLIP_COUNT)

def transition (clip_id, x, z, theta, next_clip_id):
    """Apply the displacement of the transition clip_id -> next_clip_id to a state.

    The pre-computed root displacement is rotated by `theta` and added to (x, z);
    the pre-computed yaw change is added to `theta` and wrapped once into [-pi, pi].

    Args:
        clip_id: current clip.
        x, z, theta: current position and orientation.
        next_clip_id: index (or indexer such as `...`) selecting the next clip(s).
            The wrap is done with boolean-mask assignment, so the selection must
            yield an array.

    Returns:
        Tuple (next_clip_id, x_prime, z_prime, theta_prime).
    """
    step_x = delta_x[clip_id, next_clip_id]
    step_z = delta_z[clip_id, next_clip_id]
    sin_theta = np.sin(theta)
    cos_theta = np.cos(theta)

    x_prime = x + sin_theta * step_z + cos_theta * step_x
    z_prime = z + cos_theta * step_z - sin_theta * step_x

    theta_prime = theta + delta_theta[clip_id, next_clip_id]
    # Wrap back into [-pi, pi]
    too_low = theta_prime < -np.pi
    theta_prime[too_low] += np.pi*2
    too_high = theta_prime > np.pi
    theta_prime[too_high] -= np.pi*2
    return next_clip_id, x_prime, z_prime, theta_prime

def transition_reward (clip_id, next_clip_id):
    """Reward of blending from clip_id to next_clip_id: the negated physics cost, weighted by 1.25."""
    blend_cost = physics_costs[clip_id, next_clip_id]
    return -blend_cost * 1.25

def state_reward(clip_id, theta):
    """Reward of being in clip `clip_id` with orientation error `theta`.

    Clips cut from source animations 25 and above (the start, stop and slow-walk
    recordings) get a flat penalty of -10. For every other clip the reward is the
    negated absolute orientation error.

    The previous version also computed `theta + delta_theta` of the clip, wrapped
    with a wrong test (`< np.pi` instead of `< -np.pi`), and then discarded the
    result. That dead code is removed; the returned value is unchanged.

    Args:
        clip_id: id of the clip.
        theta: orientation error, in radians.

    Returns:
        The scalar reward.
    """
    if clips_sources[clip_id] >= 25:
        return -10
    return -np.abs(theta)

def get_value_function(value_functions, clip_id, theta_prime):
    """Look up the tabulated value of clip(s) `clip_id` at angle(s) `theta_prime`.

    Args:
        value_functions: table indexed as [clip, angle sample], one column per
            entry of `pre_compute_table`.
        clip_id: clip id or array of clip ids.
        theta_prime: angle or array of angles, in radians.

    Returns:
        The value(s) stored for the selected clip(s) and angle bin(s).
    """
    future_indices = np.searchsorted(pre_compute_table, theta_prime, side='right')
    # With side='right', an angle equal to (or above) the last sample, i.e. +pi,
    # yields len(table), one past the last column. Clamp it to the last bin
    # instead of raising an IndexError.
    future_indices = np.minimum(future_indices, pre_compute_table.shape[0] - 1)
    return value_functions[clip_id, future_indices]
    
def use_optimal_policy(value_functions, alpha, state_clip, state_theta):
    """Pick the best next clip for the state (state_clip, state_theta).

    Every clip is scored with its transition reward plus `alpha` times the
    tabulated value of the state it leads to; the clip with the highest score wins.

    Returns:
        Tuple (picked clip, x_prime, z_prime, theta_prime, value) where `value` is
        the winning score plus the reward of the current state.
    """
    # `...` selects all clips at once; the position starts at the origin.
    _, x_prime, z_prime, theta_prime = transition(state_clip, 0, 0, state_theta, ...)

    future_value = get_value_function(value_functions, clip_arange, theta_prime)
    scores = transition_reward(state_clip, ...) + alpha * future_value

    picked = np.argmax(scores)
    value = scores[picked] + state_reward(state_clip, state_theta)

    return picked, x_prime[picked], z_prime[picked], theta_prime[picked], value

In [None]:
def refit_tree(model, X_new, y_new):
    """Overwrite the leaf values of a fitted forest while keeping its structure.

    For every tree of `model`, each leaf reached by at least one row of `X_new`
    gets the mean of the matching `y_new` as its new value. Leaves that no row
    reaches keep their previous value.

    Args:
        model: fitted ensemble exposing `estimators_`.
        X_new: samples, in the shape expected by the estimators' `apply`.
        y_new: array of targets, one per row of X_new.
    """
    for estimator in model.estimators_:
        # Leaf reached by each sample in this tree
        reached = estimator.apply(X_new)
        fitted_tree = estimator.tree_

        for leaf in np.unique(reached):
            # Regression leaves hold a single value at [leaf, 0, 0]
            fitted_tree.value[leaf, 0, 0] = np.mean(y_new[reached == leaf])

In [None]:
%%time
display(progress_output)

# Weight applied to the value of the next state when scoring a transition.
ALPHA = .95
# Number of training iterations.
EPOCH = 5
# From this epoch on, no new samples are gathered and the tree structures are kept;
# only the leaf values are refreshed.
FREEZE_EPOCH = 4

def train_optimal_policy():
    """Learn one value function per clip for the orientation-following task.

    Each epoch gathers trajectories with the current policy, fits one regressor per
    clip on the (theta, value) samples, tabulates the regressors on
    `pre_compute_table`, then re-evaluates every stored sample and records the
    residuals. From FREEZE_EPOCH on, no new samples are added and the existing
    trees only get new leaf values (refit_tree).

    The result is also pickled to 'realtime_planning_orientation_value_functions.dat'.

    Returns:
        Tuple (value_functions_precompute, scores): the (CLIP_COUNT, table size)
        value table, and per epoch the min / max / mean residual in columns 0..2.
    """
    progress_output.clear_output()
    
    value_functions = None
    value_functions_precompute = np.zeros([CLIP_COUNT, pre_compute_table.shape[0]])

    # Only columns 0..2 are filled below.
    scores = np.zeros([EPOCH, 6])

    # Per-clip sample storage: count[c] samples are valid in X[c] / y[c].
    count = np.zeros([CLIP_COUNT], dtype=np.uint32)
    X = np.zeros([CLIP_COUNT, 100000])
    y = np.zeros([CLIP_COUNT, 100000])
    
    for epoch in range(EPOCH):
        display(f"epoch : [{epoch}]")

        # we stop adding new data once we are in the freeze tree state
        if epoch < FREEZE_EPOCH:
            #build a data set
            display(f"gather path data")

            # 800 trajectories of 10 steps, each from a random clip and orientation
            for path in range(800):
                clip = randrange(0, CLIP_COUNT)
                theta = uniform(-np.pi, np.pi)
        
                for steps in range(10):
                    next_clip, _, _, theta_prime, reward = use_optimal_policy(value_functions_precompute, ALPHA, clip, theta)
                    
                    # store the sample under the clip it was taken from
                    X[clip, count[clip]] = theta
                    y[clip, count[clip]] = reward
                    count[clip] += 1
    
                    clip = next_clip
                    theta = theta_prime

       

        #train the value functions
        display(f"train the value functions")
        # before FREEZE_EPOCH the forests are rebuilt from scratch; afterwards the tree
        # structures are frozen and only the leaf values are updated
        # NOTE(review): a clip that was never visited has count 0 and would be fitted on
        # an empty array -- confirm this cannot happen with the sampling above.
        if epoch < FREEZE_EPOCH:
            value_functions = []
            for i in range(CLIP_COUNT):
                X_train, y_train = X[i, :count[i]], y[i, :count[i]]
                model = ExtraTreesRegressor(n_estimators=50, random_state=None, n_jobs=-1)
                model.fit(X_train.reshape(-1, 1), y_train)
                value_functions.append(model)
        else:
            for i in range(CLIP_COUNT):
                X_train, y_train = X[i, :count[i]], y[i, :count[i]]
                refit_tree(value_functions[i], X_train.reshape(-1, 1), y_train)

        #precompute value functions
        display(f"precompute value functions")
        for i in range(CLIP_COUNT):
            value_functions_precompute[i, :] = value_functions[i].predict(pre_compute_table[:, np.newaxis])

        
        #compute bellman residual
        residuals = np.zeros([CLIP_COUNT * 100000])
        c = 0
        
        # recompute the best value
        display(f"re evaluate Value functions")
        for i in range(CLIP_COUNT):
            for j in range(count[i]):
                _, _, _, _, reward = use_optimal_policy(value_functions_precompute, ALPHA, i, X[i,j])

                # absolute difference between the tabulated value and the re-evaluated one
                residuals[c] = np.abs(get_value_function(value_functions_precompute, i, X[i,j]) - reward)
                # the stored target is refreshed for the next epoch
                y[i, j] = reward
                c+=1
                
        progress_output.clear_output()
        display(f"residuals; mean {residuals[:c].mean()} max {residuals[:c].max()}")
        scores[epoch, 0] = residuals[:c].min()
        scores[epoch, 1] = residuals[:c].max()
        scores[epoch, 2] = residuals[:c].mean()

    with open('realtime_planning_orientation_value_functions.dat', 'wb') as f:
        pickle.dump((value_functions_precompute, scores), f)

    return value_functions_precompute, scores

# Uncomment to retrain the orientation policy and overwrite the cache file.
# with progress_output:
#     value_functions_precompute, scores = train_optimal_policy()

# Default path: load the cached value table and training scores.
# NOTE: pickle.load can execute arbitrary code -- only open files from a trusted source.
with open('realtime_planning_orientation_value_functions.dat', 'rb') as f:
    value_functions_precompute, scores = pickle.load(f)

In [None]:
# One x position per recorded epoch. The length comes from the loaded `scores`
# rather than from EPOCH, so the plot still works when the cached file was produced
# with a different number of epochs.
x = np.arange(scores.shape[0])

fig, axs = plt.subplots(1, 1, figsize=(10, 4), sharex=True)

# Mean residual with its min-max envelope
axs.plot(x, scores[:, 2], label='Mean', color='blue')
axs.fill_between(x, scores[:, 0], scores[:, 1], color='lightblue', alpha=0.4, label='Min-Max Range')
axs.set_title('Scores over Epoch (Mean and Range)')
axs.set_xlabel('Epoch')
axs.set_ylabel('Value')
axs.legend()
axs.grid(True)

plt.tight_layout()
plt.show()


In [None]:
# Gamepad widget (controller slot 0); its first two axes steer the character in the
# render cell below.
gamepad = widgets.Controller(index=0)
gamepad

In [None]:
# Restart playback on clip 0.
player = Player(motion_clips)
player.set_next_clip(0)

def render(frame):
    """Advance the player by one frame, choosing the next step from the gamepad direction.

    `frame` comes from the timeline and is only used to trigger the call.
    """

    # Requested orientation as a rotation around Y; identity while the stick is centred.
    controller_orient = np.array([1,0,0,0], dtype=np.float32)
    posx = gamepad.axes[0].value 
    posz = -gamepad.axes[1].value 
    if np.abs(posx) > 0.001 or np.abs(posz) > 0.001:
        angle = np.atan2(posz, posx)
        controller_orient[0] = np.cos(angle/2)
        controller_orient[2] = np.sin(angle/2)


    # Plan the next step as soon as none is queued.
    if player.next_clip is None:
        f = player.current_clip.motion.timings[1]

        # Root orientation of the current clip at timings[1], relative to the requested one
        q = lab.utils.quat_mul(lab.utils.quat_inv(controller_orient), player.current_clip.quaternions[f, 0, :])
        # yaw from the w and y components of that quaternion
        theta =  np.atan2(
            2 * q[0] * q[2], 
            1.0 - (2 * q[2] * q[2])
        )
    
        next_clip, _, _, _, _ = use_optimal_policy(value_functions_precompute, ALPHA, player.current_clip.motion._id, theta)
        
        player.set_next_clip(next_clip)

    player.tick()
    
    q = player.quaternions
    p = player.positions
 
    a = lab.utils.quat_to_mat(q, p)
    viewer.set_shadow_poi(p[0])
    
    # Shadow pass
    viewer.begin_shadow()
    viewer.draw(character, a)
    viewer.end_shadow()
    
    # Display pass
    viewer.begin_display()
    viewer.draw_ground()
    viewer.draw(character, a)
    # Direction marker at the root position, oriented like the gamepad request
    d = lab.utils.quat_to_mat(controller_orient, p[0])
    viewer.draw(direction, d)
    
    viewer.end_display()

    viewer.disable(depth_test=True)

    viewer.execute_commands()
    
interact(
    render, 
    frame=lab.Timeline(max=MAX_STEP_LEN-1)
)
viewer

### Reach goal policy

In [None]:
# Indices of the stopping steps.
# The source animations that end with a stop are known; within each of them only the
# last step is the stop itself.
def _last_step(source_id):
    """Return the id of the clip of `source_id` that starts latest in the source animation."""
    candidates = np.argwhere(clips_sources == source_id)[:, 0]
    # Column -1 of clips_timings is the start frame of the clip in its source
    by_start_frame = clips_timings[candidates, -1].argsort()
    return candidates[by_start_frame[-1]]

# Clip id of the final step of every source animation that ends with a stop
# (sources 26-28 and 31-46).
stops_indices = np.array( [_last_step(index) for index in [26,27,28,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46]], dtype=np.uint16)
stops_indices

In [None]:
# compute local position to end on 0,0,0
# For every stop clip: re-base the root so that the last played frame (timings[3]-1)
# sits at the origin, then express the origin in the frame of the root at timings[1].
stops_positions = []
# The loop variable is named `stop_clip_id` rather than `id`, so the builtin `id`
# is not shadowed for the rest of the notebook.
for stop_clip_id in stops_indices:
    player = Player(motion_clips)
    player.set_next_clip(stop_clip_id)
    player.current_clip.quaternions[:, 0], player.current_clip.positions[:, 0] = lab.utils.qp_mul(
        lab.utils.qp_inv((player.current_clip.quaternions[player.current_clip.motion.timings[3]-1, 0][np.newaxis,...], player.current_clip.positions[player.current_clip.motion.timings[3]-1, 0][np.newaxis,...])),
        (player.current_clip.quaternions[:, 0], player.current_clip.positions[:, 0])
    )
    pos =  - player.current_clip.positions[player.current_clip.motion.timings[1], 0]
    pos = lab.utils.quat_mul_vec(lab.utils.quat_inv(player.current_clip.quaternions[player.current_clip.motion.timings[1], 0]), pos)
    stops_positions.append(pos)
stops_positions = np.array(stops_positions, dtype=np.float32)
stops_positions

In [None]:
# Grid on which the reach-goal value functions are tabulated: 1001 samples from
# -1000 to 1000 along x and along z.
pre_compute_table_x = np.linspace(-1000, 1000, 1001)
pre_compute_table_z = np.linspace(-1000, 1000, 1001)

# Candidate next clips: every clip except those cut from sources 25, 29 and 30.
# This replaces the `clip_arange` used by the orientation policy above.
clip_arange = np.arange(CLIP_COUNT)
mask = np.ones(clip_arange.shape, bool)
mask[np.argwhere(clips_sources == 25)[:, 0]] = False
mask[np.argwhere(clips_sources == 29)[:, 0]] = False
mask[np.argwhere(clips_sources == 30)[:, 0]] = False
clip_arange = clip_arange[mask]

# Same candidates with the stop clips removed as well.
clip_arange_no_stop = np.arange(CLIP_COUNT)
mask = np.ones(clip_arange_no_stop.shape, bool)
mask[stops_indices] = False
mask[np.argwhere(clips_sources == 25)[:, 0]] = False
mask[np.argwhere(clips_sources == 29)[:, 0]] = False
mask[np.argwhere(clips_sources == 30)[:, 0]] = False
clip_arange_no_stop = clip_arange_no_stop[mask]


def transition (clip_id, x, z, next_clip_id):
    """Express the position (x, z) after the transition clip_id -> next_clip_id.

    The pre-computed root displacement of the transition is subtracted from (x, z)
    and the result is rotated by the pre-computed yaw change.

    Returns:
        Tuple (next_clip_id, x_rot, z_rot).
    """
    shifted_x = x - delta_x[clip_id, next_clip_id]
    shifted_z = z - delta_z[clip_id, next_clip_id]

    turn = delta_theta[clip_id, next_clip_id]
    cos_turn = np.cos(turn)
    sin_turn = np.sin(turn)

    x_rot = cos_turn * shifted_x - sin_turn * shifted_z
    z_rot = cos_turn * shifted_z + sin_turn * shifted_x

    return next_clip_id, x_rot, z_rot

def transition_inv (previous_clip_id, clip_id, x, z):
    """Undo `transition`: recover the position before previous_clip_id -> clip_id.

    (x, z) is rotated by the opposite of the pre-computed yaw change, then the
    pre-computed root displacement is added back.

    Returns:
        Tuple (previous_clip_id, x, z) in the previous state.
    """
    back_turn = -delta_theta[previous_clip_id, clip_id]
    cos_turn = np.cos(back_turn)
    sin_turn = np.sin(back_turn)

    x_rot = cos_turn * x - sin_turn * z
    z_rot = cos_turn * z + sin_turn * x

    previous_x = x_rot + delta_x[previous_clip_id, clip_id]
    previous_z = z_rot + delta_z[previous_clip_id, clip_id]
    return previous_clip_id, previous_x, previous_z

def transition_reward(clip_id, next_clip_id):
    """Reward for switching clips: minus twice the stored physics cost.

    ``next_clip_id`` may be an index array; the result then has one entry per
    candidate.
    """
    cost = physics_costs[clip_id, next_clip_id]
    return -cost * 2.

def state_reward(clip_id, x, z):
    """Immediate reward of being in clip ``clip_id`` with the goal at (x, z).

    * Non-stop clip: 0 while the goal is within 300 units, otherwise a penalty
      growing linearly with the extra distance (0.005 per unit).
    * Stop clip: 1 when the goal lies within 50 units of the position
      recorded for that stop in ``stops_positions``, otherwise -100.
    """
    if clip_id not in stops_indices:
        goal_distance = np.sqrt(np.sum(x*x + z*z))
        if goal_distance > 300:
            return (300-goal_distance) * 0.005
        return 0

    # Row of stops_positions that belongs to this stop clip.
    stop_row = np.argwhere(stops_indices == clip_id)[0][0]
    offset_x = x - stops_positions[stop_row, 0]
    offset_z = z - stops_positions[stop_row, 2]

    miss_distance = np.sqrt(np.sum(offset_x*offset_x + offset_z*offset_z))
    if miss_distance < 50:
        return 1
    return -100

def get_value_function(value_functions, clip_id, x_prime, z_prime):
    """Look up tabulated values for one or many (clip, x, z) states.

    Args:
        value_functions: array indexed as [clip, z_index, x_index].
        clip_id: scalar or index array selecting the table(s).
        x_prime, z_prime: coordinates, scalars or arrays matching ``clip_id``.

    Returns:
        The table entries of the grid cells containing the coordinates.
        Coordinates outside the grid are clamped to its border cells.
    """
    # searchsorted gives the insertion point; -1 selects the grid node at or
    # below the coordinate, and the clip keeps out-of-range points on the table.
    x_indices = np.clip(np.searchsorted(pre_compute_table_x, x_prime) - 1, 0, pre_compute_table_x.shape[0] - 1)
    z_indices = np.clip(np.searchsorted(pre_compute_table_z, z_prime) - 1, 0, pre_compute_table_z.shape[0] - 1)
    # Read from the table that was passed in, not from a module-level one.
    return value_functions[clip_id, z_indices, x_indices]

def use_optimal_policy(value_functions, alpha, state_clip, x, z, no_stop=False):
    """Greedy one-step lookahead from state (state_clip, x, z).

    Every candidate next clip is scored with its transition reward plus
    ``alpha`` times the tabulated value of the state it leads to.

    Args:
        no_stop: when true, stop clips are not considered as candidates.

    Returns:
        (next_clip, x_next, z_next, value) where ``value`` is the best score
        plus the immediate reward of the current state.
    """
    candidates = clip_arange_no_stop if no_stop else clip_arange

    _, next_x, next_z = transition(state_clip, x, z, candidates)

    scores = transition_reward(state_clip, candidates)
    scores += alpha * get_value_function(value_functions, candidates, next_x, next_z)

    best = np.argmax(scores)

    # A stop that would miss the goal is forbidden: retry without stop clips.
    if no_stop == False and state_reward(clip_arange[best], next_x[best], next_z[best]) < -99:
        return use_optimal_policy(value_functions, alpha, state_clip, x, z, True)

    current_reward = state_reward(state_clip, x, z)
    return candidates[best], next_x[best], next_z[best], scores[best] + current_reward

In [None]:
def rollback_trajectories(state_clip, state_x, state_z, alpha, spread=1, rollback_len=10):
    """Walk backwards from a state through cheap predecessor clips.

    At every step one of the ``spread`` cheapest predecessors (by physics
    cost) is drawn at random and the state is mapped back through
    ``transition_inv``. At least one predecessor is always appended.

    Returns:
        List of (clip, x, z, value) tuples, starting with the given state.
        Each predecessor's value is the following entry's value times
        ``alpha`` minus a tenth of the transition's physics cost.
    """
    trajectory = [
        (state_clip, state_x, state_z, state_reward(state_clip, state_x, state_z))
    ]

    clip, x, z = state_clip, state_x, state_z
    while True:
        predecessor = physics_costs[:, clip].argsort()[randrange(0, spread)]
        _, prev_x, prev_z = transition_inv(predecessor, clip, x, z)

        value = trajectory[-1][3] * alpha - physics_costs[predecessor, clip] * .1
        trajectory.append((predecessor, prev_x, prev_z, value))

        if len(trajectory) >= rollback_len:
            break
        clip, x, z = predecessor, prev_x, prev_z

    return trajectory

In [None]:
players = []
def _create_one_player ():
    """Build a player that follows a random rolled-back trajectory into a stop.

    A stop clip is drawn at random, a trajectory is rolled back from it, and
    the player is placed on the trajectory's first state.
    """
    player = Player(motion_clips)
    stop_id = randrange(0, len(stops_indices))
    stop_x, stop_z = stops_positions[stop_id, 0], stops_positions[stop_id, 2]

    # The rollback starts at the stop; flip it so entry 0 is where the walk begins.
    player.trajectory = rollback_trajectories(stops_indices[stop_id], stop_x, stop_z, .95, spread=5, rollback_len=8)[::-1]
    first_clip, first_x, first_z = player.trajectory[0][:3]
    player.set_next_clip(first_clip)

    clip = player.current_clip
    start = clip.motion.timings[1]
    clip.frame = start
    # Re-base the root track (bone 0) on the root transform at the start frame.
    clip.quaternions[:, 0], clip.positions[:, 0] = lab.utils.qp_mul(
        lab.utils.qp_inv((clip.quaternions[start, 0][np.newaxis,...], clip.positions[start, 0][np.newaxis,...])),
        (clip.quaternions[:, 0], clip.positions[:, 0])
    )
    player.trajectory_id = 0
    # Shift the root track by the first state's (x, z).
    clip.positions[:, 0] -= [first_x, 0, first_z]
    return player

for i in range(20):
    players.append(_create_one_player())

def render(frame):
    """Advance every player by one tick and draw the whole crowd.

    ``frame`` comes from the timeline widget and only triggers the call; the
    players keep their own playback state.
    """
    for player in players:
        # When nothing is queued, queue the next clip of the stored trajectory
        # (if any entry is left).
        if player.next_clip is None:
            player.trajectory_id += 1
            if player.trajectory_id < len(player.trajectory):
                player.set_next_clip(player.trajectory[player.trajectory_id][0])

        player.tick()

    # Shadow pass.
    viewer.begin_shadow()
    for player in players:
        q = player.quaternions
        p = player.positions
        a = lab.utils.quat_to_mat(q, p)
        viewer.draw(character, a)
    viewer.end_shadow()

    # Colour pass.
    viewer.begin_display()
    viewer.draw_ground()
    for player in players:
        q = player.quaternions
        p = player.positions
        a = lab.utils.quat_to_mat(q, p)
        viewer.draw(character, a)
    viewer.end_display()

    viewer.disable(depth_test=True)


    viewer.execute_commands()

interact(
    render,
    frame=lab.Timeline(max=MAX_STEP_LEN-1)
)
viewer

In [None]:
# Rows are (clip, x, z, steps_to_stop). The buffer is sized from the exact
# upper bound of what the loops below can write: every seed state stores
# itself (1 row) and _rollback_all adds at most 1 + 5 + 25 predecessor rows
# (recursion levels 1..3, fan-out 5 below the first level).
_STATES_PER_SEED = 1 + (1 + 5 + 25)
_SEED_COUNT = len(stops_indices) * 100 * 5
all_valid_states = np.zeros([_SEED_COUNT * _STATES_PER_SEED, 4], dtype=np.float32)
all_valid_states_count = 0

def _rollback_all(state_clip, state_x, state_z, action_id, recursion_counter):
    """Record the ``action_id``-th cheapest predecessor of a state, recursively.

    The predecessor state is appended to ``all_valid_states`` together with
    ``recursion_counter`` (its number of steps to the stop). Predecessors
    that are themselves stop clips are ignored. Recursion continues through
    the 5 cheapest predecessors until the counter reaches 4.
    """
    global all_valid_states_count
    previous_clip = physics_costs[:, state_clip].argsort()[action_id]
    clip_prime, x_prime, z_prime = transition_inv(previous_clip, state_clip, state_x, state_z )
    if clip_prime not in stops_indices:
        all_valid_states[all_valid_states_count, :] = ((clip_prime, x_prime, z_prime, recursion_counter))
        all_valid_states_count += 1
        recursion_counter += 1
        if recursion_counter < 4:
            for i in range(5):
                _rollback_all(clip_prime, x_prime, z_prime, i, recursion_counter)

# stops_positions[k] belongs to stops_indices[k].
for stop_id, state_clip in enumerate(stops_indices):

    for _ in range(100):
        # Jitter the goal around the exact stop position.
        x = uniform(-20, 20)
        z = uniform(-20, 20)

        for i in range(5):
            all_valid_states[all_valid_states_count, :] = ((state_clip, stops_positions[stop_id,0] + x, stops_positions[stop_id,2] + z, 0))
            all_valid_states_count += 1
            _rollback_all(state_clip,stops_positions[stop_id,0] + x, stops_positions[stop_id,2] + z, i, 1)

# Keep only the rows that were written.
all_valid_states = all_valid_states[:all_valid_states_count, :]

In [None]:
def _build_clip_plot():
    """Create the scatter figure and return the slider callback bound to it.

    The figure, axes and line are kept in this closure so that other cells
    assigning ``fig`` / ``ax`` cannot redirect the callback to another figure.
    """
    clip_fig, clip_ax = plt.subplots()
    points, = clip_ax.plot([], [], 'o')  # empty line, filled by the callback
    clip_ax.set_xlabel('x')
    clip_ax.set_ylabel('z')
    clip_ax.set_title('Clip')
    clip_ax.set_xlim(-500, 500)
    clip_ax.set_ylim(-500, 500)
    clip_ax.set_aspect('equal')
    clip_ax.grid(True)

    def plot_clip(clip_id):
        """Show the (x, z) of every recorded valid state of ``clip_id``."""
        selected = all_valid_states[all_valid_states[:, 0] == clip_id]
        points.set_data(selected[:, 1], selected[:, 2])
        clip_ax.relim()
        clip_ax.autoscale_view()
        clip_ax.set_title(f'Clip ID: {clip_id}')
        clip_fig.canvas.draw_idle()

    return plot_clip

plot_clip = _build_clip_plot()

# Interactive slider
interact(plot_clip, clip_id=widgets.IntSlider(min=0, max=CLIP_COUNT-1, step=1, value=0))

In [None]:
from scipy.spatial import cKDTree
from multiprocessing import Pool
import importlib
import RealTimePlanning_MultiProcess_Func as mpf
importlib.reload(mpf)

In [None]:
# One tabulated value function per clip, initialised to zero.
# NOTE(review): get_value_function reads this as [clip, z_index, x_index] while the
# axes are sized (x, z) here; both grids have the same length, so it only matters
# if the two tables ever get different resolutions.
value_functions_precompute = np.zeros([CLIP_COUNT, pre_compute_table_x.shape[0], pre_compute_table_z.shape[0]])

In [None]:
%%time
display(progress_output)

# Discount factor: weights the value of the next state in use_optimal_policy
# and decays the bootstrap targets (ALPHA ** steps_to_stop) in _generate_data.
ALPHA = .99
# Number of training iterations run by train_optimal_policy.
EPOCH = 50
# The training set is rebuilt from scratch every RESTART_EPOCH epochs.
RESTART_EPOCH = 10
# Step used when moving each stored target toward its re-evaluated value.
LEARN_RATE = 0.1

# Every (x, z) pair of the lookup grid, flattened to shape (n_z * n_x, 2);
# passed to the worker that fits the value functions.
PRE_COMPUTE_TABLE_INDICES = np.stack(np.meshgrid(pre_compute_table_x, pre_compute_table_z), axis=2).reshape(-1, 2)

def _generate_data(count, X, y):
    # force all the known path
    for s in all_valid_states[::50]:
        clip, x, z, rec = int(s[0]), s[1], s[2], s[3]
        X[clip, count[clip], 0] = x
        X[clip, count[clip], 1] = z
        y[clip, count[clip]] = 1.0 * (ALPHA ** rec)
        count[clip] += 1

    # fail
    for clip in range(CLIP_COUNT):

        selected = all_valid_states[all_valid_states[:, 0] == clip]
        if selected.shape[0] > 0:
        
            tree = cKDTree(selected[:, 1:3])

            sp = np.linspace(-500, 500, 201)
            grid_positions = np.stack(np.meshgrid(sp, sp), axis=2).reshape(-1, 2)
            
            # Query nearest neighbor distances
            distances, _ = tree.query(grid_positions, k=1)
    
            for i in range(grid_positions.shape[0]):
                if distances[i] > 4 and distances[i] < 20 :
                    X[clip, count[clip], 0] = grid_positions[i, 0]
                    X[clip, count[clip], 1] = grid_positions[i, 1]
                    y[clip, count[clip]] = 0
                    count[clip] += 1
        

def train_optimal_policy():
    """Fit one tabulated value function per clip by iterated regression.

    Each epoch:

    1. every ``RESTART_EPOCH`` epochs the training set is rebuilt from the
       bootstrap samples of ``_generate_data``;
    2. rollouts of at most 10 steps are played with the current greedy policy
       and their (state, value) pairs are appended to the training set;
    3. the value functions are refitted in a worker pool with
       ``mpf.reach_train_value_function``;
    4. every stored sample is re-evaluated with the new tables and its target
       is moved toward the new estimate by ``LEARN_RATE``.

    Returns:
        (value_functions_precompute, scores). ``scores`` has one row per epoch:
        min, max and mean absolute residual, and the number of rollouts that
        reached a rewarding state. Both are also pickled to disk.
    """
    progress_output.clear_output()

    scores = np.zeros([EPOCH, 4])

    with Pool() as pool:

        for epoch in range(EPOCH):
            display(f"epoch : [{epoch}]")

            if epoch % RESTART_EPOCH == 0:
                display(f"bootstrap")
                count = np.zeros([CLIP_COUNT], dtype=np.uint32)
                X = np.zeros([CLIP_COUNT, 100000, 2])
                y = np.zeros([CLIP_COUNT, 100000])
                _generate_data(count, X, y)

            #build a data set
            display(f"gather path data")
            reach_reward = 0

            for _ in range(20):
                for clip in range(CLIP_COUNT):
                    x = uniform(-500, 500)
                    z = uniform(-500, 500)

                    for steps in range(10):
                        # A rewarding state ends the rollout.
                        if state_reward(clip, x, z) >= 1:
                            reach_reward += 1
                            break

                        next_clip, x_prime, z_prime, reward = use_optimal_policy(value_functions_precompute, ALPHA, clip, x, z)

                        X[clip, count[clip], 0] = x
                        X[clip, count[clip], 1] = z
                        y[clip, count[clip]] = reward
                        count[clip] += 1

                        clip = next_clip
                        x = x_prime
                        z = z_prime

            # found
            display(f"found {reach_reward} succesful paths")


            #train the value functions
            display(f"train the value functions")
            args = [(X[i, :count[i]], y[i, :count[i]], PRE_COMPUTE_TABLE_INDICES, pre_compute_table_x.shape[0]) for i in range(CLIP_COUNT)]
            results = pool.starmap(mpf.reach_train_value_function, args)
            value_functions_precompute[:, :, :] = np.stack(results)

            #compute bellman residual
            # One slot per stored sample: the training set can hold up to
            # 100000 samples per clip, so the buffer is sized from the counts.
            residuals = np.zeros([int(count.sum())])
            residual_count = 0

            display(f"re evaluate Value functions")
            for i in range(CLIP_COUNT):
                for j in range(count[i]):
                    _, _, _, reward = use_optimal_policy(value_functions_precompute, ALPHA, i, X[i,j, 0], X[i,j, 1])

                    residuals[residual_count] = np.abs(y[i, j] - reward)

                    y[i, j] += LEARN_RATE * (reward - y[i, j])
                    residual_count += 1

            progress_output.clear_output()
            display(f"residuals; mean {residuals[:residual_count].mean()} max {residuals[:residual_count].max()} total states count {np.sum(count)}")
            scores[epoch, 0] = residuals[:residual_count].min()
            scores[epoch, 1] = residuals[:residual_count].max()
            scores[epoch, 2] = residuals[:residual_count].mean()
            scores[epoch, 3] = reach_reward

    with open('realtime_planning_reach_position_value_functions.dat', 'wb') as f:
        pickle.dump((value_functions_precompute, scores), f)

    return value_functions_precompute, scores

# Training is disabled here; uncomment the two lines below to retrain
# (train_optimal_policy rewrites the cache file loaded underneath).
# with progress_output:
#     value_functions_precompute, scores = train_optimal_policy()

# Load the value functions and per-epoch scores written by train_optimal_policy.
# pickle executes code while loading: only open a cache file you produced yourself.
with open('realtime_planning_reach_position_value_functions.dat', 'rb') as f:
    value_functions_precompute, scores = pickle.load(f)

In [None]:
# Training diagnostics. `scores` columns, as written by train_optimal_policy:
# 0 = min residual, 1 = max residual, 2 = mean residual, 3 = successful rollouts.
# The x axis is derived from `scores` itself so the plot still works when the
# loaded cache was trained with a different EPOCH than the one currently set.
x = np.arange(scores.shape[0])

fig, axs = plt.subplots(2, 1, figsize=(10, 8), sharex=True)

# First subplot
axs[0].plot(x, scores[:, 2], label='Mean', color='blue')
axs[0].fill_between(x, scores[:, 0], scores[:, 1], color='lightblue', alpha=0.4, label='Min-Max Range')
axs[0].set_title('Scores over Epoch (Mean and Range)')
axs[0].set_ylabel('Value')
axs[0].legend()
axs[0].grid(True)

# Second subplot
axs[1].plot(x, scores[:, 3], label='Found', color='blue')
axs[1].set_title('Scores over Epoch (Found)')
axs[1].set_xlabel('Index')
axs[1].set_ylabel('Value')
axs[1].legend()
axs[1].grid(True)

plt.tight_layout()
plt.show()


In [None]:
from matplotlib import cm
plt.clf()

# Surface of the value function of clip 16 over the lookup grid.
X, Y = np.meshgrid(pre_compute_table_x,pre_compute_table_z)

fig = plt.figure()
ax = fig.add_subplot(projection="3d")

# Plot the surface.
surf = ax.plot_surface(
    X, Y, value_functions_precompute[16],
    cmap=cm.coolwarm, linewidth=0, antialiased=False,
)
for set_label, text in ((ax.set_xlabel, 'X Axis'), (ax.set_ylabel, 'Z Axis'), (ax.set_zlabel, 'Value')):
    set_label(text)

plt.show()

In [None]:
# Single character steered by the learned value functions toward `player.target`.
player = Player(motion_clips)
player.set_next_clip(119)
player.target = np.array([-200, 0, 500], dtype=np.float32)
# Number of render calls spent waiting in a stop clip before a new target is drawn.
player.wait_counter = 0

def render(frame):
    """Pick the next clip with the greedy policy, advance the player and draw it.

    ``frame`` comes from the timeline widget and is not read here.
    """

    # Only decide once nothing is queued and the current clip reached its timings[1] frame.
    if player.next_clip is None and player.current_clip.motion.timings[1] <= player.current_clip.frame:
        if player.current_clip.motion._id not in stops_indices:

            # Target relative to entry 0 of the pose (presumably the root), rotated into its local frame.
            pos = player.target - player.positions[0, :]
            pos = lab.utils.quat_mul_vec(lab.utils.quat_inv(player.quaternions[0, :]), pos)

            # if we reach the reward we can transition to a stop
            # NOTE(review): state_reward never returns more than 1, and for a non-stop
            # clip (the only case reaching this line) never more than 0, so this
            # branch looks unreachable; stops are only entered through the policy below.
            if state_reward(player.current_clip.motion._id, pos[0], pos[2]) > 1:
                index = physics_costs[player.current_clip.motion._id, stops_indices].argmin()
                player.set_next_clip(stops_indices[index])

            else:
                next_clip, x_prime, z_prime, reward = use_optimal_policy(value_functions_precompute, ALPHA, player.current_clip.motion._id, pos[0], pos[2])
                display((pos, next_clip, x_prime, z_prime, reward))
            
                player.set_next_clip(next_clip)
        else:
            # In a stop clip: wait for more than 15 calls, then draw a new random target
            # and restart from clip 119.
            player.wait_counter += 1
            if player.wait_counter > 15:
                player.target = np.array([uniform(-400, 400), 0, uniform(-400, 400)], dtype=np.float32)
                player.set_next_clip(119)
                player.wait_counter = 0

    player.tick()
    
    q = player.quaternions
    p = player.positions
 
    a = lab.utils.quat_to_mat(q, p)
    viewer.set_shadow_poi(p[0])
    
    viewer.begin_shadow()
    viewer.draw(character, a)
    viewer.end_shadow()
    
    viewer.begin_display()
    viewer.draw_ground()
    viewer.draw(character, a)
    # Target marker drawn with an identity rotation at the target position.
    d = lab.utils.quat_to_mat(np.array([1,0,0,0], dtype=np.float32), player.target)
    viewer.draw(target, d)
    
    viewer.end_display()

    viewer.disable(depth_test=True)

    viewer.execute_commands()
    
interact(
    render, 
    frame=lab.Timeline(max=MAX_STEP_LEN-1)
)
viewer

---
## Motion Groups

In [None]:
class MotionGroup:
    """Weighted blend of a few source clips, resampled to a common duration.

    Attributes set by the constructor:
        indices: source clip ids (uint32).
        weights: blend weights, normalised to sum to 1 (float32).
        timings: weighted average of the first four timing values of the sources.
        side: side flag of the first source clip.
        quats, pos: blended local rotations / positions, one row per frame.
        constraints_q, constraints_p: two foot constraints computed from the blend.
    """

    def __init__(self, indices, weights, id):
        self.indices = np.array(indices, dtype=np.uint32)
        raw_weights = np.array(weights, dtype=np.float32)
        self.weights = raw_weights / np.sum(raw_weights)
        self.timings = np.sum(
            clips_timings[self.indices, :4] * self.weights[:, np.newaxis], axis=0
        ).astype(np.uint32)
        self.side = clips_sides[indices[0]]

        # Identity rotations and zero positions for every frame and bone.
        self.quats = np.zeros([MAX_STEP_LEN, bone_count, 4], dtype=np.float32)
        self.quats[..., 0] = 1
        self.pos = np.zeros([MAX_STEP_LEN, bone_count, 3], dtype=np.float32)
        self.constraints_q = np.zeros([2, 4], dtype=np.float32)
        self.constraints_p = np.zeros([2, 3], dtype=np.float32)
        self._id = id

        # For each source clip, the (fractional) source frame matching every
        # frame of the blend: the source is stretched to the blended duration.
        frame_count = self.timings[3] + 1
        self._interpolated_timings = np.zeros([self.indices.shape[0], frame_count])
        for row, clip in enumerate(self.indices):
            self._interpolated_timings[row, :] = np.linspace(0, clips_timings[clip, 3], frame_count)

        def _add_aligned(total, quat):
            # Add quat or -quat, whichever has the larger dot product with the
            # running sum (both represent the same rotation).
            flipped = -quat
            use_flipped = (np.sum(total * quat, axis=-1) < np.sum(total * flipped, axis=-1))[..., np.newaxis]
            total += use_flipped * flipped + (1.0 - use_flipped) * quat

        for frame in range(frame_count):
            blended_q = self.quats[frame, ...]
            blended_p = self.pos[frame, ...]
            blended_q[...] = 0
            for row, clip in enumerate(self.indices):
                source_time = self._interpolated_timings[row, frame]
                lower = np.floor(source_time).astype(np.uint32)
                frac = source_time - lower

                # Linear interpolation between the two neighbouring source frames.
                _add_aligned(blended_q, clips_q[clip, lower, :, :] * (1.0-frac) * self.weights[row])
                _add_aligned(blended_q, clips_q[clip, lower+1, :, :] * (frac) * self.weights[row])

                blended_p += clips_p[clip, lower, :, :] * (1.0-frac) * self.weights[row]
                blended_p += clips_p[clip, lower+1, :, :] * (frac) * self.weights[row]
        self.quats[...] = lab.utils.quat_normalize(self.quats)

        _, gpos = lab.utils.quat_fk(self.quats, self.pos, animations[0].parents)

        # Side 0 uses foot tags (0, 1) for the first constraint and (2, 3) for
        # the second; any other side swaps the two pairs.
        first_tag, second_tag = (0, 2) if self.side == 0 else (2, 0)
        for slot, timing_slot, tag in ((0, 0, first_tag), (1, 2, second_tag)):
            self.constraints_q[slot, :], self.constraints_p[slot, :] = compute_constraint_qp(
                gpos,
                self.timings[timing_slot],
                foottag_indices[tag],
                foottag_indices[tag + 1]
            )


In [None]:
def render(frame, w1=1.0, w2=0.0, w3=0.0):
    """Preview one frame of a blend of clips 1, 8 and 14.

    Args:
        frame: frame of the blended motion to display.
        w1, w2, w3: blend weights of the three clips (normalised by MotionGroup).
    """
    # The blend is rebuilt on every call so the weight sliders take effect.
    motion = MotionGroup([1,8,14], [w1, w2, w3], 0)
    q = motion.quats[frame]
    p = motion.pos[frame]

    a = lab.utils.quat_to_mat(q, p)
    viewer.set_shadow_poi(p[0])

    viewer.begin_shadow()
    viewer.draw(character, a)
    viewer.end_shadow()

    viewer.begin_display()
    viewer.draw_ground()
    viewer.draw(character, a)

    # One matrix per foot constraint of the blend.
    contacts_matrices = lab.utils.quat_to_mat( motion.constraints_q,  motion.constraints_p )

    viewer.draw(target, contacts_matrices)

    viewer.end_display()

    viewer.disable(depth_test=True)

    viewer.draw_axis(contacts_matrices, 20)
    viewer.draw_axis(character.world_skeleton_xforms(a), 5)
    viewer.draw_lines(character.world_skeleton_lines(a))

    viewer.execute_commands()

interact(
    render,
    frame=lab.Timeline(max=MAX_STEP_LEN-1)
)
viewer

### Helpers

* 0 -> 9 - walks curved
* 10 -> 23 - turns
* 24 - walk straight  
* 25 - start  
* 26 - stop  
* 27 - stop  
* 28 - stop  
* 29 - walk real slow  
* 30 - walk slow  
* 31 -> 46 -  stop  

In [None]:
# sorted_clip = np.arange(CLIP_COUNT)
# mask = np.zeros(sorted_clip.shape, bool)
# mask[np.argwhere(clips_sources < 47)[:, 0]] = True
# mask[np.argwhere(clips_sources < 31)[:, 0]] = False
# mask[np.argwhere(clips_sides == 0)[:, 0]] = False
# sorted_clip = sorted_clip[mask]
# sorted_clip

In [None]:
# sorted_clip = np.arange(CLIP_COUNT)
# mask = np.zeros(sorted_clip.shape, bool)
# mask[np.argwhere(clips_sources > 30 )[:, 0]] = True
# mask[np.argwhere(clips_sources == 24)[:, 0]] = True
# mask[stops_indices] = False
# mask[np.argwhere(clips_sides == 0)[:, 0]] = False
# sorted_clip = sorted_clip[mask]
# sorted_clip

In [None]:
# clip_is = np.arange(CLIP_COUNT)
# angles = np.atan2(
#     2 * clips_q[clip_is, clips_timings[:, 1], 0, 0] * clips_q[clip_is, clips_timings[:, 1], 0, 2], 
#     1.0 - (2 * clips_q[clip_is, clips_timings[:, 1], 0, 2] * clips_q[clip_is, clips_timings[:, 1], 0, 2])
# )*180.0 / np.pi
# distances = np.sqrt(clips_p[clip_is, clips_timings[:, 1], 0, 0]**2 + clips_p[clip_is, clips_timings[:, 1], 0, 2]**2)

In [None]:
# def group_indices_by_distance(indices, values, max_distance=2):
#     groups = []
#     current_group = [indices[0]]
#     current_sign = np.sign(values[0])
#     for i in range(1, len(indices)):
#         sign = np.sign(values[i])
#         if sign != current_sign or abs(values[i] - values[i-1]) >= max_distance:
#             if 2 <= len(current_group) <= 3:
#                 groups.append(current_group.copy())
#             current_group = [indices[i-1], indices[i]]
#             current_sign = sign
#         else:
#             current_group.append(indices[i])
#             if len(current_group) == 3:
#                 groups.append(current_group.copy())
#                 current_group = [indices[i]]
#                 current_sign = sign
#     if 2 <= len(current_group) <= 3:
#         groups.append(current_group)
#     return groups

In [None]:
# pairs = list(zip(sorted_clip[np.argsort(angles[sorted_clip])], sorted(angles[sorted_clip])))
# indices, values = zip(*pairs)
# grouped_indices = group_indices_by_distance(list(indices), list(values), max_distance=2)
# print([[int(i) for i in group] for group in grouped_indices])

# list(zip ( sorted_clip[np.argsort(angles[sorted_clip])], sorted(angles[sorted_clip]) ))

In [None]:
# pairs = list(zip(sorted_clip[np.argsort(distances[sorted_clip])], sorted(distances[sorted_clip])))
# indices, values = zip(*pairs)
# grouped_indices = group_indices_by_distance(list(indices), list(values), max_distance=5)
# print([[int(i) for i in group] for group in grouped_indices])

# list(zip ( sorted_clip[np.argsort(distances[sorted_clip])], sorted(distances[sorted_clip]) ))

### Build the motion groups

In [None]:
# motion_groups_indices = [
#     # rotate
#     [80, 79], [79, 74], [74,73,85], [85,87], [87,81], [81,86], [84, 88], [88,83], [83,82], [82, 78], [78,77], [77, 75, 76],
#     [0,1,7], [7,6,8], [8,14], [14,9,15], [15,13], [10,12,11], [11,17,16], [16,4,5], [5,3], [3,2],
#     # turn
#     [112, 114], [114, 108, 110], [110, 106], [106, 115], [115, 104, 111], [111, 105, 113], [113, 109, 103], [103, 107], [107, 91, 95], [95, 99, 93], [93, 101, 97], [97, 89], [98, 102, 96], [96, 92], [92, 100], [100, 90, 94],
#     [45, 47], [47, 41], [41, 43], [43, 39], [39, 37], [37, 35], [46, 38], [38, 40, 42], [42, 18], [18, 34, 44], [44, 28], [28, 36, 21], [21, 30, 32], [32, 26], [26, 33, 24], [24, 22], [22, 31], [31, 19], [19, 29], [29, 25], [25, 27, 20], [20, 23],

#     # walk
#     [57, 64], [64, 59, 62], [62, 63, 49], [49, 58, 48], [48, 61, 60],
#     [117, 141, 118], [118, 116, 134], [134, 136, 139], [139, 135, 140], [140, 137], [137, 138],

#     #stop indices
#     [131, 121, 132], [132, 128, 127], [127, 130, 129], [129, 133, 126],
#     [71, 67, 70], [70, 69, 66], [66, 52], [52, 72, 68], [68, 51, 65]
# ]

# Source clips blended into each motion group: every entry lists the 2 or 3
# clip ids of one group. The order matters: the last nine entries are the
# stop groups, which is what the `9 * 11` slice that builds stops_indices relies on.
motion_groups_indices = [
    # rotate
    [80, 79], [74,73,85], [85,87], [87,81], [81,86], [84, 88], [88,83], [83,82], [82, 78], [78,77], [77, 75, 76],
    [0,1], [7,6], [6,8], [8,14], [14,9,15], [15,13], [10,12,11], [11,17,16], [16,4,5], [5,3], [3,2],
    # turn
    [112, 114], [114, 108, 110], [110, 106], [115, 104, 111], [111, 105, 113], [113, 109, 103], [103, 107], [107, 91, 95], [95, 99, 93], [93, 101, 97], [97, 89], [98, 102, 96], [96, 92], [100, 94], [90, 94],
    [45, 47], [47, 41], [41, 43], [43, 39], [39, 37], [37, 35], [46, 38], [38, 40, 42], [42, 18], [18, 34, 44], [44, 28], [28, 36, 21], [21, 30, 32], [32, 26], [26, 33, 24], [24, 22], [22, 31], [29, 25], [25, 27, 20], [20, 23],

    # walk
    [57, 64], [64, 59, 62], [62, 63, 49], [49, 58, 48], [48, 61, 60],
    [117, 141, 118], [118, 116, 134], [134, 136, 139], [139, 135, 140], [140, 137], [137, 138],

    #stop indices
    [131, 121, 132], [132, 128, 127], [127, 130, 129], [129, 133, 126],
    [71, 67, 70], [70, 69, 66], [66, 52], [52, 72, 68], [68, 51, 65]
]

In [None]:
# Eleven blend-weight rows for groups of 1, 2 and 3 clips.
# `blend` runs from 1/11 to 1: the first clip fades out while the last fades in.
blend = np.linspace(0, 1, 12, dtype=np.float32)[1:]

weights_1 = np.ones([11, 1], dtype=np.float32)

weights_2 = np.stack([1. - blend, blend], axis=1)

# The middle clip of a triple follows a tent: up to 1 at row 5, back to 0 at row 10.
rise = np.linspace(0, 1, 7, dtype=np.float32)[1:]
fall = 1.0 - np.linspace(0, 1, 6, dtype=np.float32)
weights_3 = np.stack([1.0 - blend, np.concatenate([rise[:5], fall]), blend], axis=1)
# Rows of the triple table are renormalised to sum to 1.
weights_3 = weights_3 / np.sum(weights_3, axis=1, keepdims=True)

In [None]:
# Expand every group into one MotionGroup per weight row; a group's position
# in `motion_groups` is also its id.
motion_groups = []
for indices in motion_groups_indices:
    # Weight table matching the number of clips in the group (single-clip table otherwise).
    weights = {2: weights_2, 3: weights_3}.get(len(indices), weights_1)

    for w in weights:
        mg = MotionGroup(indices, w, len(motion_groups))
        motion_groups.append(mg)
GROUP_COUNT = len(motion_groups)
GROUP_COUNT

In [None]:
def render(frame, group_id=0, weights_id=0):
    """Preview one frame of a prebuilt motion group next to its source clips.

    Args:
        frame: frame to display.
        group_id: index into motion_groups_indices.
        weights_id: which of the group's 11 weight variations to show.
    """
    # Each group occupies 11 consecutive entries of motion_groups.
    motion = motion_groups[group_id * 11 + weights_id]
    display(motion.indices)
    q = motion.quats[frame]
    p = motion.pos[frame]

    a = lab.utils.quat_to_mat(q, p)
    viewer.set_shadow_poi(p[0])

    viewer.begin_shadow()
    viewer.draw(character, a)
    viewer.end_shadow()

    viewer.begin_display()
    viewer.draw_ground()
    viewer.draw(character, a)

    # One matrix per foot constraint of the group.
    contacts_matrices = lab.utils.quat_to_mat( motion.constraints_q,  motion.constraints_p )

    viewer.draw(target, contacts_matrices)

    viewer.end_display()

    viewer.disable(depth_test=True)

    viewer.draw_axis(contacts_matrices, 20)
    viewer.draw_axis(character.world_skeleton_xforms(a), 5)
    viewer.draw_lines(character.world_skeleton_lines(a))

    # Skeletons of the source clips at the same frame index, for comparison.
    for i in motion.indices:
        q = clips_q[i, frame, :, :]
        p = clips_p[i, frame, :, :]
        a = lab.utils.quat_to_mat(q, p)
        viewer.draw_lines(character.world_skeleton_lines(a))

    viewer.execute_commands()

interact(
    render,
    frame=lab.Timeline(max=MAX_STEP_LEN-1),
    group_id=widgets.IntSlider(min=0, max=GROUP_COUNT//11-1, step=1, value=0),
    weights_id=widgets.IntSlider(min=0, max=10, step=1, value=0)
)
viewer

### Cost

In [None]:
%%time

display(progress_output)

# The pre-computation is disabled here; uncomment to rebuild the cache file
# for the current motion_groups.
# with progress_output:
#     physics_costs, delta_theta, delta_x, delta_z = pre_compute_transitions_costs(motion_groups, 'realtime_planning_animations_group_costs.dat')

# Load the cached transition tables. This rebinds physics_costs / delta_* used by
# transition, transition_inv and transition_reward to the motion-group versions.
# pickle executes code while loading: only open a cache file you produced yourself.
with open('realtime_planning_animations_group_costs.dat', 'rb') as f:
    physics_costs, delta_theta, delta_x, delta_z = pickle.load(f)

### Learn

In [None]:
# Ids of the stop groups: the last 9 entries of motion_groups_indices,
# each expanded into 11 weight variations.
stops_indices = np.arange(len(motion_groups) - 9 * 11, len(motion_groups))
stops_indices

In [None]:
# compute local position to end on 0,0,0
# For every stop group: where the final resting point lies, seen from the
# root at the clip's timings[1] frame.
stops_positions = []
for stop_clip in stops_indices:  # named so the builtin `id` is not shadowed
    player = Player(motion_groups)
    player.set_next_clip(stop_clip)

    clip = player.current_clip
    start_frame = clip.motion.timings[1]
    last_frame = clip.motion.timings[3] - 1

    # Re-base the root track (bone 0) on the root transform of the last frame,
    # so the motion ends at the origin.
    clip.quaternions[:, 0], clip.positions[:, 0] = lab.utils.qp_mul(
        lab.utils.qp_inv((clip.quaternions[last_frame, 0][np.newaxis,...], clip.positions[last_frame, 0][np.newaxis,...])),
        (clip.quaternions[:, 0], clip.positions[:, 0])
    )
    # Origin (the end point) expressed in the root frame at start_frame.
    pos = -clip.positions[start_frame, 0]
    pos = lab.utils.quat_mul_vec(lab.utils.quat_inv(clip.quaternions[start_frame, 0]), pos)
    stops_positions.append(pos)
stops_positions = np.array(stops_positions, dtype=np.float32)
stops_positions

In [None]:
# Rows are (group, x, z, steps_to_stop). The buffer is sized from the exact
# upper bound of what the loops below can write: every seed state stores
# itself (1 row) and _rollback_all adds at most 1 + 5 + 25 predecessor rows
# (recursion levels 1..3, fan-out 5 below the first level).
_STATES_PER_SEED = 1 + (1 + 5 + 25)
_SEED_COUNT = len(stops_indices) * 1 * 20
all_valid_states = np.zeros([_SEED_COUNT * _STATES_PER_SEED, 4], dtype=np.float32)
all_valid_states_count = 0

def _rollback_all(state_clip, state_x, state_z, action_id, recursion_counter):
    """Record the ``action_id``-th cheapest predecessor of a state, recursively.

    The predecessor state is appended to ``all_valid_states`` together with
    ``recursion_counter`` (its number of steps to the stop). Predecessors
    that are themselves stops are ignored. Recursion continues through the
    5 cheapest predecessors until the counter reaches 4.
    """
    global all_valid_states_count
    previous_clip = physics_costs[:, state_clip].argsort()[action_id]
    clip_prime, x_prime, z_prime = transition_inv(previous_clip, state_clip, state_x, state_z )
    if clip_prime not in stops_indices:
        all_valid_states[all_valid_states_count, :] = ((clip_prime, x_prime, z_prime, recursion_counter))
        all_valid_states_count += 1
        recursion_counter += 1
        if recursion_counter < 4:
            for i in range(5):
                _rollback_all(clip_prime, x_prime, z_prime, i, recursion_counter)

# stops_positions[k] belongs to stops_indices[k].
for stop_id, state_clip in enumerate(stops_indices):

    for _ in range(1):
        # Jitter the goal around the exact stop position.
        x = uniform(-20, 20)
        z = uniform(-20, 20)

        # The 20 cheapest predecessors of each stop are used as first step.
        for i in range(20):
            all_valid_states[all_valid_states_count, :] = ((state_clip, stops_positions[stop_id,0] + x, stops_positions[stop_id,2] + z, 0))
            all_valid_states_count += 1
            _rollback_all(state_clip,stops_positions[stop_id,0] + x, stops_positions[stop_id,2] + z, i, 1)

# Keep only the rows that were written.
all_valid_states = all_valid_states[:all_valid_states_count, :]
all_valid_states.shape

In [None]:
# Regular grid (x and z) on which every value function is tabulated.
pre_compute_table_x = np.linspace(-1000, 1000, 1001)
pre_compute_table_z = np.linspace(-1000, 1000, 1001)

# Candidate groups, and for each the value-function table it reads:
# the 11 weight variations of a group share one table.
clip_arange = np.arange(GROUP_COUNT)
clip_vf_arange = np.repeat(np.arange(int(GROUP_COUNT/11)), 11)

# Same two arrays with the stop groups removed.
mask = np.ones(clip_arange.shape, bool)
mask[stops_indices] = False
clip_arange_no_stop = clip_arange[mask]

mask = np.ones(clip_vf_arange.shape, bool)
mask[stops_indices] = False
clip_vf_arange_no_stop = clip_vf_arange[mask]


def use_optimal_policy(value_functions, alpha, state_clip, x, z, no_stop=False):
    """Greedy one-step lookahead over motion groups from (state_clip, x, z).

    Every candidate group is scored with 0.4 times its transition reward plus
    ``alpha`` times the value of the state it leads to, read from the table
    shared by the candidate's weight variations.

    Args:
        no_stop: when true, stop groups are not considered as candidates.

    Returns:
        (next_group, x_next, z_next, value) where ``value`` is the best score
        plus the immediate reward of the current state.
    """
    if no_stop:
        candidates, candidate_tables = clip_arange_no_stop, clip_vf_arange_no_stop
    else:
        candidates, candidate_tables = clip_arange, clip_vf_arange

    _, next_x, next_z = transition(state_clip, x, z, candidates)

    scores = transition_reward(state_clip, candidates) * .4
    scores += alpha * get_value_function(value_functions, candidate_tables, next_x, next_z)

    best = np.argmax(scores)

    # A stop that would miss the goal is forbidden: retry without stop groups.
    if no_stop == False and state_reward(clip_arange[best], next_x[best], next_z[best]) < -99:
        return use_optimal_policy(value_functions, alpha, state_clip, x, z, True)

    current_reward = state_reward(state_clip, x, z)
    return candidates[best], next_x[best], next_z[best], scores[best] + current_reward

In [None]:
# One value-function table per entry of motion_groups_indices: the 11 weight
# variations of a group share the same table.
GROUP_VALUE_COUNT = int(GROUP_COUNT/11)
# Tables start at zero and are filled by training or by loading a cached result.
value_functions_precompute = np.zeros([GROUP_VALUE_COUNT, pre_compute_table_x.shape[0], pre_compute_table_z.shape[0]])

In [None]:
%%time
display(progress_output)

# Discount factor: weights the value of the next state in use_optimal_policy
# and decays the bootstrap targets (ALPHA ** steps_to_stop) in _generate_data.
ALPHA = .99
# Number of training iterations.
EPOCH = 15
# The training set is rebuilt when epoch % RESTART_EPOCH == 0; with
# RESTART_EPOCH > EPOCH that only happens at epoch 0.
RESTART_EPOCH = 100
# NOTE(review): presumably the step used when moving stored targets toward their
# re-evaluated value, as in the clip-level training; confirm in train_optimal_policy.
LEARN_RATE = 0.1
# NOTE(review): not used in the code visible here; confirm its role in train_optimal_policy.
REPEAT_FITTING = 1

# Every (x, z) pair of the lookup grid, flattened to shape (n_z * n_x, 2).
PRE_COMPUTE_TABLE_INDICES = np.stack(np.meshgrid(pre_compute_table_x, pre_compute_table_z), axis=2).reshape(-1, 2)

def _generate_data(count, X, y, clip_index):
    # force all the known path
    for s in all_valid_states[::3]:
        clip, x, z, rec = int(s[0]), s[1], s[2], s[3]
        clip_group = int(clip/11)
        clip_index[clip_group, count[clip_group]] = clip
        X[clip_group, count[clip_group], 0] = x
        X[clip_group, count[clip_group], 1] = z
        y[clip_group, count[clip_group]] = 1.0 * (ALPHA ** rec)
        count[clip_group] += 1
        

def train_optimal_policy():
    """Train the per-group value tables and save them to disk.

    For each of the EPOCH epochs:
      1. On restart epochs (``epoch % RESTART_EPOCH == 0``) the sample buffers
         are reset and seeded through ``_generate_data``.
      2. For every clip in ``range(GROUP_COUNT)`` a random (x, z) start in
         [-500, 500]^2 is rolled out for at most 10 steps with
         ``use_optimal_policy``; every visited state is stored with the value
         returned by the policy. A rollout ends early once
         ``state_reward(...) >= 1``.
      3. One value table per group is fitted with
         ``mpf.reach_train_value_function`` through a process pool, then every
         stored sample is re-evaluated and its target is moved toward the new
         value by LEARN_RATE.

    The result is also pickled to
    'realtime_planning_reach_position_group_value_functions.dat'.

    Returns:
        tuple ``(value_functions_precompute, scores)``; ``scores[epoch]``
        holds [min residual, max residual, mean residual, number of rollouts
        that reached the reward].
    """
    # Capacity (samples per group) of the training buffers.
    buffer_size = 100000

    progress_output.clear_output()

    scores = np.zeros([EPOCH, 4])

    with Pool() as pool:

        for epoch in range(EPOCH):
            display(f"epoch : [{epoch}]")

            if epoch % RESTART_EPOCH == 0:
                # start from an empty data set seeded with the known paths
                display(f"bootstrap")
                count = np.zeros([GROUP_VALUE_COUNT], dtype=np.uint32)
                clip_index = np.zeros([GROUP_VALUE_COUNT, buffer_size], dtype=np.uint32)
                X = np.zeros([GROUP_VALUE_COUNT, buffer_size, 2])
                y = np.zeros([GROUP_VALUE_COUNT, buffer_size])
                _generate_data(count, X, y, clip_index)

            # else:
            #     # keep only half of the previous data
            #     for i in range(GROUP_VALUE_COUNT):
            #         count[i] /= 2

            # build a data set
            display(f"gather path data")
            reach_reward = 0

            for _ in range(1):
                for clip in range(GROUP_COUNT):
                    # dist = 100 + 500 * (float(epoch) / float(EPOCH))
                    dist = 500
                    x = uniform(-dist, dist)
                    z = uniform(-dist, dist)

                    for _step in range(10):
                        if state_reward(clip, x, z) >= 1:
                            reach_reward += 1
                            break

                        next_clip, x_prime, z_prime, reward = use_optimal_policy(value_functions_precompute, ALPHA, clip, x, z)

                        clip_group = int(clip/11)

                        clip_index[clip_group, count[clip_group]] = clip
                        X[clip_group, count[clip_group], 0] = x
                        X[clip_group, count[clip_group], 1] = z
                        y[clip_group, count[clip_group]] = reward
                        count[clip_group] += 1

                        # continue the rollout from the selected state
                        clip = next_clip
                        x = x_prime
                        z = z_prime

            # found
            display(f"found {reach_reward} succesful paths")

            for _ in range(REPEAT_FITTING):
                # train the value functions, one fit per group
                display(f"train the value functions")
                args = [(X[i, :count[i]], y[i, :count[i]], PRE_COMPUTE_TABLE_INDICES, pre_compute_table_x.shape[0]) for i in range(GROUP_VALUE_COUNT)]
                results = pool.starmap(mpf.reach_train_value_function, args)
                value_functions_precompute[:, :, :] = np.stack(results)

                # compute bellman residual: one entry per stored sample, so
                # the buffer is sized from the actual sample count instead of
                # a fixed guess that can be smaller than the data set
                residuals = np.zeros(int(count.sum()))
                n_residuals = 0

                display(f"re evaluate Value functions")
                for i in range(GROUP_VALUE_COUNT):
                    for j in range(count[i]):
                        _, _, _, reward = use_optimal_policy(value_functions_precompute, ALPHA, clip_index[i,j], X[i,j, 0], X[i,j, 1])

                        residuals[n_residuals] = np.abs(y[i, j] - reward)

                        # move the stored target toward the new estimate
                        y[i, j] += LEARN_RATE * (reward - y[i, j])
                        n_residuals += 1

                progress_output.clear_output()
                display(f"residuals; mean {residuals[:n_residuals].mean()} max {residuals[:n_residuals].max()} total states count {np.sum(count)}")
                scores[epoch, 0] = residuals[:n_residuals].min()
                scores[epoch, 1] = residuals[:n_residuals].max()
                scores[epoch, 2] = residuals[:n_residuals].mean()
                scores[epoch, 3] = reach_reward

    with open('realtime_planning_reach_position_group_value_functions.dat', 'wb') as f:
        pickle.dump((value_functions_precompute, scores), f)

    return value_functions_precompute, scores

# Training is expensive, so the cached result is used when it exists; delete
# the .dat file to force a new training run.
try:
    with open('realtime_planning_reach_position_group_value_functions.dat', 'rb') as f:
        # NOTE: pickle.load can execute arbitrary code; only load a file that
        # was produced by train_optimal_policy on a trusted machine.
        value_functions_precompute, scores = pickle.load(f)
except FileNotFoundError:
    # No cache available (e.g. fresh checkout): train, which also writes it.
    with progress_output:
        value_functions_precompute, scores = train_optimal_policy()

In [None]:
# Use the number of recorded epochs rather than EPOCH: `scores` may come from
# a cached file that was trained with a different epoch count.
x = np.arange(scores.shape[0])

fig, axs = plt.subplots(2, 1, figsize=(10, 8), sharex=True)

# First subplot: residual statistics (columns 0/1/2 = min/max/mean)
axs[0].plot(x, scores[:, 2], label='Mean', color='blue')
axs[0].fill_between(x, scores[:, 0], scores[:, 1], color='lightblue', alpha=0.4, label='Min-Max Range')
axs[0].set_title('Scores over Epoch (Mean and Range)')
axs[0].set_ylabel('Value')
axs[0].legend()
axs[0].grid(True)

# Second subplot: number of rollouts that reached the reward (column 3)
axs[1].plot(x, scores[:, 3], label='Found', color='blue')
axs[1].set_title('Scores over Epoch (Found)')
axs[1].set_xlabel('Index')
axs[1].set_ylabel('Value')
axs[1].legend()
axs[1].grid(True)

plt.tight_layout()
plt.show()

In [None]:
# Player driven by the learned policy.
player = Player(motion_groups)

# Clip 62 * 11 is the clip render() also re-queues after every stop.
player.set_next_clip(62 * 11)

# Position the character has to reach, plus the number of render calls spent
# waiting in a stop clip before a new target is drawn.
player.target = np.array([-200, 0, 500], dtype=np.float32)
player.wait_counter = 0

def render(frame):
    """Advance the player by one tick and draw the character and its target.

    When no next clip is queued and the current clip has reached
    ``timings[1]``, a decision is taken:
      * in a stop clip: wait, and after more than 15 waits draw a new random
        target and restart from clip ``62 * 11``;
      * otherwise: queue the cheapest stop clip if the target is reached, or
        else the clip chosen by ``use_optimal_policy``.

    ``frame`` is supplied by the timeline widget and is not used.
    """
    # `and` short-circuits: current_clip is only inspected when nothing is queued
    needs_decision = (
        player.next_clip is None
        and player.current_clip.motion.timings[1] <= player.current_clip.frame
    )

    if needs_decision:
        clip_id = player.current_clip.motion._id

        if clip_id in stops_indices:
            player.wait_counter += 1
            if player.wait_counter > 15:
                # redraw until the squared distance to the old target is not below 250
                while True:
                    new_target = np.array([uniform(-300, 300), 0, uniform(-300, 300)], dtype=np.float32)
                    if not (np.sum((player.target - new_target)**2) < 250):
                        break
                player.target = new_target
                player.set_next_clip(62 * 11)
                player.wait_counter = 0
        else:
            # target offset from positions[0], rotated by the inverse of quaternions[0]
            offset = player.target - player.positions[0, :]
            local = lab.utils.quat_mul_vec(lab.utils.quat_inv(player.quaternions[0, :]), offset)

            if state_reward(clip_id, local[0], local[2]) > 1:
                # if we reach the reward we can transition to the cheapest stop
                cheapest = physics_costs[clip_id, stops_indices].argmin()
                player.set_next_clip(stops_indices[cheapest])
            else:
                next_clip, x_prime, z_prime, reward = use_optimal_policy(value_functions_precompute, ALPHA, clip_id, local[0], local[2])
                display((local, next_clip, x_prime, z_prime, reward))

                player.set_next_clip(next_clip)

    player.tick()

    bone_quaternions = player.quaternions
    bone_positions = player.positions

    pose = lab.utils.quat_to_mat(bone_quaternions, bone_positions)
    viewer.set_shadow_poi(bone_positions[0])

    # shadow pass
    viewer.begin_shadow()
    viewer.draw(character, pose)
    viewer.end_shadow()

    # display pass: ground, character, then the target marker
    viewer.begin_display()
    viewer.draw_ground()
    viewer.draw(character, pose)
    target_matrix = lab.utils.quat_to_mat(np.array([1,0,0,0], dtype=np.float32), player.target)
    viewer.draw(target, target_matrix)

    viewer.end_display()

    viewer.disable(depth_test=True)

    viewer.execute_commands()
    
# Drive render() from a timeline widget with frames 0 .. MAX_STEP_LEN - 1,
# then show the viewer as the cell output.
timeline = lab.Timeline(max=MAX_STEP_LEN-1)
interact(render, frame=timeline)
viewer