In [9]:
import subprocess
import numpy as np


from robopianist.suite.tasks import self_actuated_piano
from robopianist.suite.tasks import piano_with_shadow_hands
from dm_env_wrappers import CanonicalSpecWrapper
from robopianist.wrappers import PianoSoundVideoWrapper
from robopianist import music
from mujoco_utils import composer_utils
import dm_env

### Self-actuated piano task

In [29]:
task = self_actuated_piano.SelfActuatedPiano(
    midi=music.load("TwinkleTwinkleRousseau"),
    change_color_on_activation=True,
    trim_silence=True,
    control_timestep=0.01,
)

env = composer_utils.Environment(
    recompile_physics=False, task=task, strip_singleton_obs_buffer_dim=True
)

env = PianoSoundVideoWrapper(
    env,
    record_every=1,
    camera_id="piano/back",
    record_dir=".",
)

In [30]:
action_spec = env.action_spec()
min_ctrl = action_spec.minimum
max_ctrl = action_spec.maximum
print(f"Action dimension: {action_spec.shape}")

Action dimension: (89,)


In [31]:
print("Observables:")
timestep = env.reset()
dim = 0
for k, v in timestep.observation.items():
    print(f"\t{k}: {v.shape} {v.dtype}")
    dim += np.prod(v.shape)
print(f"Observation dimension: {dim}")

Observables:
	goal: (89,) float64
	piano/activation: (88,) float64
	piano/sustain_activation: (1,) float64
Observation dimension: 178


In [32]:
class Oracle:
    def __call__(self, timestep: dm_env.TimeStep) -> np.ndarray:
        if timestep.reward is not None:
            assert timestep.reward == 0
        # Only grab the next timestep's goal state.
        goal = timestep.observation["goal"][: task.piano.n_keys]
        key_idxs = np.flatnonzero(goal)
        # For goal keys that should be pressed, set the action to the maximum
        # actuator value. For goal keys that should be released, set the action to
        # the minimum actuator value.
        action = min_ctrl.copy()
        action[key_idxs] = max_ctrl[key_idxs]
        # Grab the sustain pedal action.
        action[-1] = timestep.observation["goal"][-1]
        return action

In [60]:
policy = Oracle()

timestep = env.reset()
while not timestep.last():
    action = policy(timestep)
    timestep = env.step(action)

[0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.06656816 0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.06656816 0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         1.        ]


ValueError: operands could not be broadcast together with shapes (89,) (45,) (89,) 

In [37]:
#   The command to quite VLC after finished
QUIT_VLC_COMMAND = 'vlc://quit'
def play_video( video_path:str ):
    #   Using cvlc -> not interface
    subprocess.Popen( ['cvlc', video_path, QUIT_VLC_COMMAND ] )

In [38]:
play_video( env.latest_filename )

[000063f68b6d0810] dummy interface: using the dummy interface module...
[00007465e8004a70] gl gl: Initialized libplacebo v4.192.1 (API v192)
libva info: VA-API version 1.14.0
libva info: Trying to open /usr/lib/x86_64-linux-gnu/dri/radeonsi_drv_video.so
libva info: Found init function __vaDriverInit_1_14
libva info: va_openDriver() returns 0
[0000746600c3ae70] avcodec decoder: Using Mesa Gallium driver 23.2.1-1ubuntu3.1~22.04.2 for RENOIR (renoir, LLVM 15.0.7, DRM 3.54, 6.5.0-26-generic) for hardware decoding
[0000746600c4f200] idummy demux: command `quit'


### Piano with shadow hand

In [55]:
task = piano_with_shadow_hands.PianoWithShadowHands(
    change_color_on_activation=True,
    midi = music.load("TwinkleTwinkleRousseau"),
    trim_silence=True,
    control_timestep=0.05,
    gravity_compensation=False,
    primitive_fingertip_collisions=False,
    reduced_action_space=False,
    n_steps_lookahead=10,
    disable_fingering_reward=False,
    disable_forearm_reward=False,
    disable_colorization=False,
    disable_hand_collisions=False,
    attachment_yaw=0.0,
)

env = composer_utils.Environment(
    task=task, strip_singleton_obs_buffer_dim=True, recompile_physics=False
)

env = PianoSoundVideoWrapper(
    env,
    record_every=1000,
    camera_id="piano/back",
    record_dir=".",
)

env = CanonicalSpecWrapper(env)

In [56]:
action_spec = env.action_spec()
print(f"Action dimension: {action_spec.shape}")

Action dimension: (45,)


In [57]:
timestep = env.reset()
dim = 0
for k, v in timestep.observation.items():
    print(f"\t{k}: {v.shape} {v.dtype}")
    dim += int(np.prod(v.shape))
print(f"Observation dimension: {dim}")

	goal: (979,) float64
	fingering: (10,) float64
	piano/state: (88,) float64
	piano/sustain_state: (1,) float64
	rh_shadow_hand/joints_pos: (26,) float64
	lh_shadow_hand/joints_pos: (26,) float64
Observation dimension: 1130


In [58]:

class Policy:
    def __init__(self) -> None:
        self.reset()

    def reset(self) -> None:
        self._idx = 0
        self._actions = np.load("/home/ultra/work/oeipaca/robopianist/examples/twinkle_twinkle_actions.npy")

    def __call__(self, timestep: dm_env.TimeStep) -> np.ndarray:
        del timestep  # Unused.
        actions = self._actions[self._idx]
        self._idx += 1
        return actions

In [59]:
policy = Policy()

timestep = env.reset()
while not timestep.last():
    action = policy(timestep)
    timestep = env.step(action)

TimeStep(step_type=<StepType.MID: 1>, reward=2.67118109388404, discount=1.0, observation={'goal': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
  

In [53]:
play_video( env.latest_filename )

[000061d5d7706c20] dummy interface: using the dummy interface module...
[000077d954004a70] gl gl: Initialized libplacebo v4.192.1 (API v192)
libva info: VA-API version 1.14.0
libva info: Trying to open /usr/lib/x86_64-linux-gnu/dri/radeonsi_drv_video.so
libva info: Found init function __vaDriverInit_1_14
libva info: va_openDriver() returns 0
[000077d968c25390] avcodec decoder: Using Mesa Gallium driver 23.2.1-1ubuntu3.1~22.04.2 for RENOIR (renoir, LLVM 15.0.7, DRM 3.54, 6.5.0-26-generic) for hardware decoding
[000077d968c39b50] idummy demux: command `quit'
