In [1]:
import os
import json
import h5py
import numpy as np

import robomimic
import robomimic.utils.file_utils as FileUtils
import robomimic.utils.env_utils as EnvUtils
import robomimic.utils.obs_utils as ObsUtils
import imageio

import torch
import torch.nn as nn
import torch.nn.functional as F 
from torch.optim import Adam 
import torch
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from matplotlib import pyplot as plt
from copy import deepcopy
import datetime

In [2]:
# Prefer the first CUDA device when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cpu


  return torch._C._cuda_getDeviceCount() > 0


### Load Robomimic Lift Dataset

In [3]:
# Path of the robomimic Lift low-dim HDF5 dataset.
# It can be overridden through the ROBOMIMIC_LIFT_DATASET environment variable so the
# notebook also runs on machines other than the one it was authored on; the default is
# the original hard-coded location.
dataset_path = os.environ.get(
    "ROBOMIMIC_LIFT_DATASET",
    "/home/ns/robomimic/datasets/lift/ph/low_dim_v141.hdf5",
)
if not os.path.isfile(dataset_path):
    # Fail early with an actionable message instead of a low-level HDF5 open error.
    raise FileNotFoundError(
        f"Dataset not found at {dataset_path!r}; "
        "set ROBOMIMIC_LIFT_DATASET to the location of low_dim_v141.hdf5"
    )

# The handle is intentionally left open: the following cells read trajectories through `f`.
f = h5py.File(dataset_path, "r")
demos = list(f["data"].keys())  # one entry per recorded demonstration group
num_demos = len(demos)
print(f'Number of demos: {num_demos}')

Number of demos: 200


In [4]:
# Observation entries that are stacked, in this order, into the flat state vector.
select_keys = [
    'object',
    'robot0_eef_pos',
    'robot0_eef_quat',
    'robot0_gripper_qpos',
]

In [5]:
# Use only the first 20 demonstrations to keep the dataset small and training fast.
n_demo = 20
demo_groups = [f['data']['demo_{}'.format(idx)] for idx in range(n_demo)]

# Per demo: the stored actions, and the selected observation entries stacked column-wise.
per_demo_actions = [grp['actions'] for grp in demo_groups]
per_demo_obs = [
    np.hstack([grp['obs'][key] for key in select_keys])
    for grp in demo_groups
]

# Join every demo along the first axis so each row is one (state, action) sample.
actions_list = np.concatenate(per_demo_actions)
obs_list = np.concatenate(per_demo_obs)
obs_list.shape, actions_list.shape

((1042, 19), (1042, 7))

In [6]:
# Pair every observation row with its action so the loader yields (state, action) mini-batches.
paired_samples = list(zip(obs_list, actions_list))
data_loader = DataLoader(paired_samples, batch_size=64, shuffle=True)

# Draw one shuffled batch to inspect the tensor shapes the model will receive.
batch = next(iter(data_loader))
states, actions = batch[0], batch[1]
states.shape, actions.shape

(torch.Size([64, 19]), torch.Size([64, 7]))

In [7]:
# Network input/output widths are taken from the second axis of the sample batch.
state_dim, action_dim = (tensor.shape[1] for tensor in (states, actions))
print(state_dim, action_dim)

19 7


### Model

In [8]:
class MLP(nn.Module):
    """Feature extractor made of two Linear+ReLU stages (input_dim -> size -> size)."""

    def __init__(self, input_dim, size=32):
        super().__init__()
        # Layers are kept in a Sequential stored as `net`; subclasses call it directly.
        hidden_layers = [
            nn.Linear(input_dim, size),
            nn.ReLU(),
            nn.Linear(size, size),
            nn.ReLU(),
        ]
        self.net = nn.Sequential(*hidden_layers)

    def forward(self, x):
        """Return the `size`-wide, non-negative feature vector for `x`."""
        return self.net(x)
    
class RegNet(MLP):
    """Regression network: the MLP feature trunk followed by a linear output head."""

    def __init__(self, input_dim , size, action_dim):
        super().__init__(input_dim, size)
        # Linear head mapping the `size`-wide features to `action_dim` outputs.
        self.decoder = nn.Linear(size, action_dim)

    def forward(self, x):
        """Run the inherited trunk, then project its features with `decoder`."""
        features = super().forward(x)
        return self.decoder(features)

### Training

In [9]:
# Behaviour-cloning setup: a 64-unit RegNet fitted with an MSE loss and the Adam optimiser.
learning_rate = 1e-4

bc = RegNet(input_dim=state_dim, size=64, action_dim=action_dim)
criterion = nn.MSELoss()
optimizer = Adam(params=bc.parameters(), lr=learning_rate)

In [10]:
loss_list = []  # loss of every optimisation step, in order
n_epoch = 3_000

# Report roughly 20 times over the run. The max() guard keeps the interval >= 1, so
# choosing n_epoch < 20 no longer triggers a modulo-by-zero.
log_every = max(1, n_epoch // 20)

# The range deliberately includes n_epoch so the last epoch is also reported.
for itr in range(0, n_epoch+1):
    total_loss = 0
    b = 0  # number of batches seen this epoch
    for batch_states, batch_actions in data_loader:
        # Batches are cast to float32 to match the model parameters.
        y_pred = bc(batch_states.float())
        loss = criterion(y_pred, batch_actions.float())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Read the scalar once and reuse it for both the epoch mean and the history.
        batch_loss = loss.item()
        total_loss += batch_loss
        loss_list.append(batch_loss)
        b += 1

    if itr % log_every == 0:
        print(f'Epoch {itr} Loss: {total_loss/b:.4f}')

Epoch 0 Loss: 0.1812
Epoch 150 Loss: 0.0570
Epoch 300 Loss: 0.0404
Epoch 450 Loss: 0.0349
Epoch 600 Loss: 0.0316
Epoch 750 Loss: 0.0309
Epoch 900 Loss: 0.0292
Epoch 1050 Loss: 0.0300
Epoch 1200 Loss: 0.0271
Epoch 1350 Loss: 0.0254
Epoch 1500 Loss: 0.0244
Epoch 1650 Loss: 0.0233
Epoch 1800 Loss: 0.0223
Epoch 1950 Loss: 0.0214
Epoch 2100 Loss: 0.0205
Epoch 2250 Loss: 0.0196
Epoch 2400 Loss: 0.0197
Epoch 2550 Loss: 0.0187
Epoch 2700 Loss: 0.0185
Epoch 2850 Loss: 0.0175
Epoch 3000 Loss: 0.0174


### Inference

In [11]:
# Environment metadata later passed to EnvUtils.create_env_from_metadata.
# NOTE(review): the renderer flags are False here while the env is created with
# render_offscreen=True; presumably robomimic overrides them at creation time — confirm.
env_meta = dict(
    env_name='Lift',
    type=1,
    env_kwargs=dict(
        has_renderer=False,
        has_offscreen_renderer=False,
        ignore_done=True,
        use_object_obs=True,
        use_camera_obs=False,
        control_freq=20,
        controller_configs=dict(
            type='OSC_POSE',
            input_max=1,
            input_min=-1,
            output_max=[0.05, 0.05, 0.05, 0.5, 0.5, 0.5],
            output_min=[-0.05, -0.05, -0.05, -0.5, -0.5, -0.5],
            kp=150,
            damping=1,
            impedance_mode='fixed',
            kp_limits=[0, 300],
            damping_limits=[0, 10],
            position_limits=None,
            orientation_limits=None,
            uncouple_pos_ori=True,
            control_delta=True,
            interpolation=None,
            ramp_ratio=0.2,
        ),
        robots=['Panda'],
        camera_depths=False,
        camera_heights=84,
        camera_widths=84,
        reward_shaping=False,
    ),
)

In [None]:
# Instantiate the simulator from the metadata above; frames are only rendered off-screen.
env = EnvUtils.create_env_from_metadata(
    env_meta=env_meta,
    render=False,            # no on-screen rendering
    render_offscreen=True,   # off-screen rendering to support rendering video frames
)

# Minimal observation spec used to initialise robomimic's ObsUtils.
# NOTE(review): presumably any valid low-dim key suffices here — confirm against robomimic docs.
dummy_spec = {
    "obs": {
        "low_dim": ["robot0_eef_pos"],
        "rgb": [],
    },
}
ObsUtils.initialize_obs_utils_with_obs_specs(obs_modality_specs=dummy_spec)

In [17]:
def rollout(env, rollout_horizon = 400, video_path=None, policy=None):
    """Run one episode of a policy in `env` and return the summed reward.

    Args:
        env: environment exposing `reset()`, `step(action)`, `is_success()` and, when a
            video is requested, `render(...)`.
        rollout_horizon: maximum number of environment steps to execute.
        video_path: if given, one 512x512 "agentview" frame per step is written to this
            file at 20 fps.
        policy: callable mapping a 1-D float tensor of stacked observations to an action
            tensor. Defaults to the notebook-level model `bc`.

    Returns:
        The sum of the rewards returned by `env.step` until the episode stopped
        (on `done`, on task success, or when the horizon was reached).
    """
    if policy is None:
        policy = bc  # fall back to the model trained earlier in the notebook

    select_keys=['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos']
    total_reward = 0

    obs = env.reset()

    video_writer = None
    if video_path is not None:
        video_writer = imageio.get_writer(video_path, fps=20)
        print(f"Writing video to {video_path}")

    # try/finally guarantees the writer is closed (and the file finalised) even when the
    # simulator or the renderer raises in the middle of an episode.
    try:
        for _ in range(rollout_horizon):
            # Stack the selected observation entries in the order used for training.
            select_obs = np.hstack([obs[key] for key in select_keys])
            state = torch.from_numpy(select_obs).float()

            # Inference only: skip building the autograd graph.
            with torch.no_grad():
                act = policy(state).cpu().numpy()
            next_obs, r, done, _info = env.step(act)

            total_reward += r
            success = env.is_success()["task"]

            if video_writer is not None:
                video_img = env.render(mode="rgb_array", height=512, width=512, camera_name="agentview")
                video_writer.append_data(video_img)

            # Stop as soon as the environment reports termination or task success.
            if done or success:
                break

            # Copy so later environment updates cannot alias the stored observation.
            obs = deepcopy(next_obs)
    finally:
        if video_writer is not None:
            video_writer.close()

    return total_reward

In [None]:
# Evaluate the trained policy over several independent episodes (no video recording).
n_rollout = 20
episode_rewards = []
for i in range(n_rollout):
    r = rollout(env, video_path=None)
    episode_rewards.append(r)
    print(f'Rollout {i} Success: {r}')

# Summed in episode order, starting from 0, then averaged over the number of rollouts.
s = sum(episode_rewards)
print(f'\nAverage Reward: {s/n_rollout:.2f}')

### Render a video

In [None]:
# Run one more episode, this time saving the rendered frames to a video file.
video_file = "lift_inf.mp4"
r = rollout(env, video_path=video_file)
r

Writing video to lift_inf.mp4


1.0