In [6]:
import h5py
import gym
from d4rl.kitchen.kitchen_envs import KitchenBase
import imageio
import numpy as np
from PIL import Image
from d4rl.kitchen.kitchen_envs import OBS_ELEMENT_INDICES, OBS_ELEMENT_GOALS
BONUS_THRESH = 0.3
# load data
f = h5py.File('/nfs/kun2/users/dashora7/franka_datasets/complete-v0.hdf5')
rews = f['rewards'][:]
terms = f['terminals'][:]
actions = f['actions'][:]

# every time we hit a 1.0 reward, we have a new end of trajectory
# starts are after terminals
starts = [0] + list(terms.nonzero()[0])
ends = (np.diff(rews) == 1.0) & (rews[1:] == 1.0)
ends = ends.nonzero()[0]
trajs = [(s, e + 10) for s, e in zip(starts, ends)]
trajs[-1] = (trajs[-1][0], trajs[-1][1])
trajs.pop(8)
trajs.pop(10)
trajs.pop(16)
print(trajs)

[(0, 51), (183, 233), (368, 421), (559, 610), (754, 815), (960, 1012), (1155, 1205), (1337, 1391), (1719, 1770), (1913, 1965), (2304, 2355), (2496, 2553), (2690, 2741), (2873, 2932), (3077, 3133), (3283, 3333)]


In [7]:
# make a sticky random smooth sampler
base_sampler = lambda: np.random.normal(0, 0.15)
class SmoothSampler:
    def __init__(self, std, smooth=0.9):
        self.std = std
        self.smooth = smooth
        self.prev = 0
    def __call__(self):
        self.prev = self.smooth * self.prev + (1 - self.smooth) * np.random.normal(0, self.std)
        return self.prev
    def reset(self):
        self.prev = 0

In [8]:
# make kitchen env and follow actions and render
# env = KitchenBase(control_mode='joint_velocity')

env = KitchenBase(
    control_mode='joint_velocity',
    # wrist_cam_concat_with_fixed_view=True,
    image_obs=True, imwidth=128, imheight=128,
    dense=False, frame_skip=40)

obses = []
dones = []
rewards = []
total, total_success = 0, 0
noise_levels = [0] #  [0, 0.05, 0.1, 0.2, 0.3]
num_trajs = 20 # 2000
loops = (num_trajs / len(noise_levels)) / len(trajs)

for noise in noise_levels:
    smooth_sampler = SmoothSampler(noise, smooth=0.9)
    base_sampler = lambda: np.random.normal(0, min(0.2, noise))
    for i in range(int(loops)):
        for traj in trajs:
            atraj = f['actions'][traj[0]:traj[1] + 1]
            env.reset()
            
            im = env.render(mode="rgb_array")
            
            im = np.array(Image.fromarray(im).resize((128, 128)))
            obses.append(im)
            dones.append(0)
            rewards.append(0)
            
            _, goal_qp, _, _, _ = env.robot.get_obs(env, robot_noise_ratio=0)
            done = False
            success = False
            imgs = []
            for i in range(0, atraj.shape[0]):
                a = atraj[i]
                # add epsilon noise
                a = a # + base_sampler() +  smooth_sampler()
                o, r, d, i = env.step(a)
                
                t, next_q_obs, qv, next_obj_obs, obj_qv = env.robot.get_obs(
                    env, robot_noise_ratio=env.robot_noise_ratio
                )
                idx_offset = len(next_q_obs)
                
                element_idx = OBS_ELEMENT_INDICES["microwave"]
                distance = np.linalg.norm(
                    next_obj_obs[..., element_idx - idx_offset] - OBS_ELEMENT_GOALS["microwave"]
                )
                r = distance < BONUS_THRESH
                
                if r:
                    success = True
                
                im = env.render(mode="rgb_array")
                im = np.array(Image.fromarray(im).resize((128, 128)))
                obses.append(im)
                dones.append(0)
                rewards.append(float(r))
            
            for t in range(20):
                _, qpos, qvel, _, _ = env.robot.get_obs(env, robot_noise_ratio=0)
                delta = goal_qp - qpos
                a = delta / 0.1
                env.step(a)
                im = env.render(mode="rgb_array")
                im = np.array(Image.fromarray(im).resize((128, 128)))
                obses.append(im)
                dones.append(0)
                rewards.append(float(r))
                if np.linalg.norm(delta) < 0.02:
                    break   
            
            smooth_sampler.reset()
            dones[-1] = 1.0
            if success:
                total_success += 1

print("Success rate: ", total_success / num_trajs)
# save video
imageio.mimsave('kitchen.mp4', obses, fps=30)

  self.robot_mode[i] = read_config_from_node(
  arr = _as_array(src, shape)
  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


Success rate:  0.7


In [9]:
# save as npy dict
d = {
    'observations':
        {'image': np.array(obses)},
    'dones_float': np.array(dones),
    'rewards': np.array(rewards)
}
np.save('/nfs/kun2/users/dashora7/franka_datasets/microwave-optonly-reset.npy', d)

In [60]:
import pickle
# save dict
with open('/nfs/kun2/users/dashora7/franka_datasets/microwave_custom_reset.pkl', 'wb') as f:
    pickle.dump(d, f)