<a href="https://colab.research.google.com/github/PsorTheDoctor/visuomotor-robot-policies/blob/main/diffusion_policy/diffusion_policy_state.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Diffusion policy: state-based environment

In [None]:
!pip3 install -q torch==1.13.1 torchvision==0.14.1 diffusers==0.18.2 \
scikit-image==0.19.3 scikit-video==1.1.11 zarr==2.12.0 numcodecs==0.10.2 \
pygame==2.1.2 pymunk==6.2.1 gym==0.26.2 shapely==1.8.4

In [None]:
# Clone the project repository and make the checkout the working directory.
# The `utils.*` imports in the next cell are presumably resolved relative to
# this directory -- run this cell once per session before importing.
!git clone https://github.com/PsorTheDoctor/visuomotor-robot-policies.git
%cd visuomotor-robot-policies/

In [17]:
import os
import numpy as np
import gdown
import torch
import torch.nn as nn
import collections
from skvideo.io import vwrite
from IPython.display import Video
from huggingface_hub.utils import IGNORE_GIT_FOLDER_PATTERNS

from diffusers.schedulers.scheduling_ddpm import DDPMScheduler
from diffusers.training_utils import EMAModel
from diffusers.optimization import get_scheduler
from tqdm.auto import tqdm

from utils.env import PushTEnv
from utils.dataset import PushTStateDataset, normalize_data, unnormalize_data
from utils.unet import ConditionalUnet1D

# Smoke-test the environment: seed it, reset it and take one random step,
# then print the observation/action next to a legend of their components.
env = PushTEnv()
env.seed(1000)
# reset() returns a pair; bind the second element to `info` instead of
# clobbering an unrelated imported name.
obs, info = env.reset()
action = env.action_space.sample()
obs, reward, terminated, truncated, info = env.step(action)

with np.printoptions(precision=4, suppress=True, threshold=5):
  print("Obs: ", repr(obs))
  print("Obs:        [agent_x,  agent_y,  block_x,  block_y,    block_angle]")
  print("Action: ", repr(action))
  print("Action:   [target_agent_x, target_agent_y]")

Obs:  array([133.9656, 183.8927, 292.    , 351.    ,   2.9196])
Obs:        [agent_x,  agent_y,  block_x,  block_y,    block_angle]
Action:  array([126.7373, 333.7111])
Action:   [target_agent_x, target_agent_y]


## Dataset

In [9]:
# Download the demonstration dataset once; later runs reuse the local file.
dataset_path = "pusht_cchi_v7_replay.zarr.zip"
if not os.path.isfile(dataset_path):
  # Named `gdrive_file_id` so the builtin `id()` is not shadowed for the rest
  # of the kernel session.
  gdrive_file_id = "1KY1InLurpMvJDRb14L9NlXT_fEsCvVUq&confirm=t"
  gdown.download(id=gdrive_file_id, output=dataset_path, quiet=False)

# Horizons (in time steps) shared by the dataset, the network and inference.
pred_horizon = 16
obs_horizon = 2
action_horizon = 8

dataset = PushTStateDataset(
    dataset_path=dataset_path,
    pred_horizon=pred_horizon,
    obs_horizon=obs_horizon,
    action_horizon=action_horizon
)
# Normalisation statistics; reused at inference time to (un)normalise data.
stats = dataset.stats

dataloader = torch.utils.data.DataLoader(
    dataset,
    batch_size=256,
    num_workers=1,
    shuffle=True,
    pin_memory=True,
    persistent_workers=True
)
# Pull a single batch to sanity-check the tensor shapes.
batch = next(iter(dataloader))
print("batch['obs'].shape:", batch['obs'].shape)
print("batch['action'].shape", batch['action'].shape)

batch['obs'].shape: torch.Size([256, 2, 5])
batch['action'].shape torch.Size([256, 16, 2])


## Network

In [10]:
# Sizes of one observation vector and one action vector.
obs_dim = 5
action_dim = 2

# Noise-prediction network, conditioned on the flattened observation history.
noise_pred_net = ConditionalUnet1D(
    input_dim=action_dim,
    global_cond_dim=obs_dim*obs_horizon
)

# Example forward pass on dummy inputs to check that the shapes line up.
noised_action = torch.randn((1, pred_horizon, action_dim))
obs = torch.zeros((1, obs_horizon, obs_dim))
diffusion_iter = torch.zeros((1,))

noise = noise_pred_net(
    sample=noised_action,
    timestep=diffusion_iter,
    global_cond=obs.flatten(start_dim=1)
)
# Illustration only: subtract the predicted noise from the noisy sample.
denoised_action = noised_action - noise

# Number of diffusion steps, used for both training and inference.
diffusion_iters = 100
noise_scheduler = DDPMScheduler(
    num_train_timesteps=diffusion_iters,
    beta_schedule='squaredcos_cap_v2',
    clip_sample=True,
    prediction_type='epsilon'
)

# Prefer the GPU, but fall back to the CPU so the notebook still runs
# (slowly) on machines without CUDA instead of crashing here.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
_ = noise_pred_net.to(device)

number of parameters: 6.535322e+07


## Training

In [None]:
epochs = 100

# Exponential Moving Average of the network weights; the averaged weights
# are the ones used for inference.
ema = EMAModel(
    parameters=noise_pred_net.parameters(), power=0.75
)
optimizer = torch.optim.AdamW(
    params=noise_pred_net.parameters(),
    lr=1e-4, weight_decay=1e-6
)
# Cosine schedule with warm-up; it is stepped once per optimisation step,
# hence the total step count of batches-per-epoch * epochs.
lr_scheduler = get_scheduler(
    name='cosine',
    optimizer=optimizer,
    num_warmup_steps=500,
    num_training_steps=len(dataloader) * epochs
)

with tqdm(range(epochs), desc='Epoch') as tglobal:
  for epoch_idx in tglobal:
    epoch_loss = list()
    with tqdm(dataloader, desc='Batch', leave=False) as tepoch:
      for nbatch in tepoch:
        nobs = nbatch['obs'].to(device)
        naction = nbatch['action'].to(device)
        B = nobs.shape[0]

        # Conditioning vector: the first obs_horizon observations, flattened
        # to one row per sample.
        obs_cond = nobs[:, :obs_horizon, :]
        obs_cond = obs_cond.flatten(start_dim=1)

        # Noise that will be added to the actions (the regression target).
        noise = torch.randn(naction.shape, device=device)

        # One random diffusion timestep per sample in the batch.
        timesteps = torch.randint(
            0, noise_scheduler.config.num_train_timesteps,
            (B,), device=device
        ).long()

        # Forward diffusion: corrupt the actions according to each timestep.
        noisy_actions = noise_scheduler.add_noise(
            naction, noise, timesteps
        )
        noise_pred = noise_pred_net(
            noisy_actions, timesteps, global_cond=obs_cond
        )
        loss = nn.functional.mse_loss(noise_pred, noise)

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        lr_scheduler.step()
        # Update the moving average after every optimisation step.
        ema.step(noise_pred_net.parameters())

        loss_cpu = loss.item()
        epoch_loss.append(loss_cpu)
        tepoch.set_postfix(loss=loss_cpu)
    tglobal.set_postfix(loss=np.mean(epoch_loss))

# NOTE: this is an alias, not a copy -- loading the EMA weights overwrites
# the weights of noise_pred_net in place.
ema_noise_pred_net = noise_pred_net
# copy_to() expects an iterable of parameters, so parameters() must be called;
# passing the bound method itself raises a TypeError.
ema.copy_to(ema_noise_pred_net.parameters())

## Inference

In [18]:
max_steps = 200
env = PushTEnv()
env.seed(100000)  # use a seed >200 to avoid initial states seen in the training data

obs, info = env.reset()
# Rolling window of the latest obs_horizon observations, pre-filled with the
# first observation so the policy has a full history from step 0.
obs_deque = collections.deque(
    [obs] * obs_horizon, maxlen=obs_horizon
)
imgs = [env.render(mode='rgb_array')]
rewards = list()
done = False
step_idx = 0
B = 1  # a single rollout, so the batch dimension is 1

with tqdm(total=max_steps, desc='Eval PushTStateEnv') as pbar:
  while not done:
    # Stack and normalise the observation history with the dataset statistics.
    obs_seq = np.stack(obs_deque)
    nobs = normalize_data(obs_seq, stats=stats['obs'])
    nobs = torch.from_numpy(nobs).to(device, dtype=torch.float32)

    with torch.no_grad():
      obs_cond = nobs.unsqueeze(0).flatten(start_dim=1)

      # Initialize action from Gaussian noise
      noisy_action = torch.randn(
          (B, pred_horizon, action_dim), device=device
      )
      naction = noisy_action
      noise_scheduler.set_timesteps(diffusion_iters)
      for k in noise_scheduler.timesteps:
        # Predict noise
        noise_pred = ema_noise_pred_net(
            sample=naction,
            timestep=k,
            global_cond=obs_cond
        )
        # Inverse diffusion step (remove noise)
        naction = noise_scheduler.step(
            model_output=noise_pred,
            timestep=k,
            sample=naction
        ).prev_sample

    naction = naction.detach().to('cpu').numpy()
    naction = naction[0]
    action_pred = unnormalize_data(naction, stats=stats['action'])

    # Execute only a slice of the predicted sequence, starting at the index
    # of the most recent observation.
    start = obs_horizon - 1
    end = start + action_horizon
    action = action_pred[start:end, :]

    # Execute action_horizon number of steps without replanning
    for i in range(len(action)):
      obs, reward, terminated, truncated, info = env.step(action[i])
      # Either flag ends the episode.
      done = terminated or truncated
      obs_deque.append(obs)
      rewards.append(reward)
      imgs.append(env.render(mode='rgb_array'))

      step_idx += 1
      pbar.update(1)
      pbar.set_postfix(reward=reward)
      # Stop after exactly max_steps environment steps, matching the progress
      # bar total (a strict `>` would allow one extra step).
      if step_idx >= max_steps:
        done = True
      if done:
        break

print('Score:', max(rewards))

# Save the rendered frames as a video and display it inline.
vwrite('vis.mp4', imgs)
Video('vis.mp4', embed=True, width=256, height=256)

Eval PushTStateEnv:   0%|          | 0/200 [00:00<?, ?it/s]

Score: 0.9928486767975621
