In [1]:
from pathlib import Path

import gym_pusht  # noqa: F401
import gymnasium as gym
import imageio
import numpy
import torch
from huggingface_hub import snapshot_download

from lerobot.common.policies.diffusion.modeling_diffusion import DiffusionPolicy



  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Folder that receives the video recorded during this evaluation.
output_directory = Path("outputs/eval/example_pusht_diffusion")
output_directory.mkdir(parents=True, exist_ok=True)

# Fetch the pretrained PushT diffusion policy snapshot via huggingface_hub.
pretrained_policy_path = Path(snapshot_download("lerobot/diffusion_pusht"))
# Alternatively, uncomment the line below to evaluate a policy stored in the local
# outputs/train folder.
# pretrained_policy_path = Path("outputs/train/2024-11-26/18-20-16_pusht_diffusion_default")

# Load the policy from the resolved path and switch it to evaluation mode.
policy = DiffusionPolicy.from_pretrained(pretrained_policy_path)
policy.eval()

# Pick CUDA when it is available, otherwise fall back to the CPU.
use_gpu = torch.cuda.is_available()
device = torch.device("cuda" if use_gpu else "cpu")
if use_gpu:
    print("GPU is available. Device set to:", device)
else:
    print(f"GPU is not available. Device set to: {device}. Inference will be slower than on GPU.")
    # On CPU only: use fewer reverse-diffusion steps
    # (trades off a bit of quality for 10x speed).
    policy.diffusion.num_inference_steps = 10

# Move the policy to the selected device. As the last expression of the cell,
# the returned module is also displayed as the cell output.
policy.to(device)



Fetching 11 files: 100%|██████████| 11/11 [00:00<00:00, 100956.99it/s]


Loading weights from local directory
GPU is available. Device set to: cuda


DiffusionPolicy(
  (normalize_inputs): Normalize(
    (buffer_observation_image): ParameterDict(
        (mean): Parameter containing: [torch.cuda.FloatTensor of size 3x1x1 (cuda:0)]
        (std): Parameter containing: [torch.cuda.FloatTensor of size 3x1x1 (cuda:0)]
    )
    (buffer_observation_state): ParameterDict(
        (max): Parameter containing: [torch.cuda.FloatTensor of size 2 (cuda:0)]
        (min): Parameter containing: [torch.cuda.FloatTensor of size 2 (cuda:0)]
    )
  )
  (normalize_targets): Normalize(
    (buffer_action): ParameterDict(
        (max): Parameter containing: [torch.cuda.FloatTensor of size 2 (cuda:0)]
        (min): Parameter containing: [torch.cuda.FloatTensor of size 2 (cuda:0)]
    )
  )
  (unnormalize_outputs): Unnormalize(
    (buffer_action): ParameterDict(
        (max): Parameter containing: [torch.cuda.FloatTensor of size 2 (cuda:0)]
        (min): Parameter containing: [torch.cuda.FloatTensor of size 2 (cuda:0)]
    )
  )
  (diffusion): Diff

In [3]:
# Build the PushT evaluation environment.
# - obs_type="pixels_agent_pos": each observation carries both an image of the
#   scene ("pixels") and the state/position of the agent ("agent_pos").
# - max_episode_steps=300: the environment automatically stops the episode
#   after 300 interactions/steps.
env = gym.make("gym_pusht/PushT-v0", obs_type="pixels_agent_pos", max_episode_steps=300)




In [17]:
# Override two fields on the loaded policy's config before the rollout.
# Number of action steps; the original config displayed in this notebook shows 8.
policy.config.n_action_steps = 1
# NOTE(review): this only rewrites the config field. Whether the already-constructed
# model inside `policy.diffusion` picks up the new scheduler type is not visible
# from this notebook -- confirm against the policy implementation.
policy.config.noise_scheduler_type = "DDIM"
# Reset the policy after changing its configuration.
# Presumably this rebuilds its internal rollout state -- TODO confirm.
policy.reset()


In [16]:
# Display the policy's current configuration as this cell's output.
policy.config

DiffusionConfig(n_obs_steps=2, horizon=16, n_action_steps=8, input_shapes={'observation.image': [3, 96, 96], 'observation.state': [2]}, output_shapes={'action': [2]}, input_normalization_modes={'observation.image': 'mean_std', 'observation.state': 'min_max'}, output_normalization_modes={'action': 'min_max'}, vision_backbone='resnet18', crop_shape=[84, 84], crop_is_random=True, pretrained_backbone_weights=None, use_group_norm=True, spatial_softmax_num_keypoints=32, use_separate_rgb_encoder_per_camera=False, down_dims=[512, 1024, 2048], kernel_size=5, n_groups=8, diffusion_step_embed_dim=128, use_film_scale_modulation=True, noise_scheduler_type='DDIM', num_train_timesteps=100, beta_schedule='squaredcos_cap_v2', beta_start=0.0001, beta_end=0.02, prediction_type='epsilon', clip_sample=True, clip_sample_range=1.0, num_inference_steps=100, do_mask_loss_for_padding=False)

In [18]:
# Reset the policy and the environment to prepare for a fresh rollout.
# The policy is reset here (and not only in an earlier cell) so that re-running
# this cell never starts from state left over by a previous rollout.
policy.reset()
numpy_observation, info = env.reset(seed=1234)

# Collect every reward and every rendered frame of the episode,
# from the initial state to the final state.
rewards = []
frames = []

# Render the frame of the initial state
frames.append(env.render())

step = 0
done = False
while not done:
    # Prepare the observation for the policy running in PyTorch
    state = torch.from_numpy(numpy_observation["agent_pos"])
    image = torch.from_numpy(numpy_observation["pixels"])

    # Convert to float32; the image goes from channel-last in [0, 255]
    # to channel-first in [0, 1] (permute moves the last axis to the front).
    state = state.to(torch.float32)
    image = image.to(torch.float32) / 255
    image = image.permute(2, 0, 1)

    # Send the data tensors to the inference device (a no-op move when on CPU)
    state = state.to(device, non_blocking=True)
    image = image.to(device, non_blocking=True)

    # Add a leading batch dimension of size 1, required to forward the policy
    state = state.unsqueeze(0)
    image = image.unsqueeze(0)

    # Create the policy input dictionary
    observation = {
        "observation.state": state,
        "observation.image": image,
    }

    # Predict the next action with respect to the current observation
    with torch.inference_mode():
        action = policy.select_action(observation)

    # Prepare the action for the environment: drop the batch dimension and
    # convert to a NumPy array on the CPU
    numpy_action = action.squeeze(0).to("cpu").numpy()

    # Step through the environment and receive a new observation
    numpy_observation, reward, terminated, truncated, info = env.step(numpy_action)
    print(f"{step=} {reward=} {terminated=}")

    # Keep track of all the rewards and frames
    rewards.append(reward)
    frames.append(env.render())

    # The rollout is considered done when the success state is reached (i.e. terminated is True),
    # or the maximum number of iterations is reached (i.e. truncated is True)
    done = terminated or truncated
    step += 1



step=0 reward=0.3549526479034709 terminated=False
step=1 reward=0.3549526479034709 terminated=False
step=2 reward=0.3549526479034709 terminated=False
step=3 reward=0.3549526479034709 terminated=False
step=4 reward=0.3549526479034709 terminated=False
step=5 reward=0.3549526479034709 terminated=False
step=6 reward=0.3549526479034709 terminated=False
step=7 reward=0.3549526479034709 terminated=False
step=8 reward=0.3549526479034709 terminated=False
step=9 reward=0.3549526479034709 terminated=False
step=10 reward=0.3549526479034709 terminated=False
step=11 reward=0.3549526479034709 terminated=False
step=12 reward=0.3549526479034709 terminated=False
step=13 reward=0.3583105184393292 terminated=False
step=14 reward=0.37652863787608654 terminated=False
step=15 reward=0.39283125939387686 terminated=False
step=16 reward=0.36670874447322566 terminated=False
step=17 reward=0.3135768316292473 terminated=False
step=18 reward=0.26680185903854986 terminated=False
step=19 reward=0.22731800821590223 te

In [19]:
# Report the outcome from the `terminated` flag of the last environment step.
print("Success!" if terminated else "Failure!")

# Destination of the encoded rollout video.
video_path = output_directory / "rollout.mp4"

# Playback speed: the environment's own rendering rate (frames per second).
fps = env.metadata["render_fps"]

# Stack all collected frames and encode them into a single mp4 file.
imageio.mimsave(str(video_path), numpy.stack(frames), fps=fps)

print(f"Video of the evaluation is available in '{video_path}'.")

Failure!




Video of the evaluation is available in 'outputs/eval/example_pusht_diffusion/rollout.mp4'.
