In [1]:
# Author: Lucas Gandara

import gymnasium as gym
import torch
from tensordict import TensorDict
from torchrl.data import LazyTensorStorage, TensorDictReplayBuffer

# Pick the CUDA device when torch reports one is available, otherwise the CPU.
# NOTE(review): `device` is not referenced by any cell visible in this notebook.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [7]:
# Roll out a uniformly random policy on HumanoidStandup and record every
# transition in a replay buffer.
N_TRANSITIONS = 100  # env steps to take; also used as the buffer capacity
SEED = 0  # fixed so a fresh "Restart & Run All" seeds the env and action sampler identically

env = gym.make("HumanoidStandup-v5", render_mode="human")

replay_buffer = TensorDictReplayBuffer(
    storage=LazyTensorStorage(N_TRANSITIONS), collate_fn=lambda x: x, batch_size=1
)

# try/finally guarantees the environment (and its render window) is closed
# even if a step or a buffer write raises part-way through the rollout.
try:
    # Keep the observation returned by reset: it is the state the first
    # action is taken in, so it belongs to the first stored transition.
    observation, info = env.reset(seed=SEED)
    env.action_space.seed(SEED)

    for _ in range(N_TRANSITIONS):
        # Placeholder for an agent policy that would use the observation and info.
        action = env.action_space.sample()
        next_observation, reward, terminated, truncated, info = env.step(action)

        replay_buffer.add(
            data=TensorDict(
                {
                    # State the action was chosen in (pre-step).
                    "observation": observation,
                    # State the environment moved to (post-step).
                    "next_observation": next_observation,
                    "action": action,
                    "reward": reward,
                    "terminated": terminated,
                    "truncated": truncated,
                    "info": info,
                },
                # One transition per entry, i.e. no leading batch dimension.
                batch_size=[],
            )
        )

        if terminated or truncated:
            # Episode over: the next transition starts from a fresh reset.
            observation, info = env.reset()
        else:
            observation = next_observation
finally:
    env.close()

In [8]:
# Draw one batch from the buffer and report the shape of its "action" entry.
print("Replay Buffer sample")
action_batch = replay_buffer.sample()["action"]
print(action_batch.shape)


Replay Buffer sample
torch.Size([1, 17])


In [11]:
# Iterate over the replay buffer and print the "action" entry of the item at
# index 10 (the eleventh one yielded), then stop.
# NOTE(review): iterating the buffer presumably yields sampled batches rather
# than walking the storage in insertion order; confirm against the torchrl docs.
for i, data in enumerate(replay_buffer):
    if i != 10:
        continue
    print(data["action"])
    break

tensor([[-0.0682, -0.1056, -0.3145, -0.3301, -0.1430, -0.0453,  0.3092,  0.2733,
          0.0308,  0.1049, -0.0440, -0.0599, -0.0723, -0.2041,  0.0495, -0.2794,
          0.2900]])
