In [1]:
pip install git+https://github.com/huggingface/transformers


Collecting git+https://github.com/huggingface/transformers
  Cloning https://github.com/huggingface/transformers to /private/var/folders/9v/q5zsxfwx7p12414rj_w1zmqr0000gn/T/pip-req-build-smq1sdjc
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/transformers /private/var/folders/9v/q5zsxfwx7p12414rj_w1zmqr0000gn/T/pip-req-build-smq1sdjc
  Resolved https://github.com/huggingface/transformers to commit 234168c4dc837da10c594408f15807a1d6f3fab4
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Note: you may need to restart the kernel to use updated packages.


In [9]:
from transformers import DecisionTransformerModel

# Hub identifier of the pretrained Decision Transformer checkpoint for Hopper.
model_name = "edbeeching/decision-transformer-gym-hopper-expert"

# Build the model from that checkpoint; `model` is reused by the rollout cell.
model = DecisionTransformerModel.from_pretrained(
    pretrained_model_name_or_path=model_name,
)


In [2]:
!pip install glfw
!pip install mujoco
!pip install gymnasium
!pip install numpy
!pip install gymnasium moviepy
!pip install gymnasium[mujoco]
!pip install imageio


zsh:1: no matches found: gymnasium[mujoco]


In [7]:
import os
# Select the GLFW backend for MuJoCo rendering.
# NOTE(review): presumably this must run before the first MuJoCo renderer is created — confirm.
os.environ.update(MUJOCO_GL="glfw")


In [8]:
# Show which Gymnasium release is installed in this kernel.
import gymnasium as gym
print(gym.__version__)

1.0.0


In [9]:
import gymnasium as gym
# Create the Hopper environment and read the length of its observation and
# action vectors (first entry of each space's shape).
env = gym.make("Hopper-v5")
state_dim, act_dim = (
    space.shape[0] for space in (env.observation_space, env.action_space)
)


In [10]:
# Observation size on the first line, action size on the second.
print(state_dim, act_dim, sep="\n")

11
3


In [24]:
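# NOTE: superseded draft, kept for reference only. The active get_action is defined in the
# next cell and reads the context length from model.config.max_length instead of hard-coding 20.
# def get_action(model, states, actions, rewards, returns_to_go, timesteps):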
#     # Ensure consistent dimensions
#     states = states.reshape(1, -1, state_dim)
#     actions = actions.reshape(1, -1, act_dim)
#     returns_to_go = returns_to_go.reshape(1, -1, 1)
#     timesteps = timesteps.reshape(1, -1)

#     # Take only the last 20 timesteps
#     states = states[:, -20:]
#     actions = actions[:, -20:]
#     returns_to_go = returns_to_go[:, -20:]
#     timesteps = timesteps[:, -20:]

#     # Calculate padding needed
#     padding = 20 - states.shape[1]
#     attention_mask = torch.cat([torch.zeros(padding), torch.ones(states.shape[1])])
#     attention_mask = attention_mask.to(dtype=torch.long).reshape(1, -1)

#     # Pad sequences to length 20
#     states = torch.cat([torch.zeros((1, padding, state_dim)).to(device), states], dim=1).float()
#     actions = torch.cat([torch.zeros((1, padding, act_dim)).to(device), actions], dim=1).float()
#     returns_to_go = torch.cat([torch.zeros((1, padding, 1)).to(device), returns_to_go], dim=1).float()
#     timesteps = torch.cat([torch.zeros((1, padding), dtype=torch.long).to(device), timesteps], dim=1)

#     # Ensure attention mask matches sequence length
#     attention_mask = attention_mask[:, :20]  # Limit to 20 timesteps

#     action_preds = model(
#         states=states,
#         actions=actions,
#         returns_to_go=returns_to_go,
#         timesteps=timesteps,
#         attention_mask=attention_mask,
#         return_dict=False,
#     )[0]

#     return action_preds[0, -1]


In [25]:
import numpy as np
import torch
import gymnasium as gym
from gymnasium.wrappers import RecordVideo

# --- Rollout configuration -------------------------------------------------
# Return-to-go handed to the model at the first step (author's note: normalized
# during training).
TARGET_RETURN = 3.6
# Upper bound on the number of environment steps in the rollout loop.
max_ep_len = 1000

# Per-dimension offset and scale applied to observations before they are fed
# to the model: (states - state_mean) / state_std. Stored as float64 tensors.
state_mean = torch.tensor(
    [1.3490015, -0.11208222, -0.5506444, -0.13188992, -0.00378754, 2.6071432,
     0.02322114, -0.01626922, -0.06840388, -0.05183131, 0.04272673],
    dtype=torch.float64,
)
state_std = torch.tensor(
    [0.15980862, 0.0446214, 0.14307782, 0.17629202, 0.5912333, 0.5899924,
     1.5405099, 0.8152689, 2.0173461, 2.4107876, 5.8440027],
    dtype=torch.float64,
)

# --- Environment -----------------------------------------------------------
# Frames are rendered off-screen as RGB arrays; every episode is recorded
# into ./videos.
env = RecordVideo(
    gym.make("Hopper-v5", render_mode="rgb_array"),
    video_folder="./videos",
    episode_trigger=lambda _episode_id: True,
)

state_dim = env.observation_space.shape[0]
act_dim = env.action_space.shape[0]
state, info = env.reset()

# --- Trajectory buffers (each grows by one entry per step) -------------------
target_return = torch.tensor([[TARGET_RETURN]], dtype=torch.float32)
states = torch.as_tensor(state, dtype=torch.float32).reshape(1, state_dim)
actions = torch.zeros((0, act_dim), dtype=torch.float32)
rewards = torch.zeros(0, dtype=torch.float32)
timesteps = torch.zeros((1, 1), dtype=torch.long)


def get_action(model, states, actions, rewards, returns_to_go, timesteps):
    """Predict the next action from the most recent part of the trajectory.

    The history is reshaped to a batch of one, truncated to the last
    ``model.config.max_length`` steps, left-padded with zeros up to that length
    (padded positions get a 0 in ``attention_mask``) and passed to ``model``.
    The forward pass runs under ``torch.no_grad()`` so that storing the returned
    action in the history does not chain autograd graphs across steps.

    Args:
        model: Callable exposing ``config.state_dim``, ``config.act_dim`` and
            ``config.max_length``. It is called with keyword arguments and
            ``return_dict=False`` and must return a
            ``(state_preds, action_preds, return_preds)`` tuple.
        states: Tensor reshapeable to ``(1, T, state_dim)``.
        actions: Tensor reshapeable to ``(1, T, act_dim)``.
        rewards: Forwarded to the model unchanged (not truncated or padded).
        returns_to_go: Tensor reshapeable to ``(1, T, 1)``.
        timesteps: Integer tensor reshapeable to ``(1, T)``.

    Returns:
        ``action_preds[0, -1]``: the prediction at the last sequence position.
    """
    cfg = model.config
    context_len = cfg.max_length

    # Batch of one, keeping only the newest `context_len` steps.
    states = states.reshape(1, -1, cfg.state_dim)[:, -context_len:]
    actions = actions.reshape(1, -1, cfg.act_dim)[:, -context_len:]
    returns_to_go = returns_to_go.reshape(1, -1, 1)[:, -context_len:]
    timesteps = timesteps.reshape(1, -1)[:, -context_len:]

    # Left-pad to the fixed context length; the mask is 0 on padding, 1 on real steps.
    seq_len = states.shape[1]
    padding = context_len - seq_len
    attention_mask = torch.cat(
        [
            torch.zeros(padding, dtype=torch.long, device=states.device),
            torch.ones(seq_len, dtype=torch.long, device=states.device),
        ]
    ).reshape(1, -1)

    # Pad widths come from the model config (the same source as the reshapes above)
    # rather than from notebook-level globals, and live on the inputs' device.
    states = torch.cat(
        [states.new_zeros((1, padding, cfg.state_dim)), states], dim=1
    ).float()
    actions = torch.cat(
        [actions.new_zeros((1, padding, cfg.act_dim)), actions], dim=1
    ).float()
    returns_to_go = torch.cat(
        [returns_to_go.new_zeros((1, padding, 1)), returns_to_go], dim=1
    ).float()
    timesteps = torch.cat(
        [torch.zeros((1, padding), dtype=torch.long, device=timesteps.device), timesteps],
        dim=1,
    )

    with torch.no_grad():
        state_preds, action_preds, return_preds = model(
            states=states,
            actions=actions,
            rewards=rewards,
            returns_to_go=returns_to_go,
            timesteps=timesteps,
            attention_mask=attention_mask,
            return_dict=False,
        )
    return action_preds[0, -1]


# Take steps in the environment

# Rewards are divided by this scale before being subtracted from the running
# return-to-go. NOTE(review): 1000 is the scale used by the reference rollout
# script for this checkpoint, where TARGET_RETURN = 3600 / 1000 = 3.6 — confirm.
# Dividing by TARGET_RETURN itself (as before) drained the return-to-go far too fast.
RETURN_SCALE = 1000.0

for t in range(max_ep_len):
    # Append placeholders for the current step; both are overwritten below.
    actions = torch.cat([actions, torch.zeros((1, act_dim))], dim=0)
    rewards = torch.cat([rewards, torch.zeros(1)])

    action = get_action(
        model,
        (states - state_mean) / state_std,
        actions,
        rewards,
        target_return,
        timesteps
    )
    # Detach before storing so the action history does not keep every previous
    # forward pass alive through autograd.
    action = action.detach()
    actions[-1] = action
    action = action.numpy()

    state, reward, terminated, truncated, info = env.step(action)
    done = terminated or truncated

    cur_state = torch.from_numpy(state).reshape(1, state_dim)
    states = torch.cat([states, cur_state], dim=0)
    rewards[-1] = reward

    # Remaining return the model is conditioned on at the next step.
    pred_return = target_return[0, -1] - (reward / RETURN_SCALE)
    target_return = torch.cat([target_return, pred_return.reshape(1, 1)], dim=1)
    timesteps = torch.cat([timesteps, torch.ones((1, 1)).long() * (t + 1)], dim=1)

    if done:
        break

# Closing the recorder-wrapped environment before the video file is read back.
env.close()

# Display video
import os
from IPython.display import Video

# Pick the most recently written .mp4: os.listdir order is arbitrary and the
# folder may contain older recordings or non-video files.
video_files = sorted(
    (
        os.path.join("./videos", name)
        for name in os.listdir("./videos")
        if name.endswith(".mp4")
    ),
    key=os.path.getmtime,
)
if not video_files:
    raise FileNotFoundError("No .mp4 recording found in ./videos")
video_path = video_files[-1]
Video(video_path, embed=True)

  logger.warn(
Exception ignored in: <function OffScreenViewer.__del__ at 0x1146aadd0>
Traceback (most recent call last):
  File "/Users/sriramsohan/miniforge3/envs/Unity_Agents_env/lib/python3.10/site-packages/gymnasium/envs/mujoco/mujoco_rendering.py", line 202, in __del__
    self.free()
  File "/Users/sriramsohan/miniforge3/envs/Unity_Agents_env/lib/python3.10/site-packages/gymnasium/envs/mujoco/mujoco_rendering.py", line 199, in free
    self.opengl_context.free()
AttributeError: 'OffScreenViewer' object has no attribute 'opengl_context'


In [12]:
## Version with the recorder is below