In [1]:
import gymnasium as gym
import random
import os
from stable_baselines3.common.vec_env import DummyVecEnv

from codebase.Models.DecisionTransformer import DecisionTransformers
from codebase.ModelTester import ModelTester

import pytorch_lightning as pl

2024-04-29 16:42:43.652684: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
def get_last_modified_file(directory):
    """Return the name of the most recently modified regular file in a directory.

    Only direct children of ``directory`` are inspected; sub-directories are
    skipped and not descended into.

    Args:
        directory: Path of the directory to scan.

    Returns:
        The bare file name (not the full path) of the regular file with the
        greatest modification time, or ``None`` when ``directory`` is not an
        existing directory or contains no regular files. When several files
        share the greatest modification time, the first one yielded by
        ``os.listdir`` is returned.
    """
    # A missing path, or a path that points at a regular file, has nothing to
    # list; os.listdir would raise on the latter.
    if not os.path.isdir(directory):
        return None

    latest_file = None
    # None (rather than 0) as the "nothing seen yet" marker, so that a file
    # whose modification time is 0 or negative can still be selected.
    latest_mod_time = None

    for filename in os.listdir(directory):
        filepath = os.path.join(directory, filename)

        # Ignore sub-directories and anything else that is not a regular file.
        if not os.path.isfile(filepath):
            continue

        mod_time = os.path.getmtime(filepath)

        # Strict comparison keeps the first-seen file on ties.
        if latest_mod_time is None or mod_time > latest_mod_time:
            latest_mod_time = mod_time
            latest_file = filename

    return latest_file

In [3]:
# Environment settings: which Gymnasium task to build and how it is rendered.
env_id, render_mode = 'CarRacing-v2', "human"


def _make_env():
    """Build one environment instance from the settings above."""
    return gym.make(env_id, render_mode=render_mode)


# DummyVecEnv receives a list of zero-argument factories; a single one is used here.
env = DummyVecEnv([_make_env])

In [4]:
# Pick the most recently modified file in the checkpoint directory.
directory_path = 'checkpoints/'
model_checkpoints = get_last_modified_file(directory_path)

# get_last_modified_file returns None when the directory is missing or holds
# no files; stop here with a clear message instead of failing later when the
# checkpoint path is built from this value.
if model_checkpoints is None:
    raise FileNotFoundError(f"No checkpoint file found in {directory_path!r}")

print("Last modified file:", model_checkpoints)

Last modified file: epoch=128-train_loss=0.05-val_loss=0.05.ckpt


In [5]:
# Restore the model from the selected checkpoint file. The action dimension
# and observation space are read from the environment created earlier.
model = DecisionTransformers.load_from_checkpoint(
    # os.path.join works whether or not directory_path ends with a separator.
    os.path.join(directory_path, model_checkpoints),
    d_model = 128,
    action_space_dim = env.action_space.shape[0],
    observation_space = env.observation_space,
    max_seq_len = 64
)

# disable randomness, dropout, etc...
model.eval()

Number of learnable parameters for the CNN: 2728064
Number of learnable parameters for the entire architecture: 10679171


DecisionTransformers(
  (embedding_reward): Linear(in_features=1, out_features=128, bias=True)
  (embedding_action): Linear(in_features=3, out_features=128, bias=True)
  (embedding_observation): CustomResNet(
    (first_cnn_layer): Sequential(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
      (1): LayerNorm((32, 94, 94), eps=1e-05, elementwise_affine=True)
      (2): GELU(approximate='none')
      (3): Dropout2d(p=0.2, inplace=False)
      (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (second_cnn_layer): Sequential(
      (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1))
      (1): LayerNorm((32, 45, 45), eps=1e-05, elementwise_affine=True)
      (2): GELU(approximate='none')
      (3): Dropout2d(p=0.2, inplace=False)
      (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (cnn_layers): ModuleList(
      (0): Sequential(
        (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1),

In [6]:
def checkAction(action):
    """Clamp a ``[steering, gas, brake]`` action into its valid range, in place.

    Valid ranges are steering in [-1, 1], gas in [0, 1] and brake in [0, 1].
    Each out-of-range component is reported on stdout and replaced by the
    nearest bound.

    Args:
        action: Mutable indexable sequence whose first three items are the
            steering, gas and brake values.

    Returns:
        The same ``action`` object, after clamping.
    """
    # (label used in the message, lower bound, upper bound) per component.
    bounds = (
        ("steering", -1, 1),
        ("gas", 0, 1),
        ("brake", 0, 1),
    )

    for index, (label, low, high) in enumerate(bounds):
        value = action[index]
        if value > high:
            print(f"Invalid {label}. ", value)
            action[index] = high
        elif value < low:
            print(f"Invalid {label}. ", value)
            # Clamp to the LOWER bound: steering below -1 becomes -1, not +1.
            action[index] = low

    return action

In [7]:
# Evaluate the loaded model. env_name reuses env_id so the evaluation
# environment cannot drift from the one whose spaces were used to build the model.
model_tester = ModelTester(
    model=model,
    env_name=env_id,
    actionCheck=checkAction,
    render_mode="rgb_array",
    # NOTE(review): presumably has to match max_seq_len used when loading the model — confirm.
    seq_len=64,
)

# Keep the result under a name so later cells do not depend on Out[...].
test_results = model_tester.test_model(
    episodes=3,
    starting_rewards=[0., 0.5, 1., 1.2],
    render=True,
)
test_results

Invalid brake.  -0.0014841259
Invalid brake.  -0.0012839921
Invalid brake.  -0.0029661916
Invalid brake.  -0.0031032301
Invalid brake.  -0.0043531545
Invalid brake.  -0.004063029
Invalid brake.  -0.0020205192
Invalid brake.  -0.0059243888
Invalid brake.  -0.0008202493
Invalid brake.  -0.0004972182
Invalid brake.  -0.0019823294
Invalid brake.  -0.009586699
Invalid brake.  -0.02829853
Invalid brake.  -0.022567607
Invalid brake.  -0.0042101573
Invalid brake.  -0.00305653
Invalid brake.  -0.00028947927
Invalid brake.  -0.0027486645
Invalid brake.  -0.020945787
Invalid brake.  -0.0015108883
Invalid brake.  -0.007802315
Invalid brake.  -0.013282638
Invalid brake.  -0.021283176
Invalid brake.  -0.0002584383
Invalid brake.  -0.0116898455
Invalid brake.  -7.354841e-05
Invalid brake.  -0.013847135
Invalid brake.  -0.01778778
Invalid brake.  -0.0029991362
Invalid brake.  -0.006460406
Invalid brake.  -0.0047068093
Invalid brake.  -0.0009328984
Invalid brake.  -0.014905758
Invalid brake.  -0.012872

{'0.0': array([-61.91276], dtype=float32),
 '0.5': array([544.86945], dtype=float32),
 '1.0': array([722.19464], dtype=float32),
 '1.2': array([640.56934], dtype=float32)}