In [1]:
import numpy as np
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv, VecVideoRecorder
import gymnasium as gym
import random
import os
from matplotlib import pyplot as plt
from typing import Callable

import torch
import torch.nn as nn
import torch.nn.functional as F

from Models.DecisionTransformer import DecisionTransformers

import pytorch_lightning as pl

2024-04-29 01:13:43.759890: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
def get_last_modified_file(directory):
    """Return the name of the most recently modified regular file in ``directory``.

    Only direct children of ``directory`` are considered; sub-directories are
    skipped and not searched.

    Args:
        directory: Path of the directory to scan.

    Returns:
        The file name (not the full path) of the entry with the largest
        modification time, or ``None`` when ``directory`` is not an existing
        directory or contains no regular files.
    """
    # A path that exists but is not a directory would make os.listdir raise,
    # so check for a directory rather than for mere existence.
    if not os.path.isdir(directory):
        return None

    latest_file = None
    # None (rather than 0) as the "nothing seen yet" marker, so that files
    # whose modification time is 0 or negative can still be selected.
    latest_mod_time = None

    for filename in os.listdir(directory):
        filepath = os.path.join(directory, filename)

        # Ignore sub-directories and other non-file entries.
        if not os.path.isfile(filepath):
            continue

        mod_time = os.path.getmtime(filepath)

        # Strict comparison: on a tie the first entry encountered is kept.
        if latest_mod_time is None or mod_time > latest_mod_time:
            latest_mod_time = mod_time
            latest_file = filename

    return latest_file

In [3]:
# Environment configuration: which Gymnasium task to load and how to render it.
env_id, render_mode = 'CarRacing-v2', "human"

# Single-environment vectorized wrapper; later cells read its observation and
# action spaces when building the model.
env = DummyVecEnv([lambda: gym.make(env_id, render_mode=render_mode)])

In [4]:
# Pick the most recently written checkpoint from the checkpoint folder.
directory_path = 'checkpoints/'
model_checkpoints = get_last_modified_file(directory=directory_path)
print("Last modified file:", model_checkpoints)

Last modified file: epoch=98-train_loss=0.06-val_loss=0.07.ckpt


In [5]:
# Fail early with a clear message instead of a TypeError on path building
# when the checkpoint directory is missing or empty.
if model_checkpoints is None:
    raise FileNotFoundError(f"No checkpoint file found in {directory_path!r}")

# os.path.join works whether or not directory_path ends with a separator.
checkpoint_path = os.path.join(directory_path, model_checkpoints)

model = DecisionTransformers.load_from_checkpoint(
    checkpoint_path,
    d_model=128,
    action_space_dim=env.action_space.shape[0],
    observation_space=env.observation_space,
    max_seq_len=64,
)

# disable randomness, dropout, etc...
model.eval()

Number of learnable parameters for the CNN: 2728064
Number of learnable parameters for the entire architecture: 10679171


DecisionTransformers(
  (embedding_reward): Linear(in_features=1, out_features=128, bias=True)
  (embedding_action): Linear(in_features=3, out_features=128, bias=True)
  (embedding_observation): CustomResNet(
    (first_cnn_layer): Sequential(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
      (1): LayerNorm((32, 94, 94), eps=1e-05, elementwise_affine=True)
      (2): GELU(approximate='none')
      (3): Dropout2d(p=0.2, inplace=False)
      (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (second_cnn_layer): Sequential(
      (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1))
      (1): LayerNorm((32, 45, 45), eps=1e-05, elementwise_affine=True)
      (2): GELU(approximate='none')
      (3): Dropout2d(p=0.2, inplace=False)
      (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (cnn_layers): ModuleList(
      (0): Sequential(
        (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1),

In [6]:
def checkAction(action):
    """Clamp a 3-component action to its valid range, reporting violations.

    The components are, in order, steering (valid range [-1, 1]), gas
    (valid range [0, 1]) and brake (valid range [0, 1]).

    Args:
        action: Mutable indexable sequence with at least three numeric
            entries (e.g. a list or a NumPy array). It is modified in place.

    Returns:
        The same ``action`` object, with every out-of-range component
        replaced by the nearest bound. A message is printed for each
        component that had to be clamped.
    """
    # (label used in the message, lower bound, upper bound) per component.
    bounds = (
        ("steering", -1, 1),
        ("gas", 0, 1),
        ("brake", 0, 1),
    )
    for index, (label, low, high) in enumerate(bounds):
        value = action[index]
        if value > high:
            print(f"Invalid {label}. ", value)
            action[index] = high
        elif value < low:
            print(f"Invalid {label}. ", value)
            # Clamp to the LOWER bound (steering below -1 becomes -1, not 1).
            action[index] = low
    return action

In [9]:
class ModelTester:
    """Roll out a sequence model in a single-environment vectorized Gym env.

    The tester keeps three fixed-length context buffers (rewards, observations
    and actions), feeds them to the model at every step and executes the action
    the model predicts for the current position. Once the buffers are full they
    behave as a sliding window: the oldest entry is dropped and the newest is
    appended.
    """

    def __init__(self, 
                 env_name: str,
                 model: nn.Module,
                 device: str = "cuda",
                 seq_len: int = 32,
                 render_mode: str = "human",
                 actionCheck: Callable = None,
                 max_reward: float = 800.,
                ):
        """Initialize the tester with an environment and a model.

        Args:
            env_name: Gymnasium environment id passed to ``gym.make``.
            model: Model to evaluate; it is moved to ``device``.
            device: Device used for the model and for its inputs.
            seq_len: Number of time steps kept in the context buffers.
            render_mode: Render mode passed to ``gym.make``.
            actionCheck: Optional callable applied to each predicted action
                before it is sent to the environment.
            max_reward: Scale used to convert between the normalized reward
                stored in the buffer and the environment's reward units.
        """
        self.env = DummyVecEnv([lambda: gym.make(env_name, render_mode=render_mode)])
        self.model = model.to(device=device)
        self.device = device
        self.seq_len = seq_len
        self.actionCheck = actionCheck
        self.max_reward = max_reward

        # Allocate the (zeroed) input buffers.
        self.reset_sequence()
    
    def run_episode(self, 
                    render: bool = False,
                    starting_rewards: float = 1.,
                    starting_action: np.ndarray = None
                   ):
        """Run one episode to test the model.

        Args:
            render: When true, ``env.render()`` is called before every step.
            starting_rewards: Initial reward-buffer entry, i.e. the target
                return expressed as a fraction of ``max_reward``.
            starting_action: Action stored at position 0 of the action buffer.
                When ``None`` a random sample of the action space is used.

        Returns:
            The sum of the rewards returned by ``env.step`` over the episode.
            The environment is vectorized, so rewards are accumulated as
            returned by it (one entry per wrapped environment).
        """
        state = self.env.reset()
        done = False
        total_reward = 0
        step = 0 
        if starting_action is None:
            starting_action = self.env.action_space.sample()

        #init the sequence
        self.reset_sequence()

        # Remaining return still to be collected, in environment reward units.
        # It is tracked explicitly so that every step subtracts from the LATEST
        # value rather than from the oldest entry of the sliding window.
        return_to_go = float(starting_rewards) * self.max_reward

        self.rewards[0, step] = float(starting_rewards)
        self.actions[0, step] = torch.as_tensor(starting_action, dtype=torch.float32)
        self.observations[0, step] = self._to_observation(state)

        while not done:
            if render:
                self.env.render()

            # Pure inference: no autograd graph is needed.
            with torch.no_grad():
                predicted = self.model({
                    "rewards" : self.rewards.to(device=self.device, dtype=torch.float32),
                    "observations" : self.observations.to(device=self.device, dtype=torch.float32),
                    "actions" : self.actions.to(device=self.device, dtype=torch.float32),
                })
            # The model output is indexed as [batch][time]; take the prediction
            # at the current position. Copy so that actionCheck, which may
            # modify its argument in place, never touches the model's tensor.
            next_action = predicted[0][step].detach().cpu().numpy().copy()
            if self.actionCheck is not None:
                next_action = self.actionCheck(next_action)
            
            state, reward, dones, _ = self.env.step([next_action])
            # Single wrapped environment: read its flag / reward explicitly.
            done = bool(np.asarray(dones).reshape(-1)[0])
            return_to_go -= float(np.asarray(reward).reshape(-1)[0])

            # updating the sequence of rewards, observations, actions
            step = self._append_step(step, return_to_go / self.max_reward, next_action, state)
            
            total_reward += reward

        return total_reward
    
    def test_model(self, episodes=100, render=False):
        """Test the model over a number of episodes and average the rewards.

        Args:
            episodes: Number of episodes to run.
            render: Forwarded to ``run_episode``.

        Returns:
            The mean of the per-episode totals returned by ``run_episode``.
        """
        total_rewards = [self.run_episode(render=render) for _ in range(episodes)]
        average_reward = sum(total_rewards) / episodes
        return average_reward

    def close_env(self):
        """Close the Gym environment."""
        self.env.close()

    def reset_sequence(self):
        """Replace the three context buffers with fresh zero tensors.

        Shapes are taken from the tester's own environment: observations are
        stored channel-first, i.e. ``(1, seq_len, shape[2], shape[0], shape[1])``
        of the observation space.
        """
        obs_shape = self.env.observation_space.shape
        self.rewards = torch.zeros(1, self.seq_len, 1)
        self.observations = torch.zeros(1, self.seq_len, obs_shape[2], obs_shape[0], obs_shape[1])
        self.actions = torch.zeros(1, self.seq_len, self.env.action_space.shape[0])

    def _to_observation(self, state):
        """Convert a batched env observation to a scaled channel-first tensor.

        The leading (environment) axis is squeezed out, values are divided by
        255 and the last axis is moved to the front.
        """
        scaled = np.asarray(state) / 255.
        return torch.as_tensor(scaled, dtype=torch.float32).squeeze(0).permute(2, 0, 1)

    def _append_step(self, step, scaled_return, action, state):
        """Store the newest (reward, action, observation) triple.

        While the buffers are not full the triple is written at the next free
        position; afterwards the oldest entry is dropped and the triple is
        appended at the end.

        Returns:
            The buffer position that now holds the newest entry.
        """
        observation = self._to_observation(state)
        action_tensor = torch.as_tensor(action, dtype=torch.float32)

        if step < self.seq_len - 1:
            step += 1
            self.rewards[0, step] = scaled_return
            self.actions[0, step] = action_tensor
            self.observations[0, step] = observation
        else:
            newest_reward = torch.tensor(scaled_return, dtype=torch.float32).reshape(1, 1, 1)
            self.rewards = torch.cat([self.rewards[:, 1:], newest_reward], dim=1)
            self.observations = torch.cat([self.observations[:, 1:], observation[None, None]], dim=1)
            self.actions = torch.cat([self.actions[:, 1:], action_tensor.reshape(1, 1, -1)], dim=1)
        return step

In [10]:
model_tester = ModelTester(model = model, 
                           env_name = 'CarRacing-v2',
                           actionCheck = checkAction,
                           seq_len = 64,
                          )
# Always release the environment (and its render window), even when the
# rollout is interrupted from the keyboard or fails part-way through.
try:
    average_reward = model_tester.test_model(episodes=1, render=True)
finally:
    model_tester.close_env()

average_reward

Invalid brake.  -9.012967e-05
Invalid brake.  -0.008850243
Invalid brake.  -0.0031641573
Invalid brake.  -0.0016443059
Invalid brake.  -0.0040624924
Invalid brake.  -0.0017506182
Invalid brake.  -0.004779052
Invalid brake.  -0.0048908517
Invalid brake.  -0.015899658
Invalid brake.  -0.028963096
Invalid brake.  -0.0047607757
Invalid brake.  -0.0009059943
Invalid brake.  -0.0038973764
Invalid brake.  -0.0027975924
Invalid brake.  -0.0025800914
Invalid brake.  -0.0006334521
Invalid brake.  -0.004620502
Invalid brake.  -0.013505805
Invalid brake.  -0.009927407
Invalid brake.  -0.001940459
Invalid brake.  -0.005467184
Invalid brake.  -0.006068997
Invalid brake.  -0.0007993281
Invalid brake.  -0.010205474
Invalid brake.  -0.008546583
Invalid brake.  -0.002854988
Invalid brake.  -0.0021985024
Invalid brake.  -0.0024382435
Invalid brake.  -0.0024226606
Invalid brake.  -0.007389471
Invalid brake.  -0.015647437
Invalid brake.  -0.022207424
Invalid brake.  -0.00816071
Invalid brake.  -0.011428585

KeyboardInterrupt: 