In [1]:
import numpy as np
from stable_baselines3.common.vec_env import DummyVecEnv
import gymnasium as gym
import random

import torch
import torch.nn as nn
from torch.utils.data import DataLoader

from Dataloader.SequenceExtractor import SequenceExtractor, collate_fn
from Models.DecisionTransformer import DecisionTransformers

import pytorch_lightning as pl
from lightning.pytorch.loggers import TensorBoardLogger
from lightning.pytorch.callbacks import ModelCheckpoint

2024-03-05 00:45:32.483346: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
def seed_worker(worker_id):
    """Seed NumPy and Python's ``random`` from the current torch seed.

    Written to be used as a DataLoader ``worker_init_fn``: the value of
    ``torch.initial_seed()`` is reduced to 32 bits and applied to both
    the NumPy global generator and the ``random`` module.

    Args:
        worker_id: Worker index supplied by the caller; not used here.
    """
    # np.random.seed only accepts values below 2**32, hence the reduction.
    seed32 = torch.initial_seed() % (1 << 32)
    for apply_seed in (np.random.seed, random.seed):
        apply_seed(seed32)


# Dedicated torch generator with a fixed seed, passed to the DataLoaders.
g = torch.Generator()
g.manual_seed(0)

In [3]:
# Environment settings: the Gymnasium id and the render mode given to gym.make.
render_mode = "rgb_array"
env_id = 'CarRacing-v2'


def _make_env():
    """Create one environment instance from the module-level settings."""
    return gym.make(env_id, render_mode=render_mode)


# DummyVecEnv receives a list of environment factories; one is supplied here.
env = DummyVecEnv([_make_env])

In [4]:
# Training dataset drawn from the vectorized environment.
sequenceExtractorTrain = SequenceExtractor(env, dataset_len=128)

# Validation dataset of the same length. Its starting_num is the length of the
# training set -- presumably so the two sets do not overlap; TODO confirm
# against SequenceExtractor.
val_starting_num = len(sequenceExtractorTrain)
sequenceExtractorVal = SequenceExtractor(
    env,
    dataset_len=128,
    starting_num=val_starting_num,
)

In [5]:
# Build the training and validation data loaders.
batch_size = 128

# Settings common to both loaders. worker_init_fn and generator pin the
# seeding of the worker process and of the sampler.
loader_kwargs = dict(
    batch_size=batch_size,
    num_workers=1,
    collate_fn=collate_fn,
    worker_init_fn=seed_worker,
    generator=g,
)

# Training batches are reshuffled every epoch.
dataloader_train = DataLoader(sequenceExtractorTrain, shuffle=True, **loader_kwargs)

# Validation batches are served in dataset order. Shuffling them brings no
# benefit, triggers a Lightning warning, and makes the batch order depend on
# how far the shared generator has advanced.
dataloader_val = DataLoader(sequenceExtractorVal, shuffle=False, **loader_kwargs)

In [6]:
# TensorBoard logger rooted at "tb_logs", experiment name "DecisionTransformers".
logger = TensorBoardLogger("tb_logs", name="DecisionTransformers")

# Checkpoint callback configured to monitor "train_loss" and keep the top 2
# checkpoints under "checkpoints/", named after the epoch and the loss value.
checkpoint_callback = ModelCheckpoint(
    dirpath="checkpoints/",
    filename='{epoch}-{train_loss:.2f}',
    monitor="train_loss",
    save_top_k=2,
)

In [7]:
# Seed torch right before building the model so that anything drawing from the
# global torch RNG during construction is repeatable.
torch.manual_seed(42)

model = DecisionTransformers(
    d_model=128,
    action_space_dim=env.action_space.shape[0],
    observation_space=env.observation_space,
    # Previously `sequenceExtractor.seq_len`: no variable of that name is
    # defined in this notebook, so the cell raised NameError on a fresh kernel.
    # The training extractor is the dataset the model is actually fitted on.
    max_seq_len=sequenceExtractorTrain.seq_len,
)

# NOTE(review): checkpoint_callback is still not registered with the trainer,
# exactly as before. It comes from `lightning.pytorch` while the Trainer comes
# from `pytorch_lightning`; confirm the two packages can be mixed (or align the
# imports) before re-enabling `callbacks=[checkpoint_callback]`.
trainer = pl.Trainer(max_epochs=100, logger=logger)  # , callbacks=[checkpoint_callback]

# Fit using the training loader, with the validation loader passed alongside.
trainer.fit(model, dataloader_train, dataloader_val)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Number of learnable parameters for the CNN: 2728064
Number of learnable parameters for the entire architecture: 4082051


/data/cino/.local/lib/python3.10/site-packages/pytorch_lightning/trainer/configuration_validator.py:74: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name                  | Type                    | Params
------------------------------------------------------------------
0 | embedding_reward      | Linear                  | 256   
1 | embedding_action      | Linear                  | 512   
2 | embedding_observation | CustomResNet            | 2.7 M 
3 | transformer           | TransformerArchitecture | 1.3 M 
4 | fc1                   | Sequential              | 33.0 K
5 | output                | Linear                  | 771   
6 | huber_loss            | SmoothL1Loss            | 0     
------------------------------------------------------------------
4.1 M     Trainable params
0         Non-trainable params
4.1 M     Total params
16.328    Total estimated model params size (MB)
/data/cino/.local/li

Training: |                                               | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=100` reached.


In [9]:
# Save the trainer's current state to a fixed checkpoint file.
overfit_ckpt_path = "checkpoints/DecisionTransformers-Overfitting.ckpt"
trainer.save_checkpoint(overfit_ckpt_path)

In [8]:
# Display the best-checkpoint path recorded by checkpoint_callback.
# The callback is not passed to pl.Trainer (its `callbacks=` argument is
# commented out), so presumably it never saved anything -- consistent with the
# empty string shown as this cell's output.
checkpoint_callback.best_model_path

''