### Install initial environment in Google Colab

In [1]:
import sys
import os

if 'google.colab' in sys.modules:
  if not os.path.exists('/content/.already_installed'):
    !git clone https://github.com/FlutterbaseDotCom/hdt

    !apt-get install -y swig
    !pip install box2d-py
    !pip install 'gymnasium[box2d]'
    !pip install 'stable-baselines3[extra]'
    !pip install toml
    !pip install wandb
    !pip install datasets
    !pip install transformers
    !pip install torchviz
    !pip install accelerate -U

    with open('/content/.already_installed', 'w') as f:
        f.write('done')
  %cd /content/hdt


### Imports

In [2]:
import os
import random
from dataclasses import dataclass

import gymnasium as gym
import numpy as np
import torch
import torch.nn as nn
import torchviz
import wandb
from datasets import Dataset, load_dataset
from stable_baselines3 import DQN
from stable_baselines3.common.torch_layers import NatureCNN
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
from torch.utils.data import Subset
from transformers import Trainer, TrainingArguments

from dt.configuration_decision_transformer import DecisionTransformerConfig
from dt.modeling_decision_transformer import DecisionTransformerModel
from extract_cnn import prepare_observation_array
from dt.trainable_dt import DecisionTransformerGymDataCollator, TrainableDT





  from .autonotebook import tqdm as notebook_tqdm


### Generate Data

In [3]:
from utils.config import MAX_EPISODE_STEPS, NUM_EPISODES, RTG_GAMMA


env =  gym.make('CarRacing-v2', continuous=False) #, render_mode='human'
model = DQN
tmp_model_path ='./models/dql_pretrained/dql_rl_11.zip'
loaded_model = model.load(tmp_model_path)

features = {
    "observations": [],
    "actions": [],
    "rewards": [],
    "dones": [],
    "rtg": []
}

def calculate_rtg(rewards, gamma):
    rtg = np.zeros_like(rewards)
    rtg[-1] = rewards[-1]
    for i in reversed(range(len(rewards) - 1)):
        rtg[i] = rewards[i] + gamma * rtg[i + 1]
    return rtg

for episode in range(NUM_EPISODES):
    print(f"Episode: {episode} of {NUM_EPISODES}:" )
    [obs, _] = env.reset()
    done = False

    o, a, r, d, g = [], [], [], [], []
    total_reward = 0
    sti = 0
    while not done:
        sti = sti + 1
        if sti > MAX_EPISODE_STEPS:
            break

        # if random.random() < epsilon:
        #     action = 3# env.action_space.sample()
        # else:
        action, _states = loaded_model.predict(obs,deterministic=True)
        new_obs, reward, done, t, i = env.step(action)
        total_reward = total_reward + reward
        oarr = prepare_observation_array(obs)
        o.append(oarr.flatten())
        a.append(action.item())
        r.append(reward)
        d.append(done)
        obs = new_obs
        print(".", end="")

        # check if last 50 steps does not contain a single positive reward
        if len(r) > 100 and max(r[-50:]) <= 0:
            # cut last 50 and set done to True
            r = r[:-50]
            d = d[:-50]
            a = a[:-50]
            d[-1] = True
            print('\nstopping due to the last 50 steps not negative rewards')
            break
    print(f"\nTotal reward: {total_reward} episodes steps: {len(o)}")

    features["observations"].append(o)
    features["actions"].append(a)
    features["rewards"].append(r)
    features["dones"].append(d)
    episode_rtg = calculate_rtg(r, RTG_GAMMA)
    features["rtg"].append(episode_rtg)

env.close()
#print(len(features["actions"]))






Exception: Can't get attribute '_make_function' on <module 'cloudpickle.cloudpickle' from '/Users/jacob/.pyenv/versions/3.10.12/lib/python3.10/site-packages/cloudpickle/cloudpickle.py'>
Exception: Can't get attribute '_make_function' on <module 'cloudpickle.cloudpickle' from '/Users/jacob/.pyenv/versions/3.10.12/lib/python3.10/site-packages/cloudpickle/cloudpickle.py'>


Episode: 0 of 10:
.............................................
Total reward: 7.922360248447221 episodes steps: 45
Episode: 1 of 10:
.............................................
Total reward: 6.252688172043026 episodes steps: 45
Episode: 2 of 10:
.............................................
Total reward: 8.774336283185855 episodes steps: 45
Episode: 3 of 10:
.............................................
Total reward: 1.9308681672025787 episodes steps: 45
Episode: 4 of 10:
.............................................
Total reward: 7.584592145015122 episodes steps: 45
Episode: 5 of 10:
.............................................
Total reward: 9.734875444839874 episodes steps: 45
Episode: 6 of 10:
.............................................
Total reward: 9.105442176870763 episodes steps: 45
Episode: 7 of 10:
.............................................
Total reward: 8.198412698412714 episodes steps: 45
Episode: 8 of 10:
.............................................
Total reward: 9

### Persist Dataset

In [14]:
#dataset = Dataset.from_dict(features)

class FeatureDataset(Dataset):
    def __init__(self, src):
        self.size = len(src["observations"])  # Assuming all lists are the same length
        self.src = src

    def __len__(self):
        return self.size
    
    def __getitems__(self, index):
        return [self._item(i) for i in index]

    def _item(self, idx):
        # It is better to ensure this is an internal method used within the class only.
        if isinstance(idx, str):
            return self.src[idx]
        
        return {
            "observations": self.src["observations"][idx],
            "actions": self.src["actions"][idx], 
            "rewards": self.src["rewards"][idx],
            "dones": self.src["dones"][idx],
            "rtg": self.src["rtg"][idx]
        }
    
    def __getitem__(self, index):
        # Here, we use 'index' instead of 'i'
        return self._item(index)

feature_dataset = FeatureDataset(src=features)
len(feature_dataset[0]["observations"][0])
#dataset.save_to_disk('datasets/car_racing_0070_0045/')

# dataset_size = len(dataset)
# split_point = int(0.9 * dataset_size)
# #dataset is already shuffled
# train_dataset = Subset(dataset, range(split_point))
# val_dataset = Subset(dataset, range(split_point, dataset_size))

#from datasets import load_from_disk
#dataset = load_from_disk('datasets/car_racing_002/')


27648

### Split dataset

In [6]:
from utils.config import CONFIG, WANDB_ID, WNDB_NAME
# TOML-formatted string


os.environ["WANDB_DISABLED"] = "false"
os.environ['WANDB_NOTEBOOK_NAME'] = 'DT.ipynb'
os.environ['WANDB_MODE']='online'
os.environ["WANDB_LOG_MODEL"] = "checkpoint"


wandb.login(key="f060d3284088ffaf4624e2de8b236f39711a99a2")
wandb.init(resume=WANDB_ID,
           name = WNDB_NAME,
           mode="online",
           entity="yakiv",
            project="CarRacingDT",
            #resume= "allow"
            config=CONFIG
           )


env =  gym.make('CarRacing-v2', continuous=False) #, render_mode='human'


[34m[1mwandb[0m: Currently logged in as: [33myakiv[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /Users/jacob/.netrc


### Train

In [15]:

collator = DecisionTransformerGymDataCollator(feature_dataset, max_len=CONFIG["max_length"],   max_ep_len=CONFIG["max_ep_len"],)

dt_config = DecisionTransformerConfig(state_dim=collator.state_dim, act_dim=collator.act_dim,
                                      max_length = CONFIG["max_length"],
                                      max_ep_len = CONFIG["max_ep_len"],  
                                      )
print(dt_config.to_dict())

model = TrainableDT(dt_config)


training_args = TrainingArguments(
    output_dir="output/",
    report_to="wandb",
    save_steps=CONFIG["save_steps"],
    remove_unused_columns=False,
    optim="adamw_torch",
    num_train_epochs=CONFIG["num_train_epochs"],
    per_device_train_batch_size=CONFIG["per_device_train_batch_size"],
    learning_rate=CONFIG["learning_rate"],
    weight_decay=CONFIG["weight_decay"],
    warmup_ratio=CONFIG["warmup_ratio"],
    max_grad_norm=CONFIG["max_grad_norm"],
    logging_steps=CONFIG["LOG_INTERVAL"],
)

class DummyDataset(Dataset):
    def __init__(self, size):
        self.size = size
        self.arr = [0] * size

    def __len__(self):
        return self.size
    def __getitems__(self, index):
        print(index)
        return index
    
    def __getitem__(self, index):
        print(index)
        return index
        
        

    

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=DummyDataset(len(feature_dataset)),
    data_collator=collator,

)

trainer.train()


TypeError: list indices must be integers or slices, not str

In [26]:
#len(feature_dataset[0]["observations"][0])
print(feature_dataset.data_list)

{'observations': [[array([0., 0., 0., ..., 0., 0., 0.], dtype=float32), array([0., 0., 0., ..., 0., 0., 0.], dtype=float32), array([0., 0., 0., ..., 0., 0., 0.], dtype=float32), array([0.39215687, 0.39215687, 0.39215687, ..., 0.        , 0.        ,
       0.        ], dtype=float32), array([0.39215687, 0.39215687, 0.39215687, ..., 0.        , 0.        ,
       0.        ], dtype=float32), array([0.39215687, 0.39215687, 0.39215687, ..., 0.        , 0.        ,
       0.        ], dtype=float32), array([0.39215687, 0.39215687, 0.39215687, ..., 0.        , 0.        ,
       0.        ], dtype=float32), array([0.39215687, 0.39215687, 0.39215687, ..., 0.        , 0.        ,
       0.        ], dtype=float32), array([0.39215687, 0.39215687, 0.39215687, ..., 0.        , 0.        ,
       0.        ], dtype=float32), array([0.39215687, 0.39215687, 0.39215687, ..., 0.        , 0.        ,
       0.        ], dtype=float32), array([0.39215687, 0.39215687, 0.39215687, ..., 0.        , 0.    