In [1]:
import random

import numpy as np
import torch
import torch.nn.functional as F
from tensorboardX import SummaryWriter
from torch.distributions import Categorical
from torch.utils.data import DataLoader
from tqdm import trange

from src.utils.dt import DecisionTransformer, DT_Trainer, SequenceDataset
from src.utils.utils import count_parameters, eval_goal

In [2]:
# Global seed used by the RNG-seeding cell below.
SEED = 911

# env
SIZE = 9  # state_dim of the model is SIZE * SIZE; also passed to eval_goal as `size`
N_ACTIONS = 5
REWARD_NUNIQUE = 2

# dataset
HISTORY_LEN = 800_000  # there are no `learning` histories after this step

# dataloader
BATCH_SIZE = 32

# transformer
SEQ_LEN = 256
EPISODE_LEN = 20
TIME_REL = True
EMBEDDING_DIM = 64
NUM_LAYERS = 4
NUM_HEADS = 4
ATTENTION_DROPOUT = 0.5
RESIDUAL_DROPOUT = 0.1
EMBEDDING_DROPOUT = 0

# adam optimizer
MAX_LR = 3e-4
BETA1 = 0.9
BETA2 = 0.99
GRAD_CLIP_NORM = 1

# cosine decay scheduler
MIN_LR = 2e-6
# Number of optimisation steps for one "look" at the data. The per-history
# length is taken from HISTORY_LEN so the schedule follows the dataset setting
# instead of a second hard-coded copy of the same number.
# NOTE(review): 57 is presumably the number of histories under HISTORY_PATH --
# confirm and consider promoting it to a named constant.
NUM_STEPS1 = int(57 * HISTORY_LEN / BATCH_SIZE / SEQ_LEN)
print("steps for 1 look:", NUM_STEPS1)
NUM_STEPS = 4 * NUM_STEPS1  # total training steps; also T_max of the cosine schedule

# evaluation
EVAL_STEP = 64  # run an evaluation every EVAL_STEP training steps
EVAL_LEN = 256  # passed to eval_goal as `length`
TEST_TASKS_PATH = "src/test_tasks.txt"
HISTORY_PATH = "src/histories"

steps for 1 look: 5566


In [3]:
# Seed every RNG this notebook draws from so that Restart & Run All repeats.
# The stdlib `random` module is seeded as well because the training loop picks
# its evaluation goal with random.choice.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)

In [4]:
# Collect the model hyper-parameters from the config cell in one mapping,
# then instantiate the Decision Transformer from it.
dt_kwargs = dict(
    # problem dimensions
    state_dim=SIZE * SIZE,
    action_dim=N_ACTIONS,
    reward_nunique=REWARD_NUNIQUE,
    # sequence / time handling
    seq_len=SEQ_LEN,
    episode_len=EPISODE_LEN,
    time_rel=TIME_REL,
    # network size
    embedding_dim=EMBEDDING_DIM,
    num_layers=NUM_LAYERS,
    num_heads=NUM_HEADS,
    # regularisation
    attention_dropout=ATTENTION_DROPOUT,
    residual_dropout=RESIDUAL_DROPOUT,
    embedding_dropout=EMBEDDING_DROPOUT,
)
dt = DecisionTransformer(**dt_kwargs)

# Report the model size as computed by the project helper.
print("number of trainable parameters: ", count_parameters(dt))

number of trainable parameters:  1132101


In [5]:
# Training data: sequences of SEQ_LEN built from the histories stored under
# HISTORY_PATH, using the same time_rel setting as the model.
dataset = SequenceDataset(
    seq_len=SEQ_LEN,
    time_rel=TIME_REL,
    history_len=HISTORY_LEN,
    history_path=HISTORY_PATH,
)

In [6]:
# Wrap the dataset in a DataLoader and keep a single iterator over it; the
# training loop pulls one batch per step with next(trainloader).
# NOTE(review): the loop calls next() NUM_STEPS times -- verify that
# SequenceDataset can supply that many batches without raising StopIteration.
train_dl = DataLoader(
    dataset,
    batch_size=BATCH_SIZE,
    num_workers=1,
    pin_memory=False,
)
trainloader = iter(train_dl)

In [7]:
# Adam starts at the peak learning rate MAX_LR ...
adam_betas = (BETA1, BETA2)
opt = torch.optim.Adam(
    dt.parameters(),
    lr=MAX_LR,
    betas=adam_betas,
)

# ... and the cosine schedule anneals it towards MIN_LR over NUM_STEPS steps.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    opt,
    T_max=NUM_STEPS,
    eta_min=MIN_LR,
)

In [8]:
# Bundle everything needed for one optimisation step. Arguments are passed
# positionally in the order DT_Trainer expects: model, optimizer, LR scheduler,
# then (presumably) the loss function and the gradient-clipping norm -- confirm
# against DT_Trainer's signature.
trainer = DT_Trainer(
    dt,
    opt,
    scheduler,
    F.cross_entropy,
    GRAD_CLIP_NORM,
)

In [9]:
# TensorBoard event writer for this run; training statistics are logged
# through it by the training loop, which also hands it to eval_goal.
# (Plain string: the previous f-string had no placeholders.)
writer = SummaryWriter("src/logs/dt")

In [10]:
# Held-out evaluation tasks, parsed as integers from a comma-separated text
# file; the training loop draws one entry at random for each evaluation.
test_tasks = np.loadtxt(
    TEST_TASKS_PATH,
    dtype=int,
    delimiter=",",
)

In [None]:
# Main training loop: one trainer step per iteration, plus a periodic
# evaluation rollout on a randomly chosen test goal.
for step in trange(NUM_STEPS):
    batch = next(trainloader)
    data = trainer.step(batch)

    # Log every statistic returned by the trainer under the "train/" namespace.
    for stat_name in data:
        writer.add_scalar(f"train/{stat_name}", data[stat_name], step)

    if step % EVAL_STEP == 0:
        # The model is configured with dropout (ATTENTION_DROPOUT = 0.5), and
        # whether eval_goal / trainer.step toggle train/eval mode themselves is
        # not visible from this notebook. Force eval mode for the rollout and
        # restore the previous mode afterwards, so evaluation runs without
        # dropout and training always resumes in the mode it was in.
        was_training = dt.training
        dt.eval()
        try:
            with torch.no_grad():
                test_goal = random.choice(test_tasks)
                print("eval_goal:", test_goal)
                eval_goal(
                    dt=dt,
                    length=EVAL_LEN,
                    time_rel=TIME_REL,
                    goal=test_goal,
                    size=SIZE,
                    episode_length=EPISODE_LEN,
                    mean_ep=10,
                    writer=writer,
                )
        finally:
            dt.train(was_training)

In [12]:
# Serialise the whole model object (not just its state_dict), tagging the file
# name with the architecture size.
# NOTE(review): the file name says "untrained" although this cell comes after
# the training loop -- confirm which snapshot is meant to be saved here.
checkpoint_name = f"untrained_dt_E{EMBEDDING_DIM}_L{NUM_LAYERS}_H{NUM_HEADS}.pt"
torch.save(dt, checkpoint_name)