In [2]:
import os
import sys
import json
import pickle
import numpy as np
from math import *
import random

import torch
from torch.utils.data import Dataset
from torch.utils.data.dataloader import DataLoader
import matplotlib.pyplot as plt

from mingpt.model import GPT
from mingpt.trainer import Trainer
from mingpt.utils import set_seed, setup_logging, CfgNode as CN

In [3]:
class TrajectoryDataset(Dataset):
    """
    Tokenized windows of consecutive state-actions in the humanoid environment.

    Each trajectory stored in the pickle file at ``config.TRAJ_ADDR`` is flattened
    into a single 1-D array of real values (assumed layout: state, action, state,
    action, ... -- TODO confirm against the code that writes the pickle). One
    sample is a window of ``SEQ_LEN`` state-action pairs; the target ``y`` is the
    input ``x`` shifted by one value, so every window needs one extra trailing
    value. Real values are quantized to integer tokens on a uniform grid of step
    ``PRECISION`` over ``[MIN_RANGE, MAX_RANGE]``.

    Attributes:
        config:       the configuration node passed to the constructor.
        state_action: STATE_DIM + ACTION_DIM, the length of one state-action pair.
        traj_list:    flattened trajectories that contain at least one full window.
        buffer:       number of windows available in each kept trajectory.
        accum_idx:    cumulative sum of ``buffer``; used to map a global sample
                      index to (trajectory, window) coordinates.
    """
    @staticmethod
    def get_default_config():
        """Return the default dataset configuration (``TRAJ_ADDR`` must be added by the caller)."""
        C = CN()
        C.STATE_DIM  = 206
        C.ACTION_DIM = 56
        C.SEQ_LEN    = 5     # 5 * (206+56) = 1310 tokens
        C.MIN_RANGE  = -300
        C.MAX_RANGE  = +300
        C.PRECISION  = 1e-2   # 60001 tokens in total (both range ends included)
        return C

    def __init__(self, config):
        """
        Load the trajectories and index every window that can be sampled.

        Args:
            config: node providing STATE_DIM, ACTION_DIM, SEQ_LEN, MIN_RANGE,
                MAX_RANGE, PRECISION and TRAJ_ADDR (path of the pickle file).
        """
        self.config       = config
        self.state_action = self.config.STATE_DIM + self.config.ACTION_DIM
        self.traj_list    = None
        self.buffer       = []
        prep_trajs        = []
        # load trajectory dataset
        # SECURITY: pickle.load executes arbitrary code; only open trusted files.
        with open(self.config.TRAJ_ADDR, "rb") as f:
            self.traj_list = pickle.load(f)
        # maximum available sample windows in each agent trajectory
        for trajectory in self.traj_list:
            # cheap pre-check: too few state-action entries for even one window
            if len(trajectory)//2 < self.config.SEQ_LEN:
                continue
            # preprocess raw trajectory lists into one flat array
            flat = np.concatenate(trajectory, axis=1)[0]
            # A window starting at pair `s` reads flat[s*sa : 1 + (s+SEQ_LEN)*sa].
            # Count only the starts for which that slice is complete, so x and y
            # always have the same length (a trajectory without a trailing state
            # cannot provide the extra target value for its last pair).
            n_windows = (len(flat) - 1)//self.state_action - self.config.SEQ_LEN + 1
            if n_windows <= 0:
                continue
            self.buffer += [ n_windows ]
            prep_trajs  += [ flat ]
        # we choose random trajectories then form them when they are needed during the training
        # finding indices is based on the accumulative index array
        self.accum_idx = np.cumsum(np.asarray(self.buffer, dtype=np.int64))
        # update trajectory list
        self.traj_list = prep_trajs

    def encoder(self, x):
        """
        Quantize real values to integer tokens in ``[0, vocab_size - 1]``.

        Values are clipped to ``[MIN_RANGE, MAX_RANGE]`` and rounded to the nearest
        multiple of ``PRECISION``; ``MIN_RANGE`` maps to token 0 and ``MAX_RANGE``
        to the last token.
        """
        clipped = np.clip(x, a_min=self.config.MIN_RANGE, a_max=self.config.MAX_RANGE)
        tokens  = np.rint((clipped - self.config.MIN_RANGE) / self.config.PRECISION)
        # guard against floating point round-off pushing a token out of the vocabulary
        return np.clip(tokens, 0, self.get_vocab_size() - 1).astype(np.int64)

    def decoder(self, x):
        """
        Map tokens back to real values (inverse of ``encoder`` on the grid).

        Uses only arithmetic operators, so it accepts numpy arrays as well as
        torch tensors.
        """
        return self.config.MIN_RANGE + x * self.config.PRECISION

    def get_vocab_size(self):
        """Number of distinct tokens, including both ends of the value range."""
        # based on range and precision 1d tokens are provided, the semantics for each of these
        # will be learned in the latent space given number of channels
        # round() instead of truncation: the float quotient may land just below an integer
        return int(round((self.config.MAX_RANGE - self.config.MIN_RANGE) / self.config.PRECISION)) + 1

    def get_block_size(self):
        """Length of one input sequence: SEQ_LEN state-action pairs."""
        # a simple shifted SEQ_LEN block of state actions
        return self.config.SEQ_LEN * (self.config.STATE_DIM + self.config.ACTION_DIM)

    def __len__(self):
        """Total number of windows over all kept trajectories."""
        return int(self.accum_idx[-1]) if len(self.accum_idx) > 0 else 0

    def __getitem__(self, idx):
        """
        Return the ``idx``-th window as ``(x, y)`` long tensors of ``get_block_size()`` tokens.

        ``y`` is ``x`` shifted by one token. Negative indices count from the end.

        Raises:
            IndexError: if ``idx`` is outside ``[-len(self), len(self))``.
        """
        idx   = int(idx)
        total = len(self)
        if idx < 0:
            idx += total
        if not 0 <= idx < total:
            raise IndexError(f"sample index {idx} out of range for dataset of size {total}")
        # first trajectory whose cumulative window count exceeds idx
        traj_index  = int(np.searchsorted(self.accum_idx, idx, side="right"))
        slice_index = idx - (int(self.accum_idx[traj_index-1]) if traj_index > 0 else 0)
        # one extra trailing value so the shifted target has full length
        block = self.get_block_size()
        start = slice_index * self.state_action
        trajectory = self.encoder(self.traj_list[traj_index][start:start + block + 1])
        # form input domain element
        x = torch.tensor(trajectory[0:block], dtype=torch.long)
        # form output domain element
        y = torch.tensor(trajectory[1:1 + block], dtype=torch.long)
        return x, y

In [4]:
def get_config(traj_addr='E:/MoCAP/MCDH/minGPT/trajs.pkl', work_dir='./out/dynamic'):
    """
    Build the full experiment configuration.

    Args:
        traj_addr: path of the pickled trajectory file handed to the dataset.
            Defaults to the original machine-specific location; pass your own
            path instead of editing this function.
        work_dir: directory used for the run outputs (the checkpoint is
            written there by the training callback).

    Returns:
        A config node with the sub-nodes ``system``, ``data``, ``model`` and ``trainer``.
    """
    C = CN()

    # system
    C.system = CN()
    C.system.seed = 3407
    C.system.work_dir = work_dir

    # data
    C.data = TrajectoryDataset.get_default_config()
    C.data.TRAJ_ADDR = traj_addr

    # model
    C.model = GPT.get_default_config()
    C.model.model_type = 'gpt-nano'

    # trainer
    C.trainer = Trainer.get_default_config()
    C.trainer.learning_rate = 5e-4 # the model we're using is so small that we can go a bit faster

    return C

In [None]:
# build the experiment configuration; the command-line override below is
# currently disabled, so the values from get_config() are used as-is
config = get_config()
# config.merge_from_args(sys.argv[1:])
print(config)
# NOTE(review): setup_logging comes from mingpt.utils; presumably it prepares
# config.system.work_dir for the run -- confirm against the installed version
setup_logging(config)
# seed with the value stored in the config so runs are repeatable
set_seed(config.system.seed)

# construct the training dataset
train_dataset = TrajectoryDataset(config.data)

# construct the model; vocabulary and context sizes are dictated by the dataset
config.model.vocab_size = train_dataset.get_vocab_size()
config.model.block_size = train_dataset.get_block_size()
model = GPT(config.model)

# construct the trainer object
trainer = Trainer(config.trainer, model, train_dataset)

# iteration callback
def batch_end_callback(trainer):
    """
    Report progress after every batch; periodically sample, plot and checkpoint.

    Every 10 iterations the iteration time and training loss are printed.
    Every 500 iterations one random training sample is used to build a context
    (first state followed by the sample's actions), the model generates from
    it, the decoded result is plotted against the decoded ground truth, and
    the model weights are saved to ``<work_dir>/model.pt``.

    Args:
        trainer: the running trainer; ``iter_num``, ``iter_dt``, ``loss`` and
            ``device`` are read from it.
    """
    if trainer.iter_num % 10 == 0:
        print(f"iter_dt {trainer.iter_dt * 1000:.2f}ms; iter {trainer.iter_num}: train loss {trainer.loss.item():.5f}")

    if trainer.iter_num % 500 == 0:
        # qualitative check on a training sample
        model.eval()
        with torch.no_grad():
            # dataset dimensions live under config.data
            state_dim    = config.data.STATE_DIM
            state_action = state_dim + config.data.ACTION_DIM
            # a dataset sample holds only SEQ_LEN state-action pairs
            num_actions  = min(10, config.data.SEQ_LEN)
            # sample from the model...
            # __getitem__ returns (input, target); only the input tokens are needed
            random_traj, _ = train_dataset[random.randint(0, len(train_dataset)-1)]
            traj_action  = [random_traj[i*state_action+state_dim:(i+1)*state_action] for i in range(num_actions)]
            # the pieces are 1-D token tensors: join along dim 0, then add the batch dim
            context = torch.concatenate([random_traj[:state_dim]]+traj_action, dim=0)[None,...].to(trainer.device)
            # NOTE(review): `n_actions` is not visible in this file; assumes the
            # project's GPT.generate accepts it and returns (batch, tokens) -- confirm
            y_tokenized = model.generate(context, temperature=1.0, do_sample=False, top_k=10, n_actions=num_actions)[0]
            # compare in real-value space, on the CPU so matplotlib can read the data
            y     = train_dataset.decoder(y_tokenized).cpu().numpy()
            truth = train_dataset.decoder(random_traj[:num_actions*state_action]).cpu().numpy()
            # draw the result plot
            fig, ax = plt.subplots(figsize=(8, 6))
            ax.plot(range(len(truth)), truth, color='blue', label='ground truth')
            ax.plot(range(len(y)), y, color='red', label='generated')
            ax.set_xlabel('position in flattened trajectory')
            ax.set_ylabel('decoded value')
            ax.set_title(f'iteration {trainer.iter_num}')
            ax.legend()
            plt.show()
            # release the figure; this callback runs many times during training
            plt.close(fig)

        # save the latest model
        print("saving model")
        os.makedirs(config.system.work_dir, exist_ok=True)
        ckpt_path = os.path.join(config.system.work_dir, "model.pt")
        torch.save(model.state_dict(), ckpt_path)
        # revert model to training mode
        model.train()

# register batch_end_callback under the trainer's 'on_batch_end' event
trainer.set_callback('on_batch_end', batch_end_callback)

# run the optimization
trainer.run()