In [None]:
%%capture
!pip install wget
!git clone https://github.com/KKMOfficial/minGPT.git
import wget
link = "https://drive.usercontent.google.com/download?id=1vY9l-110dFmbqm2AkRNllqp0MnXJaxH6&export=download&authuser=0&confirm=t&uuid=018afe8b-f360-451b-bac4-f8e714956771&at=APZUnTWO3XNqjvhgXvPBJ6GMkbxT%3A1723832924892"
wget.download(link, out="/content/minGPT/trajs.pkl")

In [1]:
import os
import sys
import json
import pickle
import numpy as np
from math import *
import random

import torch
from torch.utils.data import Dataset
from torch.utils.data.dataloader import DataLoader
import matplotlib.pyplot as plt

from mingpt.model import GPT
from mingpt.trainer import Trainer
from mingpt.utils import set_seed, setup_logging, CfgNode as CN

In [2]:
class TrajectoryDataset(Dataset):
    """
    Sliding windows of consecutive state-action tokens from pickled trajectories.

    Every trajectory in the pickle is flattened into one 1-D array. A sample is a
    window of SEQ_LEN state-action steps (`x`) together with the same window
    shifted forward by one token (`y`), both quantised to integer token ids.
    """
    @staticmethod
    def get_default_config(sequence_length=5, traj_addr=None):
        """
        Build the default dataset configuration.

        sequence_length: number of state-action steps per sample (SEQ_LEN).
        traj_addr: optional path of the trajectory pickle; TRAJ_ADDR is only set when given.
        """
        C = CN()
        C.STATE_DIM  = 206
        C.ACTION_DIM = 56
        C.SEQ_LEN    = sequence_length     # default 5 -> 5 * (206+56) = 1310 tokens per block
        C.MIN_RANGE  = -300
        C.MAX_RANGE  = +300
        C.PRECISION  = 1e-2   # 1 + (300 - -300) / 0.01 = 60001 tokens in total
        if traj_addr is not None:
            C.TRAJ_ADDR = traj_addr
        return C

    def __init__(self, config):
        """
        Load the pickle at config.TRAJ_ADDR and index every complete window.

        config must provide STATE_DIM, ACTION_DIM, SEQ_LEN, MIN_RANGE, MAX_RANGE,
        PRECISION and TRAJ_ADDR. Trajectories with fewer than SEQ_LEN state-action
        pairs, or too few tokens for even one window, are skipped.
        """
        self.config       = config
        self.state_action = self.config.STATE_DIM + self.config.ACTION_DIM
        self.traj_list    = None
        self.buffer       = []      # number of windows contributed by each kept trajectory
        prep_trajs        = []
        # a sample needs SEQ_LEN steps plus one extra token for the shifted target
        window_tokens = self.config.SEQ_LEN * self.state_action + 1
        # load trajectory dataset
        # SECURITY: pickle.load can execute arbitrary code; only open pickles from a trusted source.
        with open(self.config.TRAJ_ADDR, "rb") as f:
            raw_trajs = pickle.load(f)
        for trajectory in raw_trajs:
            n_pairs = len(trajectory) // 2
            if n_pairs < self.config.SEQ_LEN:
                continue
            # flatten the per-step arrays into a single 1-D array
            flat = np.concatenate(trajectory, axis=1)[0]
            # count windows by pairs, but never more than the flat array can serve in full;
            # otherwise the last window of a trajectory lacking a trailing element would
            # yield a target one token shorter than its input
            by_pairs  = n_pairs - self.config.SEQ_LEN + 1
            by_tokens = (flat.shape[0] - window_tokens) // self.state_action + 1
            n_windows = min(by_pairs, by_tokens)
            if n_windows <= 0:
                continue
            self.buffer.append(n_windows)
            prep_trajs.append(flat)
        # samples are located through the cumulative window counts:
        # accum_idx[i] is the number of samples held by trajectories 0..i
        self.accum_idx = np.cumsum(self.buffer, dtype=np.int64)
        # keep only the preprocessed (flattened) trajectories
        self.traj_list = prep_trajs

    def encoder(self, x):
        """
        Quantise real values to token ids in [0, vocab_size - 1].

        Values are clipped to [MIN_RANGE, MAX_RANGE], rounded to the configured
        precision and scaled linearly. Defined as a method (not a lambda stored on
        the instance) so the dataset can be pickled.
        """
        cfg    = self.config
        vocab  = self.get_vocab_size()
        digits = int(log10(1 / cfg.PRECISION))
        values = np.round(np.clip(x, a_min=cfg.MIN_RANGE, a_max=cfg.MAX_RANGE), decimals=digits)
        tokens = np.floor(((values - cfg.MIN_RANGE) / (cfg.MAX_RANGE - cfg.MIN_RANGE)) * vocab).astype(np.longlong)
        # x == MAX_RANGE scales to exactly `vocab`, one past the last valid id
        return np.minimum(tokens, vocab - 1)

    def decoder(self, x):
        """Map token ids back to (approximate) real values; inverse scaling of `encoder`."""
        return ((x / self.get_vocab_size()) * (self.config.MAX_RANGE - self.config.MIN_RANGE)) + self.config.MIN_RANGE

    def get_vocab_size(self):
        """Number of distinct tokens implied by the value range and the precision."""
        # each token is a 1-D quantisation bin; its meaning is left to the learned embedding
        return int(1+(self.config.MAX_RANGE-self.config.MIN_RANGE)/self.config.PRECISION)

    def get_block_size(self):
        """Tokens per sample: SEQ_LEN steps of (STATE_DIM + ACTION_DIM) tokens each."""
        return self.config.SEQ_LEN * (self.config.STATE_DIM + self.config.ACTION_DIM)

    def __len__(self):
        """Total number of windows over all kept trajectories (0 when none qualified)."""
        return int(self.accum_idx[-1]) if len(self.accum_idx) else 0

    def __getitem__(self, idx):
        """
        Return the (x, y) token tensors of sample `idx`.

        Both tensors have get_block_size() entries of dtype torch.long; y is x
        shifted forward by one token. Negative indices count from the end.
        Raises IndexError when idx is out of range.
        """
        total = len(self)
        if idx < 0:
            idx += total
        if not 0 <= idx < total:
            raise IndexError(f"sample index {idx} out of range for dataset of size {total}")
        # first trajectory whose cumulative window count exceeds idx
        traj_index  = int(np.searchsorted(self.accum_idx, idx, side='right'))
        slice_index = idx - (int(self.accum_idx[traj_index-1]) if traj_index > 0 else 0)
        start = slice_index * self.state_action
        block = self.get_block_size()
        # one extra token so the target can be the input shifted by one
        tokens = self.encoder(self.traj_list[traj_index][start:start + block + 1])
        # form input domain element
        x = torch.tensor(tokens[:block], dtype=torch.long)
        # form output domain element
        y = torch.tensor(tokens[1:block + 1], dtype=torch.long)
        return x, y

In [3]:
def get_config(traj_addr='E:/MoCAP/MCDH/minGPT/trajs.pkl'):
    """
    Assemble the experiment configuration (system, data, model, trainer).

    traj_addr: path of the trajectory pickle stored in data.TRAJ_ADDR. The default
        is the original author's local path; pass your own location instead of
        editing this function.

    Returns a CfgNode with the sections `system`, `data`, `model` and `trainer`.
    """
    C = CN()

    # system
    C.system = CN()
    C.system.seed = 3407
    C.system.work_dir = './out/dynamic'

    # data
    C.data = TrajectoryDataset.get_default_config()
    C.data.TRAJ_ADDR = traj_addr

    # model
    C.model = GPT.get_default_config()
    C.model.model_type = 'gpt-nano'

    # trainer
    C.trainer = Trainer.get_default_config()
    C.trainer.learning_rate = 5e-4 # the model we're using is so small that we can go a bit faster

    return C

In [5]:
# build the experiment config; command-line overrides are left disabled in this notebook
config = get_config()
# config.merge_from_args(sys.argv[1:])
setup_logging(config)
set_seed(config.system.seed)

# construct the datasets
# The test dataset uses 10-step windows and reads the path from the config, so both
# datasets always point at the same file instead of two separately maintained literals.
# NOTE(review): train and test are built from the same pickle, so they are not disjoint.
test_dataset  = TrajectoryDataset(TrajectoryDataset.get_default_config(10, config.data.TRAJ_ADDR))
train_dataset = TrajectoryDataset(config.data)


# construct the model; vocabulary and context length are dictated by the training dataset
config.model.vocab_size = train_dataset.get_vocab_size()
config.model.block_size = train_dataset.get_block_size()
print(config)
model = GPT(config.model)

# construct the trainer object
trainer = Trainer(config.trainer, model, train_dataset)

# iteration callback
def batch_end_callback(trainer):
    """
    Per-iteration hook: log the loss, and periodically plot a rollout and checkpoint.

    Every 10 iterations the iteration time and training loss are printed. Every 500
    iterations a random test window is used to prompt the model, the generated tokens
    are plotted against the ground-truth tokens, and the weights are saved to
    <work_dir>/model.pt.

    Relies on the notebook globals `model`, `config` and `test_dataset`.
    """
    if trainer.iter_num % 10 == 0:
        print(f"iter_dt {trainer.iter_dt * 1000:.2f}ms; iter {trainer.iter_num}: train loss {trainer.loss.item():.5f}")

    if trainer.iter_num % 500 == 0:
        # evaluate on a sample from the test dataset
        model.eval()
        with torch.no_grad():
            # randrange excludes its upper bound; randint(0, len) could return len itself
            # and index one past the last sample
            random_traj  = test_dataset[random.randrange(len(test_dataset))]
            state_dim    = config.data.STATE_DIM
            state_action, num_actions = state_dim+config.data.ACTION_DIM, 9
            # prompt = first state followed by the action slice of each of the first 9 steps
            traj_action  = [random_traj[0][i*state_action+state_dim:(i+1)*state_action] for i in range(num_actions)]
            context = torch.concatenate([random_traj[0][:state_dim]]+traj_action, dim=0)[None,...].to(trainer.device)
            y_tokenized = model.generate(context, temperature=1.0, do_sample=False, top_k=10, n_actions=num_actions)[0]
            # matplotlib cannot read tensors living on an accelerator
            if torch.is_tensor(y_tokenized):
                y_tokenized = y_tokenized.detach().cpu()
            # both curves are token ids; apply the dataset's decoder when real values are needed
            horizon = (num_actions-1)*state_action+state_dim
            # draw the result plot
            fig, ax = plt.subplots(figsize=(8, 6))
            ax.plot(range(horizon), random_traj[1][:horizon], color='blue', label='ground truth')
            ax.plot(range(y_tokenized.shape[0]), y_tokenized, color='red', label='generated')
            ax.set(xlabel='token position', ylabel='token id', title=f'iter {trainer.iter_num}')
            ax.legend()
            # show now (a bare plt.plot() draws nothing) and release the figure so
            # repeated callbacks do not accumulate open figures
            plt.show()
            plt.close(fig)

        # save the latest model
        print("saving model")
        ckpt_path = os.path.join(config.system.work_dir, "model.pt")
        torch.save(model.state_dict(), ckpt_path)
        # revert model to training mode
        model.train()

# register the logging / plotting / checkpoint hook under the 'on_batch_end' event
trainer.set_callback('on_batch_end', batch_end_callback)

# run the optimization
# NOTE(review): the printed trainer config shows max_iters: None, so this presumably
# runs until interrupted -- confirm against Trainer.run
trainer.run()

system:
    seed: 3407
    work_dir: ./out/dynamic
data:
    STATE_DIM: 206
    ACTION_DIM: 56
    SEQ_LEN: 5
    MIN_RANGE: -300
    MAX_RANGE: 300
    PRECISION: 0.01
    TRAJ_ADDR: E:/MoCAP/MCDH/minGPT/trajs.pkl
model:
    model_type: gpt-nano
    n_layer: None
    n_head: None
    n_embd: None
    vocab_size: 60001
    block_size: 1310
    embd_pdrop: 0.1
    resid_pdrop: 0.1
    attn_pdrop: 0.1
trainer:
    device: cpu
    num_workers: 4
    max_iters: None
    batch_size: 4
    learning_rate: 0.0005
    betas: (0.9, 0.95)
    weight_decay: 0.1
    grad_norm_clip: 1.0

number of parameters: 3.03M
running on device cpu
iter_dt 0.00ms; iter 0: train loss 11.00617


100%|██████████| 2302/2302 [12:42<00:00,  3.02it/s]


saving model
iter_dt 7853.13ms; iter 10: train loss 10.62611
iter_dt 8220.00ms; iter 20: train loss 10.19668
iter_dt 8096.31ms; iter 30: train loss 9.70120
iter_dt 8534.70ms; iter 40: train loss 9.21869
iter_dt 7905.70ms; iter 50: train loss 8.76496
iter_dt 7821.45ms; iter 60: train loss 8.29212
iter_dt 8224.63ms; iter 70: train loss 7.84907
iter_dt 8206.23ms; iter 80: train loss 7.42505
iter_dt 8262.61ms; iter 90: train loss 6.98743
iter_dt 8192.84ms; iter 100: train loss 6.67651
iter_dt 7933.59ms; iter 110: train loss 6.40026
iter_dt 8072.47ms; iter 120: train loss 6.14435
iter_dt 7998.50ms; iter 130: train loss 5.90521
