In [1]:
import numpy as np
import torch
import torch.nn as nn
import random
from torch.nn import functional as F
from minGPT.mingpt import model
# make deterministic
from minGPT.mingpt.utils import set_seed
set_seed(42)
# set up logging
import logging
logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO,
)

In [3]:
import automataBattle
from importlib import reload
reload(automataBattle)
from torch.utils.data import Dataset
class FastLearnAutomataDataset(Dataset):
    def __init__(self, nStates, nSymbols, split, sequenceLen, numSequences):
        self.nStates = nStates
        self.nSymbols = nSymbols
        self.split = split # train/test
        self.vocab_size = nSymbols*nSymbols
        # +1 due to potential carry overflow, but then -1 because very last digit doesn't plug back
        self.block_size = sequenceLen
        
        self.sequenceLen, self.numSequences = sequenceLen, numSequences
        
        '''
        # split up all addition problems into either training data or test data
        num = (10**self.ndigit)**2 # total number of possible combinations
        r = np.random.RandomState(1337) # make deterministic
        perm = r.permutation(num)
        num_test = min(int(num*0.2), 1000) # 20% of the whole dataset, or only up to 1000
        self.ixes = perm[:num_test] if split == 'test' else perm[num_test:]
        '''


    def __len__(self):
        return self.numSequences

    def __getitem__(self, idx):
        
        a = automataBattle.Automata(nStates=self.nStates, symbols=range(self.nSymbols), randomConnect=True)
        a.minimize()
        while a.complexity() != self.nStates:
            a = automataBattle.Automata(nStates=self.nStates, symbols=range(self.nSymbols), randomConnect=True)
            a.minimize()
        X, Y = a.generate(self.sequenceLen, lambda: random.choice(range(self.nSymbols)))
        x = torch.tensor(X)
        y = torch.tensor(Y) # predict the output of the Automata
        previous = y[:-1]
        shiftedForwadInputsOne = x[1:]
        outputs = y[1:] # Todo: look into encoding multiple things ("tuple encodings") instead of this gross thing
        xOutput = shiftedForwadInputsOne+previous*self.nSymbols
        yOutput = outputs
        return xOutput, yOutput
        
        '''
        # given a problem index idx, first recover the associated a + b
        idx = self.ixes[idx]
        nd = 10**self.ndigit
        a = idx // nd
        b = idx %  nd
        c = a + b
        render = f'%0{self.ndigit}d%0{self.ndigit}d%0{self.ndigit+1}d' % (a,b,c) # e.g. 03+25=28 becomes "0325028" 
        dix = [int(s) for s in render] # convert each character to its token index
        # x will be input to GPT and y will be the associated expected outputs
        x = torch.tensor(dix[:-1], dtype=torch.long)
        y = torch.tensor(dix[1:], dtype=torch.long) # predict the next token in the sequence
        y[:self.ndigit*2-1] = -100 # we will only train in the output locations. -100 will mask loss to zero
        return x, y
        '''

In [1]:

import minGPT
from importlib import reload
from minGPT.mingpt import trainer
from minGPT.mingpt import model
reload(minGPT.mingpt.model)
reload(minGPT.mingpt.trainer)
from minGPT.mingpt.model import GPT, GPTConfig, GPT1Config
import gc
model = None
train_dataset = None
test_dataset = None
gc.collect()
torch.cuda.empty_cache()
gc.collect()
train_dataset = FastLearnAutomataDataset(nStates=2, nSymbols=2, split='train', sequenceLen=100, numSequences=6000000)
test_dataset = FastLearnAutomataDataset(nStates=2, nSymbols=2, split='test', sequenceLen=100, numSequences=2000)
print(train_dataset[0], train_dataset[1])
# initialize a baby GPT model
mconf = GPTConfig(train_dataset.vocab_size, train_dataset.block_size, 
                  n_layer=8, n_head=8, n_embd=64)
model = GPT(mconf)
#model.load_state_dict(torch.load("juniper_fit_actual_4_states_2"))
from minGPT.mingpt.trainer import Trainer, TrainerConfig
set_seed(27)

    

NameError: name 'torch' is not defined

In [38]:

# initialize a trainer instance and kick off training
tconf = TrainerConfig(max_epochs=100, batch_size=512, learning_rate=6e-5,
                      lr_decay=True, warmup_tokens=512, final_tokens=50*len(train_dataset)*(2+1),
                      num_workers=16)
trainer = Trainer(model, train_dataset, test_dataset, tconf)
trainer.train()

epoch 1 iter 11718: train loss 0.20796. lr 1.554741e-05: 100%|██████████| 11719/11719 [31:01<00:00,  6.30it/s]
10/20/2020 23:41:15 - INFO - minGPT.mingpt.trainer -   test loss: 0.194861
epoch 2 iter 11718: train loss 0.20348. lr 1.392521e-05: 100%|██████████| 11719/11719 [31:02<00:00,  6.29it/s]
10/21/2020 00:12:19 - INFO - minGPT.mingpt.trainer -   test loss: 0.193911
epoch 3 iter 10580: train loss 0.19852. lr 5.895954e-05:  90%|█████████ | 10581/11719 [28:00<03:00,  6.29it/s]


KeyboardInterrupt: 

In [39]:
ckpt_path = "juniper_128"
raw_model = model.module if hasattr(model, "module") else model
torch.save(raw_model.state_dict(), ckpt_path)

# seems 8layer, 8head, embed32 got stuck at around 0.5, but it's possible it could have gone further
# juniper_fit fit really well, n_layer=8, n_head=8, n_embd=64

In [None]:
'''
not pre-fit to 2, n_layer=8, n_head=8, n_embd=64
epoch 1 iter 2343: train loss 0.49225. lr 1.445926e-05: 100%|██████████| 2344/2344 [13:45<00:00,  2.84it/s]
10/20/2020 13:35:34 - INFO - minGPT.mingpt.trainer -   test loss: 0.498138
epoch 2 iter 2343: train loss 0.49982. lr 1.610073e-05: 100%|██████████| 2344/2344 [13:42<00:00,  2.85it/s]
10/20/2020 13:49:17 - INFO - minGPT.mingpt.trainer -   test loss: 0.473743
epoch 3 iter 2343: train loss 0.47190. lr 5.994085e-05: 100%|██████████| 2344/2344 [13:41<00:00,  2.85it/s]
10/20/2020 14:02:58 - INFO - minGPT.mingpt.trainer -   test loss: 0.592071
epoch 4 iter 2343: train loss 0.49541. lr 1.287954e-05: 100%|██████████| 2344/2344 [13:42<00:00,  2.85it/s]
10/20/2020 14:16:41 - INFO - minGPT.mingpt.trainer -   test loss: 0.498718
epoch 5 iter 2343: train loss 0.47400. lr 1.779652e-05: 100%|██████████| 2344/2344 [13:40<00:00,  2.86it/s]
10/20/2020 14:30:22 - INFO - minGPT.mingpt.trainer -   test loss: 0.514086
epoch 6 iter 2343: train loss 0.47976. lr 5.976367e-05: 100%|██████████| 2344/2344 [13:41<00:00,  2.85it/s]
10/20/2020 14:44:03 - INFO - minGPT.mingpt.trainer -   test loss: 0.396569
epoch 7 iter 2343: train loss 0.48898. lr 1.136731e-05: 100%|██████████| 2344/2344 [13:42<00:00,  2.85it/s]
10/20/2020 14:57:45 - INFO - minGPT.mingpt.trainer -   test loss: 0.506740
epoch 8 iter 2343: train loss 0.47964. lr 1.954042e-05: 100%|██████████| 2344/2344 [13:44<00:00,  2.84it/s]
10/20/2020 15:11:30 - INFO - minGPT.mingpt.trainer -   test loss: 0.464614
epoch 9 iter 2343: train loss 0.47039. lr 5.946917e-05: 100%|██████████| 2344/2344 [13:40<00:00,  2.86it/s]
10/20/2020 15:25:11 - INFO - minGPT.mingpt.trainer -   test loss: 0.503165
epoch 10 iter 2343: train loss 0.46504. lr 9.928526e-06: 100%|██████████| 2344/2344 [13:42<00:00,  2.85it/s]
10/20/2020 15:38:54 - INFO - minGPT.mingpt.trainer -   test loss: 0.479228
epoch 11 iter 2343: train loss 0.46503. lr 2.132556e-05: 100%|██████████| 2344/2344 [13:42<00:00,  2.85it/s]
10/20/2020 15:52:37 - INFO - minGPT.mingpt.trainer -   test loss: 0.493589
epoch 12 iter 2343: train loss 0.44574. lr 5.905849e-05: 100%|██████████| 2344/2344 [13:43<00:00,  2.85it/s]
10/20/2020 16:06:20 - INFO - minGPT.mingpt.trainer -   test loss: 0.436838
epoch 13 iter 2343: train loss 0.44981. lr 8.568867e-06: 100%|██████████| 2344/2344 [13:41<00:00,  2.85it/s]
10/20/2020 16:20:01 - INFO - minGPT.mingpt.trainer -   test loss: 0.445516
epoch 14 iter 2343: train loss 0.44436. lr 2.314489e-05: 100%|██████████| 2344/2344 [13:40<00:00,  2.86it/s]
10/20/2020 16:33:42 - INFO - minGPT.mingpt.trainer -   test loss: 0.409083
epoch 15 iter 2343: train loss 0.45118. lr 5.853326e-05: 100%|██████████| 2344/2344 [13:41<00:00,  2.85it/s]
10/20/2020 16:47:24 - INFO - minGPT.mingpt.trainer -   test loss: 0.320424
epoch 16 iter 2343: train loss 0.44524. lr 7.293692e-06: 100%|██████████| 2344/2344 [13:41<00:00,  2.85it/s]
10/20/2020 17:01:05 - INFO - minGPT.mingpt.trainer -   test loss: 0.412357
epoch 17 iter 2343: train loss 0.44509. lr 2.499124e-05: 100%|██████████| 2344/2344 [13:42<00:00,  2.85it/s]
10/20/2020 17:14:48 - INFO - minGPT.mingpt.trainer -   test loss: 0.498460
epoch 18 iter 656: train loss 0.43165. lr 6.000000e-06:  28%|██▊       | 657/2344 [03:50<09:52,  2.85it/s]

'''