## Train a Connect Four GPT (adapted from Othello-GPT) and save to `ckpts`

Use `jupyter nbconvert --execute --to notebook --allow-errors --ExecutePreprocessor.timeout=-1 train_gpt_othello.ipynb --inplace --output ckpts/checkpoint.ipynb` to run this notebook in the background.

In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# make deterministic
# NOTE(review): seeding is currently disabled and no other seed is set in the cells
# shown here, so results may differ between runs. Uncomment the two lines below
# to fix the seed.
# from mingpt.utils import set_seed
# set_seed(44)

In [3]:
import os
import pickle
import time

import torch
from tqdm import tqdm

from connect_four.connect_four import ConnectFour
from connect_four.connect_four_dataset import ConnectFourDataset, CharConnectFourDataset, DatasetPreprocessingConfig
from mingpt.model import GPT, GPTConfig
from mingpt.trainer import Trainer, TrainerConfig
from mingpt.utils import sample

## Generate the dataset

In [4]:
# Disabled: builds a fresh 6x7 ConnectFourDataset and pickles its `sequences`
# attribute to disk. Uncomment to regenerate the dataset instead of loading an
# existing file in the next section.
# dataset = ConnectFourDataset(
#     data_size=110000,
#     train_size=110000,
#     rows_count=6,
#     columns_count=7
# )

# with open("connect_four/dataset/dataset_6x7_110000.pkl", "wb") as f:
#     pickle.dump(dataset.sequences, f)

## Load the existing dataset from a file

In [15]:
# Read the previously saved game transcriptions from disk.
# NOTE: unpickling can execute arbitrary code, so only load dataset files you trust.
with open("connect_four/dataset/dataset_minmax_123_7383.pkl", "rb") as dataset_file:
    game_transcriptions = pickle.load(dataset_file)

## Create the data loader

In [16]:
# Wrap the loaded transcriptions in the project's dataset classes.
# NOTE(review): data_size=0 presumably means "generate no new games" and
# train_size=7138 the number of games used for training - confirm against
# ConnectFourDataset.
cf_data = ConnectFourDataset(
    data_size=0,
    train_size=7138,
    games_to_use=game_transcriptions,
)
char_cf_dataset = CharConnectFourDataset(cf_data)
# Display the vocabulary, the preprocessing config and the maximum sequence length.
(char_cf_dataset.chars, char_cf_dataset.config, char_cf_dataset.max_len)

Dataset created has 7138 sequences, 8 unique words.


([-100, 0, 1, 2, 3, 4, 5, 6],
 DatasetPreprocessingConfig(to_model_repr={-100: 0, 0: 1, 1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7}, from_model_repr={0: -100, 1: 0, 2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6}, block_size=41, vocab_size=8),
 42)

## Initialize model, and optionally train it

In [17]:
# Use the current CUDA device when one is available, otherwise fall back to the
# CPU so this cell does not raise on machines without a GPU.
device = torch.cuda.current_device() if torch.cuda.is_available() else "cpu"
# Model hyper-parameters: vocabulary size and block size come from the dataset's
# preprocessing config; depth, head count and embedding width are fixed here.
mconf = GPTConfig(
    char_cf_dataset.config.vocab_size,
    char_cf_dataset.config.block_size,
    n_layer=2,
    n_head=8,
    n_embd=80,
)
model = GPT(mconf).to(device)

In [18]:
max_epochs = 20
warmup_epochs = 5

# Timestamp suffix so that each training run gets its own checkpoint file name.
t_start = time.strftime("_%Y%m%d_%H%M%S")
ckpt_path = f"./ckpts/minmax_models/gpt_at{t_start}.ckpt"
# Create the checkpoint directory up front so that writing the checkpoint cannot
# fail just because the folder is missing.
os.makedirs(os.path.dirname(ckpt_path), exist_ok=True)

# Number of tokens in one pass over the dataset; warmup_tokens and final_tokens
# below are expressed as multiples of it.
tokens_per_epoch = len(char_cf_dataset) * char_cf_dataset.config.block_size

# initialize a trainer instance and kick off training
tconf = TrainerConfig(
    max_epochs=max_epochs,
    batch_size=10,
    learning_rate=5e-4,
    lr_decay=True,
    warmup_tokens=tokens_per_epoch * warmup_epochs,
    final_tokens=tokens_per_epoch * max_epochs,
    num_workers=0,
    ckpt_path=ckpt_path,
)
# NOTE(review): the third positional argument is None - presumably "no test
# dataset"; confirm against Trainer's signature.
trainer = Trainer(model, char_cf_dataset, None, tconf)
# Later cells read `device`; keep it in sync with whatever the trainer selected.
device = trainer.device
trainer.train()

epoch 1 iter 713: train loss 1.19634. lr 1.000000e-04: 100%|██████████| 714/714 [00:08<00:00, 87.30it/s]
epoch 2 iter 713: train loss 0.73517. lr 2.000000e-04: 100%|██████████| 714/714 [00:08<00:00, 86.10it/s]
epoch 3 iter 713: train loss 0.74214. lr 3.000000e-04: 100%|██████████| 714/714 [00:08<00:00, 85.68it/s]
epoch 4 iter 713: train loss 0.51858. lr 4.000000e-04: 100%|██████████| 714/714 [00:08<00:00, 86.99it/s]
epoch 5 iter 713: train loss 0.47359. lr 5.000000e-04: 100%|██████████| 714/714 [00:08<00:00, 87.04it/s]
epoch 6 iter 713: train loss 0.59881. lr 4.945369e-04: 100%|██████████| 714/714 [00:08<00:00, 87.66it/s]
epoch 7 iter 713: train loss 0.61803. lr 4.783864e-04: 100%|██████████| 714/714 [00:08<00:00, 87.97it/s]
epoch 8 iter 713: train loss 0.55427. lr 4.522542e-04: 100%|██████████| 714/714 [00:08<00:00, 88.25it/s]
epoch 9 iter 713: train loss 0.42229. lr 4.172827e-04: 100%|██████████| 714/714 [00:08<00:00, 87.39it/s]
epoch 10 iter 713: train loss 0.53981. lr 3.750000e-04:

## Load trained model from `ckpts`

In [7]:
# Pick the target device explicitly so `device` is always defined after this cell,
# including on machines without CUDA.
device = torch.cuda.current_device() if torch.cuda.is_available() else "cpu"
# Deserialize the weights onto the CPU first: a checkpoint written on a GPU
# machine would otherwise fail to load when CUDA is unavailable.
load_res = model.load_state_dict(
    torch.load("./ckpts/gpt_at_20230618_093325.ckpt", map_location="cpu")
)
model = model.to(device)

## Validate it: for what percentage of all partial games in the validation set is the top-1 prediction a legal move?

In [19]:
def validate_model(
        dataset: ConnectFourDataset,
        dataset_config: DatasetPreprocessingConfig,
        model: GPT
) -> float:
    """Measure how often the model's predicted next move is legal on validation games.

    For every game in ``dataset.valid`` and every non-empty proper prefix of that
    game, the prefix is encoded with ``dataset_config.to_model_repr``, extended by
    one token via ``sample`` and decoded with ``dataset_config.from_model_repr``.
    The prefix is replayed on a fresh ``ConnectFour`` board (pieces 1 and 2
    alternating, piece 1 first) and the predicted move counts as a success when
    ``is_move_possible`` accepts it.

    Args:
        dataset: Object whose ``valid`` attribute is iterated as a collection of
            games, each game being a sequence of moves.
        dataset_config: Supplies the ``to_model_repr`` / ``from_model_repr``
            mappings between moves and model token ids.
        model: Model to evaluate. Inputs are placed on the device of its parameters.

    Returns:
        Percentage (0-100) of evaluated prefixes whose predicted move was legal,
        or ``0.0`` when there was no prefix to evaluate.
    """
    total_nodes = 0
    success_nodes = 0
    # Take the device from the model itself rather than from a notebook-level
    # global, so the function works regardless of which cells were run before it.
    model_device = next(model.parameters()).device

    def pass_rate() -> float:
        # Avoid ZeroDivisionError when nothing has been evaluated yet (empty
        # validation set, or games too short to have a proper prefix).
        return success_nodes / total_nodes * 100 if total_nodes else 0.0

    def summary() -> str:
        return f"{pass_rate():.2f}% pass rate: {success_nodes}/{total_nodes} among all searched nodes"

    bar = tqdm(dataset.valid)
    for whole_game in bar:
        length_of_whole_game = len(whole_game)
        for length_of_partial_game in range(1, length_of_whole_game):
            total_nodes += 1
            context = whole_game[:length_of_partial_game]
            # Shape (1, len(context)): a single-sequence batch of token ids.
            x = torch.tensor(
                [dataset_config.to_model_repr[s] for s in context],
                dtype=torch.long,
            )[None, ...].to(model_device)
            # Ask for exactly one additional token after the context.
            y = sample(model, x, 1, temperature=1.0)[0]
            completion = [dataset_config.from_model_repr[int(i)] for i in y if i != -1]

            # Replay the context on a fresh board, alternating between the two pieces.
            game_repr = ConnectFour()
            piece = 1
            for move in context:
                game_repr.apply_move(piece, move)
                piece = 2 if piece == 1 else 1

            # The last decoded element is the model's predicted next move.
            if game_repr.is_move_possible(completion[-1]):
                success_nodes += 1
        bar.set_description(summary())
    print(summary())
    return pass_rate()

In [20]:
# Run the legality check over the validation games; the returned pass rate
# (in percent) is displayed as the cell's output.
validate_model(
    dataset=cf_data,
    dataset_config=char_cf_dataset.config,
    model=model,
)

99.97% pass rate: 9745/9748 among all searched nodes: 100%|██████████| 246/246 [00:47<00:00,  5.18it/s] 

99.97% pass rate: 9745/9748 among all searched nodes





99.96922445629873