#### Self-play to generate training data
Pair one stronger agent with one weaker agent so that more games end decisively (win/loss) rather than in a draw or a timeout.

In [1]:
import os, time, torch
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

# Same base model
model_kwargs = {'nlayers': 6, 'nheads': 3, 'embed_dim': 18, 'dk': 5, 'device': device,'load_path': os.path.join('data','output','baseline', 'model.pt')}
# Different strength parameters
agent0_spec = {'type': 'transformer', 'kwargs': model_kwargs, 'num_simgames': 150, 'max_simmoves': 4, 'C': 1, 'p': 0.3, 'k': 3}
agent1_spec = {'type': 'transformer', 'kwargs': model_kwargs, 'num_simgames':  20, 'max_simmoves': 2, 'C': 1, 'p': 0.4, 'k': 4}

# Script should be able to access namespace including the following:
# num_workers          # Required: number of python processes to spawn.
# num_tournaments      # Required: number of tournaments to play - one worker per tournament
# agents_spec          # Required: specification for each agent: {'type':'dummy', 'kwargs':kwargs, 'num_simgames':#, 'max_simmoves':#, 'C':#, 'p':#, 'k':#}
# num_games            # Required: number of games to play in sequence each tournament
# starting_state       # Can be None: starting state for tournaments in the form of (board, color_toplay)
# max_moves            # Required: max number of master-level game moves per game
# save                 # Required: boolean - whether to save the self-play results or not
# result_dest          # Optional based on "save": destination directory for tournament results to be saved

num_workers = 2
num_tournaments = 10000
agents_spec = [agent0_spec, agent1_spec]
num_games = 1
starting_state = None
max_moves = 200
save = True
result_dest = os.path.join('data','output','round1')

# Do this
start = time.time()
%run -i "chess_selfplay.py"
duration = time.time() - start
print(
    f'''
    Played {num_tournaments} tournaments at {num_games*2} games each, {num_tournaments*num_games*2:,.0f} games total. 
    Completed in {int(duration // 60)} minutes, {int(duration % 60)} seconds.
    '''
)

cuda


#### Extract and store found checkmate positions - these can be included in all future training datasets

In [None]:
import os, pickle
from collections import Counter
from tqdm import tqdm
# Scan the saved round1 tournaments, tally each agent's points per game, and
# store the final position of every decisive game as its own pickle.
root = os.path.join('data', 'output', 'round1')
chkm_dest = os.path.join('data', 'output', 'checkmates')
# Create the destination up front so the dumps below cannot fail on a missing directory.
os.makedirs(chkm_dest, exist_ok=True)

tournamentfiles = [f for f in os.listdir(root) if f.startswith('tmnt_') and f.endswith('.pkl')]
agent0_points = []
agent1_points = []

# Each pair holds two games, keyed by who plays white ("a0wa1b" = agent0 white, agent1 black).
GAME_KEYS = ('a0wa1b', 'a1wa0b')
COLORS = ('white', 'black')

# Running totals across ALL files (the per-file `checkmates` list is reset every iteration).
num_checkmate_games = 0
num_checkmate_positions = 0

for file in tqdm(tournamentfiles):
    # NOTE: pickle.load can execute arbitrary code - only point `root` at trusted, locally generated files.
    with open(os.path.join(root, file), 'rb') as pkl:
        tourn = pickle.load(pkl)

    checkmates = []
    for pair in tourn.values():
        agent0_points.append(pair['a0wa1b']['white']['points'])
        agent1_points.append(pair['a0wa1b']['black']['points'])
        agent0_points.append(pair['a1wa0b']['black']['points'])
        agent1_points.append(pair['a1wa0b']['white']['points'])

        # Look the games up by key: iterating `pair` directly yields the key strings, not the games.
        for game_key in GAME_KEYS:
            game = pair[game_key]
            decisive = False
            for color in COLORS:
                points = game[color]['points']
                if abs(points) == 1:
                    # Keep this side's last recorded (token, board) together with its result.
                    token, board = game[color]['moves'][-1]
                    checkmates.append((token, board, points))
                    decisive = True
            # Count the game once, even if both colors contributed a position.
            num_checkmate_games += decisive

    # NOTE(review): output names depend only on the tournament file name, so other runs
    # writing to the same checkmates directory may overwrite these files - confirm intended.
    file_trunk = file.split('.')[0]
    for i, checkmate in enumerate(checkmates):
        with open(os.path.join(chkm_dest, f'{file_trunk}_{i}.pkl'), 'wb') as pkl:
            pickle.dump(checkmate, pkl)
    num_checkmate_positions += len(checkmates)

# agent0_points holds exactly one entry per game, so its length is the number of games played.
print(f'{num_checkmate_games} games ended in checkmate out of {len(agent0_points)} games played.')
Counter(agent0_points)

#### Build and train transformer model on self-play dataset

In [None]:
import torch, os
# from torch import nn
from torch.utils.data import DataLoader
from chess_model import TransformerModel, ChessDataset, TanhLoss, train
# Run on the GPU when torch reports one, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

# Model settings; load_path is None here, unlike the self-play cells which pass a saved model.pt.
model_kwargs = dict(
    nlayers=6,
    nheads=3,
    embed_dim=18,  # must be divisible by 2 (for PE) * nheads
    dk=5,
    device=device,
    load_path=None,
)

model = TransformerModel(**model_kwargs)
# lr is 0 here; presumably train() drives it via warmup_passes / max_lr - confirm in chess_model.
optimizer = torch.optim.Adam(model.parameters(), lr=0, weight_decay=0)
loss_fn = TanhLoss()

# The dataset is read from the baseline directory; model_dest (model.pt inside it) is passed to train() as save_dir.
root = os.path.join('data', 'output', 'baseline')
model_dest = os.path.join(root, 'model.pt')
dataset = ChessDataset(root, device)

# 80/20 random split; the test share takes the remainder so the two sizes always sum to len(dataset).
n_train = int(len(dataset) * 0.8)
n_test = len(dataset) - n_train
train_set, test_set = torch.utils.data.random_split(dataset, [n_train, n_test])

# Both loaders use the same batching settings.
loader_kwargs = dict(batch_size=1000, shuffle=True, num_workers=0)
train_loader = DataLoader(train_set, **loader_kwargs)
test_loader = DataLoader(test_set, **loader_kwargs)
print(f'Training on {len(train_set)} examples in {len(train_loader)} batches.')

# Train on the data
model = train(
    model,
    loss_fn,
    optimizer,
    train_loader,
    test_loader,
    warmup_passes=4,
    max_lr=1e-4,
    save_dir=model_dest,
    stopping=5,
)

#### Self-play to evaluate trained model performance

In [None]:
import os, time, torch
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

# Script should be able to access namespace including the following:
# num_workers          # Required: number of python processes to spawn.
# num_tournaments      # Required: number of tournaments to play - one worker per tournament
# agents_spec          # Required: specification for each agent: {'type':'dummy', 'kwargs':kwargs, 'num_simgames':#, 'max_simmoves':#, 'C':#, 'p':#, 'k':#}
# num_games            # Required: number of games to play in sequence each tournament
# starting_state       # Can be None: starting state for tournaments in the form of (board, color_toplay)
# max_moves            # Required: max number of master-level game moves per game
# save                 # Required: boolean - whether to save the self-play results or not
# result_dest          # Optional based on "save": destination directory for tournament results to be saved

model_kwargs = {
    'nlayers': 6,
    'nheads': 3, 
    'embed_dim': 18, # must be divisible by 2 (for PE) * nheads
    'dk': 5, 
    'device': device,
    'load_path': os.path.join('data','output','baseline', 'model.pt')
}

agent0_spec = {'type': 'transformer', 'kwargs': model_kwargs, 'num_simgames': 100, 'max_simmoves': 3, 'C': 0.5, 'p': 0.3, 'k': 3}
agent1_spec = {'type': None}

num_workers = 3
num_tournaments = 10000
agents_spec = [agent0_spec, agent1_spec]
num_games = 1
starting_state = None
max_moves = 200
save = True
result_dest = os.path.join('data','output','baseline','eval')

# Do this
start = time.time()
%run -i "chess_selfplay.py"
duration = time.time() - start
print(
    f'''
    Played {num_tournaments} tournaments at {num_games*2} games each, {num_tournaments*num_games*2:,.0f} games total. 
    Completed in {int(duration // 60)} minutes, {int(duration % 60)} seconds.
    '''
)

In [None]:
import os, pickle
from tqdm import tqdm
from collections import Counter
# Scan the saved evaluation tournaments, store the final position of every decisive
# game as its own pickle, and tally each agent's points per game.
root = os.path.join('data', 'output', 'baseline','eval')
chkm_dest = os.path.join('data', 'output', 'checkmates')
# Create the destination up front so the dumps below cannot fail on a missing directory.
os.makedirs(chkm_dest, exist_ok=True)

tournamentfiles = [f for f in os.listdir(root) if f.startswith('tmnt_') and f.endswith('.pkl')]
tournaments = []
agent0_points = []
agent1_points = []

COLORS = ('white', 'black')

for file in tqdm(tournamentfiles):
    # NOTE: pickle.load can execute arbitrary code - only point `root` at trusted, locally generated files.
    with open(os.path.join(root, file), 'rb') as pkl:
        tourn = pickle.load(pkl)
    tournaments.append(tourn)

    checkmates = []
    for pair in tourn.values():
        # In pair[0] agent0's points are read from 'white'; in pair[1] they are read from 'black'.
        agent0_points.append(pair[0]['white']['points'])
        agent1_points.append(pair[0]['black']['points'])
        agent0_points.append(pair[1]['black']['points'])
        agent1_points.append(pair[1]['white']['points'])

        # Visit the same two games and two colors the points tally reads.
        for game in (pair[0], pair[1]):
            for color in COLORS:
                points = game[color]['points']
                if abs(points) == 1:
                    # Keep this side's last recorded (token, board) together with its result.
                    token, board = game[color]['moves'][-1]
                    checkmates.append((token, board, points))

    # NOTE(review): output names depend only on the tournament file name, so other runs
    # writing to the same checkmates directory may overwrite these files - confirm intended.
    file_trunk = file.split('.')[0]
    for i, checkmate in enumerate(checkmates):
        with open(os.path.join(chkm_dest, f'{file_trunk}_{i}.pkl'), 'wb') as pkl:
            pickle.dump(checkmate, pkl)

Counter(agent0_points)