# Self-Play

In [1]:
import sys
sys.path.insert(0, '../../src/')

import cmath
import pickle
import random
from copy import copy, deepcopy

import numpy as np
import matplotlib.pyplot as plt
import config
import torch
from tqdm.notebook import tqdm
import chess
from utils import *
from evals import *

from agents import *
from environments import *
from models import *
%matplotlib inline

np.set_printoptions(precision = 3)

print("PyTorch version:", torch.__version__)  
print("CUDA toolkit version PyTorch was built with:", torch.version.cuda)  
print("cuDNN version:", torch.backends.cudnn.version()) 
print("cuda available:", torch.cuda.is_available())

torch.set_float32_matmul_precision('high')

PyTorch version: 2.7.1+cu128
CUDA toolkit version PyTorch was built with: 12.8
cuDNN version: 90701
cuda available: True


In [2]:
torch.manual_seed(42)
random.seed(42)
np.random.seed(42)
agent = Agent(board_logic = BoardLogic(max_num_moves=100), 
              in_ch=20, 
              ch=192, 
              n_blocks=15,
              sample_policy=eps_greedy_policy,)

print(sum(p.numel() for p in agent.online_net1.parameters() if p.requires_grad))
environment = Environment(max_num_moves=100,
                          filter_blunder=False, # causes a lot of draw in early self-play of on
                          )

opt_list = [None, None]

model = Model(agent = agent,
               environment = environment,
               mem_capacity = 1000000,
               init_mem = True,
               batch_size = 1024,
               num_warmup = 100000,
               policy_update = 2,
               tau = 0.01,
               temp_scaler = TemperatureScaler(temp_start=0.5, 
                                               temp_end=0.25, 
                                               temp_min=1e-4, 
                                               episode_decay=10000, 
                                               transition_decay=0.95),
               opt_list=opt_list,
               scaler=torch.amp.GradScaler("cuda")
             )

optimizer_grouped_parameters1 = group_decay_parameters(
    agent.online_net1,
    weight_decay=1e-5,
    no_decay=["bias", "GroupNorm.weight"],
    )

optimizer_grouped_parameters2 = group_decay_parameters(
    agent.online_net2,
    weight_decay=1e-5,
    no_decay=["bias", "GroupNorm.weight"],
    )

opt_list[0] = torch.optim.AdamW(optimizer_grouped_parameters1, lr=1e-4)
opt_list[1] = torch.optim.AdamW(optimizer_grouped_parameters2, lr=1e-4)


10014220


In [3]:
save_core(model, filename="../models/" + "model_large_0_episodes_core.pth")

In [4]:
agent1 = model.agent
agent2 = deepcopy(agent1)
eval_agents = EvaluateAgents(agent1 = agent1, 
                             agent2 = agent2, 
                             environment = Environment(max_num_moves=100, 
                                                       filter_blunder=False), 
                             num_games=500,
                             temp = 0.2)

model.train(num_episodes = 5000, 
            evaluate_agents = eval_agents,
            freq=1000)

save_core(model, filename="../models/" + "model_large_5000_episodes_core.pth")
save_memory(model, filename="../models/" + "model_large_5000_episodes_memory.pth")

  0%|          | 0/5000 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

{1: 129, -1: 66, 0: 305} tensor(0.0003, device='cuda:0') 0.048567857985673354


  0%|          | 0/500 [00:00<?, ?it/s]

{1: 134, -1: 101, 0: 265} tensor(0.0005, device='cuda:0') 0.11042577611765005


  0%|          | 0/500 [00:00<?, ?it/s]

{1: 267, -1: 71, 0: 162} tensor(0.0014, device='cuda:0') 0.13696066740926782


  0%|          | 0/500 [00:00<?, ?it/s]

{1: 373, -1: 41, 0: 86} tensor(0.0015, device='cuda:0') 0.16199854082234505


  0%|          | 0/500 [00:00<?, ?it/s]

{1: 387, -1: 28, 0: 85} tensor(0.0010, device='cuda:0') 0.17782513058604035


In [None]:
agent1 = model.agent
agent2 = deepcopy(agent1)
eval_agents = EvaluateAgents(agent1 = agent1, 
                             agent2 = agent2, 
                             environment = Environment(max_num_moves=100, 
                                                       filter_blunder=False), 
                             num_games=500,
                             temp = 0.2)

model.train(num_episodes = 5000, 
            evaluate_agents = eval_agents,
            freq=1000)

save_core(model, filename="../models/" + "model_large_10000_episodes_core.pth")
save_memory(model, filename="../models/" + "model_large_10000_episodes_memory.pth")

  0%|          | 0/5000 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

{1: 268, -1: 115, 0: 117} tensor(0.0018, device='cuda:0') 0.17753523522997067


  0%|          | 0/500 [00:00<?, ?it/s]

{1: 361, -1: 54, 0: 85} tensor(0.0003, device='cuda:0') 0.16112259423026962


  0%|          | 0/500 [00:00<?, ?it/s]

{1: 396, -1: 33, 0: 71} tensor(0.0003, device='cuda:0') 0.1684518355259891


  0%|          | 0/500 [00:00<?, ?it/s]

{1: 410, -1: 34, 0: 56} tensor(0.0010, device='cuda:0') 0.1395181512227282


In [None]:
agent1 = model.agent
agent2 = deepcopy(agent1)
eval_agents = EvaluateAgents(agent1 = agent1, 
                             agent2 = agent2, 
                             environment = Environment(max_num_moves=100, 
                                                       filter_blunder=False), 
                             num_games=500,
                             temp = 0.2)

model.train(num_episodes = 5000, 
            evaluate_agents = eval_agents,
            freq=1000)

save_core(model, filename="../models/" + "model_large_15000_episodes_core.pth")
save_memory(model, filename="../models/" + "model_large_15000_episodes_memory.pth")

In [None]:
agent1 = model.agent
agent2 = deepcopy(agent1)
eval_agents = EvaluateAgents(agent1 = agent1, 
                             agent2 = agent2, 
                             environment = Environment(max_num_moves=100, 
                                                       filter_blunder=False), 
                             num_games=500,
                             temp = 0.2)

model.train(num_episodes = 5000, 
            evaluate_agents = eval_agents,
            freq=1000)

save_core(model, filename="../models/" + "model_large_20000_episodes_core.pth")
save_memory(model, filename="../models/" + "model_large_20000_episodes_memory.pth")