# Curriculum Learning

In [19]:
import sys
# Make ../../src/ importable; keep this ahead of the imports that follow.
sys.path.insert(0, '../../src/')

import cmath
import pickle
import random  # explicit: random.seed() is called in the setup cell
from copy import copy, deepcopy

import chess
import matplotlib.pyplot as plt
import numpy as np
import torch
from tqdm.notebook import tqdm

import config
from utils import saver, loader

from agents import *
from environments import *
from models import *
%matplotlib inline

# Print NumPy floats with three digits of precision.
np.set_printoptions(precision=3)

# Record the software stack in the saved output so a run can be tied to its environment.
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA toolkit version PyTorch was built with: {torch.version.cuda}")
print(f"cuDNN version: {torch.backends.cudnn.version()}")
print(f"cuda available: {torch.cuda.is_available()}")

# Select the 'high' float32 matmul precision mode (see the PyTorch docs for the
# speed/accuracy trade-off this setting controls).
torch.set_float32_matmul_precision("high")

PyTorch version: 2.7.1+cu128
CUDA toolkit version PyTorch was built with: 12.8
cuDNN version: 90701
cuda available: True


In [20]:
# Single source of truth for the seed, so the three RNGs cannot drift apart
# when the value is changed.
SEED = 42
torch.manual_seed(SEED)
random.seed(SEED)
np.random.seed(SEED)

agent = Agent(board_logic=BoardLogic(), in_ch=18, ch=128, n_blocks=10)
# Number of trainable parameters in the first online network (size sanity check).
print(sum(p.numel() for p in agent.online_net1.parameters() if p.requires_grad))
environment = Environment(max_num_moves=200)

# Two optimizer slots, filled in further down. The SAME list object is handed to
# Model here and mutated afterwards.
# NOTE(review): this only works if Model keeps a reference to the list rather
# than copying it — confirm against Model.__init__.
opt_list = [None, None]

model = Model(
    agent=agent,
    environment=environment,
    mem_capacity=1000000,
    init_mem=False,
    batch_size=512,
    num_warmup=100000,
    policy_update=2,
    tau=0.01,
    temp_scaler=TemperatureScaler(
        temp_start=0.6,
        temp_end=0.3,
        temp_min=1e-5,
        episode_decay=5000,
        transition_decay=0.95,
    ),
    opt_list=opt_list,
    scaler=torch.amp.GradScaler("cuda"),
)

# Parameter groups for each online network.
# Presumably parameters whose names match an entry in `no_decay` are exempt
# from weight decay — verify in group_decay_parameters.
optimizer_grouped_parameters1 = group_decay_parameters(
    agent.online_net1,
    weight_decay=1e-5,
    no_decay=["bias", "GroupNorm.weight"],
)

optimizer_grouped_parameters2 = group_decay_parameters(
    agent.online_net2,
    weight_decay=1e-5,
    no_decay=["bias", "GroupNorm.weight"],
)

# Fill the slots that were passed to Model above: one AdamW per online network.
opt_list[0] = torch.optim.AdamW(optimizer_grouped_parameters1, lr=1e-4)
opt_list[1] = torch.optim.AdamW(optimizer_grouped_parameters2, lr=1e-4)


# Evaluator that pits the agent against a deep copy of itself.
eval_agents = EvaluateAgents(
    agent1=agent,
    agent2=deepcopy(agent),
    environment=Environment(max_num_moves=200),
    num_games=200,
    temp=0.25,
)


2984908


## Evaluation: head-to-head matches between successive checkpoints

In [10]:
# Opponent (agent2): snapshot of the agent exactly as `model` holds it when this
# cell starts, i.e. BEFORE the checkpoint below is loaded.
# NOTE(review): the baseline therefore depends on kernel state. It is the freshly
# constructed agent only if no checkpoint has been loaded into `model` yet —
# confirm that is the intended comparison.
agent2 = deepcopy(model.agent)

# Candidate (agent1): snapshot taken after loading model_conv_5000_episodes_core.pth.
model = load_checkpoint("model_conv_5000_episodes_core.pth", None, model)
agent1 = deepcopy(model.agent)

eval_agents = EvaluateAgents(
    agent1=agent1,
    agent2=agent2,
    environment=Environment(max_num_moves=200),
    num_games=100,
    temp=0.25,
)

# The saved output is a tally keyed by 1 / -1 / 0; presumably agent1 wins,
# agent2 wins and draws respectively — confirm in EvaluateAgents.evaluate.
results = eval_agents.evaluate()
print(results)

  0%|          | 0/100 [00:00<?, ?it/s]

{1: 39, -1: 33, 0: 28}


In [11]:
# Opponent (agent2): snapshot taken after loading model_conv_5000_episodes_core.pth.
# The deepcopy must happen before the next load, which rebinds `model`.
model = load_checkpoint("model_conv_5000_episodes_core.pth", None, model)
agent2 = deepcopy(model.agent)

# Candidate (agent1): snapshot taken after loading model_conv_10000_episodes_core.pth.
model = load_checkpoint("model_conv_10000_episodes_core.pth", None, model)
agent1 = deepcopy(model.agent)

eval_agents = EvaluateAgents(
    agent1=agent1,
    agent2=agent2,
    environment=Environment(max_num_moves=200),
    num_games=100,
    temp=0.25,
)

# The saved output is a tally keyed by 1 / -1 / 0; presumably agent1 wins,
# agent2 wins and draws respectively — confirm in EvaluateAgents.evaluate.
results = eval_agents.evaluate()
print(results)

  0%|          | 0/100 [00:00<?, ?it/s]

{1: 80, -1: 18, 0: 2}


In [12]:
# Opponent (agent2): snapshot taken after loading model_conv_10000_episodes_core.pth.
# The deepcopy must happen before the next load, which rebinds `model`.
model = load_checkpoint("model_conv_10000_episodes_core.pth", None, model)
agent2 = deepcopy(model.agent)

# Candidate (agent1): snapshot taken after loading model_conv_15000_episodes_core.pth.
model = load_checkpoint("model_conv_15000_episodes_core.pth", None, model)
agent1 = deepcopy(model.agent)

eval_agents = EvaluateAgents(
    agent1=agent1,
    agent2=agent2,
    environment=Environment(max_num_moves=200),
    num_games=100,
    temp=0.25,
)

# The saved output is a tally keyed by 1 / -1 / 0; presumably agent1 wins,
# agent2 wins and draws respectively — confirm in EvaluateAgents.evaluate.
results = eval_agents.evaluate()
print(results)

  0%|          | 0/100 [00:00<?, ?it/s]

{1: 71, -1: 17, 0: 12}


In [13]:
# Opponent (agent2): snapshot taken after loading model_conv_15000_episodes_core.pth.
# The deepcopy must happen before the next load, which rebinds `model`.
model = load_checkpoint("model_conv_15000_episodes_core.pth", None, model)
agent2 = deepcopy(model.agent)

# Candidate (agent1): snapshot taken after loading model_conv_20000_episodes_core.pth.
model = load_checkpoint("model_conv_20000_episodes_core.pth", None, model)
agent1 = deepcopy(model.agent)

eval_agents = EvaluateAgents(
    agent1=agent1,
    agent2=agent2,
    environment=Environment(max_num_moves=200),
    num_games=100,
    temp=0.25,
)

# The saved output is a tally keyed by 1 / -1 / 0; presumably agent1 wins,
# agent2 wins and draws respectively — confirm in EvaluateAgents.evaluate.
results = eval_agents.evaluate()
print(results)

  0%|          | 0/100 [00:00<?, ?it/s]

{1: 50, -1: 30, 0: 20}


In [15]:
# Opponent (agent2): snapshot taken after loading model_conv_20000_episodes_core.pth.
# The deepcopy must happen before the next load, which rebinds `model`.
model = load_checkpoint("model_conv_20000_episodes_core.pth", None, model)
agent2 = deepcopy(model.agent)

# Candidate (agent1): snapshot taken after loading model_conv_25000_episodes_core.pth.
model = load_checkpoint("model_conv_25000_episodes_core.pth", None, model)
agent1 = deepcopy(model.agent)

eval_agents = EvaluateAgents(
    agent1=agent1,
    agent2=agent2,
    environment=Environment(max_num_moves=200),
    num_games=100,
    temp=0.25,
)

# The saved output is a tally keyed by 1 / -1 / 0; presumably agent1 wins,
# agent2 wins and draws respectively — confirm in EvaluateAgents.evaluate.
results = eval_agents.evaluate()
print(results)

  0%|          | 0/100 [00:00<?, ?it/s]

{1: 57, -1: 25, 0: 18}


In [17]:
# Opponent (agent2): snapshot taken after loading model_conv_25000_episodes_core.pth.
# The deepcopy must happen before the next load, which rebinds `model`.
model = load_checkpoint("model_conv_25000_episodes_core.pth", None, model)
agent2 = deepcopy(model.agent)

# Candidate (agent1): snapshot taken after loading model_conv_30000_episodes_core.pth.
model = load_checkpoint("model_conv_30000_episodes_core.pth", None, model)
agent1 = deepcopy(model.agent)

eval_agents = EvaluateAgents(
    agent1=agent1,
    agent2=agent2,
    environment=Environment(max_num_moves=200),
    num_games=100,
    temp=0.25,
)

# The saved output is a tally keyed by 1 / -1 / 0; presumably agent1 wins,
# agent2 wins and draws respectively — confirm in EvaluateAgents.evaluate.
results = eval_agents.evaluate()
print(results)

  0%|          | 0/100 [00:00<?, ?it/s]

{1: 45, -1: 32, 0: 23}


In [18]:
# Opponent (agent2): snapshot taken after loading model_conv_30000_episodes_core.pth.
# The deepcopy must happen before the next load, which rebinds `model`.
model = load_checkpoint("model_conv_30000_episodes_core.pth", None, model)
agent2 = deepcopy(model.agent)

# Candidate (agent1): snapshot taken after loading model_conv_35000_episodes_core.pth.
model = load_checkpoint("model_conv_35000_episodes_core.pth", None, model)
agent1 = deepcopy(model.agent)

eval_agents = EvaluateAgents(
    agent1=agent1,
    agent2=agent2,
    environment=Environment(max_num_moves=200),
    num_games=100,
    temp=0.25,
)

# The saved output is a tally keyed by 1 / -1 / 0; presumably agent1 wins,
# agent2 wins and draws respectively — confirm in EvaluateAgents.evaluate.
results = eval_agents.evaluate()
print(results)

  0%|          | 0/100 [00:00<?, ?it/s]

{1: 45, -1: 33, 0: 22}


In [21]:
# Opponent (agent2): snapshot taken after loading model_conv_35000_episodes_core.pth.
# The deepcopy must happen before the next load, which rebinds `model`.
model = load_checkpoint("model_conv_35000_episodes_core.pth", None, model)
agent2 = deepcopy(model.agent)

# Candidate (agent1): snapshot taken after loading model_conv_40000_episodes_core.pth.
model = load_checkpoint("model_conv_40000_episodes_core.pth", None, model)
agent1 = deepcopy(model.agent)

eval_agents = EvaluateAgents(
    agent1=agent1,
    agent2=agent2,
    environment=Environment(max_num_moves=200),
    num_games=100,
    temp=0.25,
)

# The saved output is a tally keyed by 1 / -1 / 0; presumably agent1 wins,
# agent2 wins and draws respectively — confirm in EvaluateAgents.evaluate.
results = eval_agents.evaluate()
print(results)

  0%|          | 0/100 [00:00<?, ?it/s]

{1: 35, -1: 38, 0: 27}


In [22]:
# Opponent (agent2): snapshot taken after loading model_conv_40000_episodes_core.pth.
# The deepcopy must happen before the next load, which rebinds `model`.
model = load_checkpoint("model_conv_40000_episodes_core.pth", None, model)
agent2 = deepcopy(model.agent)

# Candidate (agent1): snapshot taken after loading model_conv_45000_episodes_core.pth.
model = load_checkpoint("model_conv_45000_episodes_core.pth", None, model)
agent1 = deepcopy(model.agent)

eval_agents = EvaluateAgents(
    agent1=agent1,
    agent2=agent2,
    environment=Environment(max_num_moves=200),
    num_games=100,
    temp=0.25,
)

# The saved output is a tally keyed by 1 / -1 / 0; presumably agent1 wins,
# agent2 wins and draws respectively — confirm in EvaluateAgents.evaluate.
results = eval_agents.evaluate()
print(results)

  0%|          | 0/100 [00:00<?, ?it/s]

{1: 46, -1: 28, 0: 26}
