# Curriculum Learning

In [1]:
import cmath
import os
import pickle
import random
import sys
from copy import copy, deepcopy

# Put ../../src/ first on the import path before the project-local imports below.
sys.path.insert(0, '../../src/')

import numpy as np
import matplotlib.pyplot as plt
import config
import torch
from tqdm.notebook import tqdm
import chess
from utils import saver, loader, load_checkpoint
from evals import EvaluateAgents

from agents import BoardLogic, Agent
# Star imports stay last and in this order: later bindings shadow earlier ones.
from environments import *
from models import *
%matplotlib inline

# Show three decimals when NumPy arrays are printed.
np.set_printoptions(precision=3)

# Print the PyTorch build and GPU details for this session.
version_report = (
    ("PyTorch version:", torch.__version__),
    ("CUDA toolkit version PyTorch was built with:", torch.version.cuda),
    ("cuDNN version:", torch.backends.cudnn.version()),
    ("cuda available:", torch.cuda.is_available()),
)
for report_label, report_value in version_report:
    print(report_label, report_value)

# Select the 'high' float32 matmul precision setting.
torch.set_float32_matmul_precision('high')

PyTorch version: 2.7.1+cu128
CUDA toolkit version PyTorch was built with: 12.8
cuDNN version: 90701
cuda available: True


In [17]:
# Seed torch, Python's `random`, and NumPy with the same value.
np.random.seed(42)
random.seed(42)
torch.manual_seed(42)

agent = Agent(
    board_logic=BoardLogic(),
    in_ch=20,
    ch=128,
    n_blocks=10,
    sample_policy=eps_greedy_policy,
)

# Report the number of trainable parameters in the first online network.
n_trainable = sum(
    param.numel()
    for param in agent.online_net1.parameters()
    if param.requires_grad
)
print(n_trainable)

environment = Environment(max_num_moves=200)

# Placeholder list handed to Model now; the two optimizers are written into
# this same list object further down, once the model has been constructed.
opt_list = [None, None]

temperature_schedule = TemperatureScaler(
    temp_start=0.6,
    temp_end=0.3,
    temp_min=1e-5,
    episode_decay=5000,
    transition_decay=0.95,
)

model = Model(
    agent=agent,
    environment=environment,
    mem_capacity=1000000,
    init_mem=False,
    batch_size=512,
    num_warmup=100000,
    policy_update=2,
    tau=0.01,
    temp_scaler=temperature_schedule,
    opt_list=opt_list,
    scaler=torch.amp.GradScaler("cuda"),
)

# Parameter groups for each online network: weight decay 1e-5, with the
# names listed in `no_decay` passed through to group_decay_parameters.
optimizer_grouped_parameters1 = group_decay_parameters(
    agent.online_net1,
    weight_decay=1e-5,
    no_decay=["bias", "GroupNorm.weight"],
)
optimizer_grouped_parameters2 = group_decay_parameters(
    agent.online_net2,
    weight_decay=1e-5,
    no_decay=["bias", "GroupNorm.weight"],
)

# Slice assignment fills the existing list in place, so the list object that
# was passed to Model above is the one that receives the optimizers.
opt_list[:] = [
    torch.optim.AdamW(optimizer_grouped_parameters1, lr=1e-4),
    torch.optim.AdamW(optimizer_grouped_parameters2, lr=1e-4),
]


2987212


In [18]:
def evaluate_agents(model, filename1, filename2, num_games=1000, temp=0.1, transition_decay=0.95,
                    models_dir="../models/", max_num_moves=100):
    """Evaluate two agent snapshots against each other and return the result.

    Each agent is a deep copy of ``model.agent`` taken right after the
    corresponding checkpoint (if any) has been loaded through
    ``load_checkpoint``. Because the copies are independent objects, loading
    the second checkpoint does not touch the first agent.

    Args:
        model: Object exposing an ``agent`` attribute; it is passed to
            ``load_checkpoint`` and rebound to that call's return value.
            NOTE(review): ``load_checkpoint`` may also modify the caller's
            model in place - not visible from this notebook, confirm.
        filename1: Checkpoint file name for the first agent, or ``None`` to
            use the agent as it currently is in ``model``.
        filename2: Checkpoint file name for the second agent, or ``None`` to
            use the agent as it is after the first step.
        num_games: Forwarded to ``EvaluateAgents`` as ``num_games``.
        temp: Forwarded to ``EvaluateAgents`` as ``temp``.
        transition_decay: Forwarded to ``EvaluateAgents`` as
            ``transition_decay``.
        models_dir: Directory the checkpoint file names are joined onto.
        max_num_moves: ``max_num_moves`` of the fresh ``Environment`` used
            for the evaluation games.

    Returns:
        Whatever ``EvaluateAgents.evaluate()`` returns.
    """

    def _snapshot_agent(filename):
        # Optionally load a checkpoint into the model, then copy its agent.
        nonlocal model
        if filename is not None:
            # The second positional argument is None, exactly as in the
            # original call (presumably an optimizer slot - TODO confirm).
            checkpoint_path = os.path.join(models_dir, filename)
            model = load_checkpoint(checkpoint_path, None, model)
        return deepcopy(model.agent)

    # Order matters: the second snapshot starts from the model state left
    # behind by the first one.
    agent1 = _snapshot_agent(filename1)
    agent2 = _snapshot_agent(filename2)

    eval_agents = EvaluateAgents(
        agent1=agent1,
        agent2=agent2,
        environment=Environment(max_num_moves=max_num_moves),
        num_games=num_games,
        temp=temp,
        transition_decay=transition_decay,
    )
    return eval_agents.evaluate()

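## Evaluation

Each cell below plays 1000 games between a saved checkpoint and the checkpoint saved 5000 episodes earlier, then prints the result returned by `evaluate_agents` (a dictionary of game counts keyed by outcome code `1`, `-1`, `0`).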

In [19]:
# 5000-episode checkpoint against the 0-episode checkpoint.
results = evaluate_agents(
    model,
    filename1="model_eps_5000_episodes_core.pth",
    filename2="model_eps_0_episodes_core.pth",
    num_games=1000,
    temp=0.2,
    transition_decay=0.95,
)
print(results)

  0%|          | 0/1000 [00:00<?, ?it/s]

{1: 610, -1: 88, 0: 302}


In [20]:
# 10000-episode checkpoint against the 5000-episode checkpoint.
results = evaluate_agents(
    model,
    filename1="model_eps_10000_episodes_core.pth",
    filename2="model_eps_5000_episodes_core.pth",
    num_games=1000,
    temp=0.2,
    transition_decay=0.95,
)
print(results)

  0%|          | 0/1000 [00:00<?, ?it/s]

{1: 879, -1: 54, 0: 67}


In [21]:
# 15000-episode checkpoint against the 10000-episode checkpoint.
results = evaluate_agents(
    model,
    filename1="model_eps_15000_episodes_core.pth",
    filename2="model_eps_10000_episodes_core.pth",
    num_games=1000,
    temp=0.2,
    transition_decay=0.95,
)
print(results)

  0%|          | 0/1000 [00:00<?, ?it/s]

{1: 531, -1: 107, 0: 362}


In [22]:
# 20000-episode checkpoint against the 15000-episode checkpoint.
results = evaluate_agents(
    model,
    filename1="model_eps_20000_episodes_core.pth",
    filename2="model_eps_15000_episodes_core.pth",
    num_games=1000,
    temp=0.2,
    transition_decay=0.95,
)
print(results)

  0%|          | 0/1000 [00:00<?, ?it/s]

{1: 446, -1: 156, 0: 398}


In [25]:
# 25000-episode checkpoint against the 20000-episode checkpoint.
results = evaluate_agents(
    model,
    filename1="model_eps_25000_episodes_core.pth",
    filename2="model_eps_20000_episodes_core.pth",
    num_games=1000,
    temp=0.2,
    transition_decay=0.95,
)
print(results)

  0%|          | 0/1000 [00:00<?, ?it/s]

{1: 440, -1: 173, 0: 387}


In [26]:
# 30000-episode checkpoint against the 25000-episode checkpoint.
results = evaluate_agents(
    model,
    filename1="model_eps_30000_episodes_core.pth",
    filename2="model_eps_25000_episodes_core.pth",
    num_games=1000,
    temp=0.2,
    transition_decay=0.95,
)
print(results)

  0%|          | 0/1000 [00:00<?, ?it/s]

{1: 405, -1: 246, 0: 349}


In [27]:
# 35000-episode checkpoint against the 30000-episode checkpoint.
results = evaluate_agents(
    model,
    filename1="model_eps_35000_episodes_core.pth",
    filename2="model_eps_30000_episodes_core.pth",
    num_games=1000,
    temp=0.2,
    transition_decay=0.95,
)
print(results)

  0%|          | 0/1000 [00:00<?, ?it/s]

{1: 409, -1: 263, 0: 328}


In [29]:
# 40000-episode checkpoint against the 35000-episode checkpoint.
results = evaluate_agents(
    model,
    filename1="model_eps_40000_episodes_core.pth",
    filename2="model_eps_35000_episodes_core.pth",
    num_games=1000,
    temp=0.2,
    transition_decay=0.95,
)
print(results)

  0%|          | 0/1000 [00:00<?, ?it/s]

{1: 383, -1: 315, 0: 302}


In [30]:
# 45000-episode checkpoint against the 40000-episode checkpoint.
results = evaluate_agents(
    model,
    filename1="model_eps_45000_episodes_core.pth",
    filename2="model_eps_40000_episodes_core.pth",
    num_games=1000,
    temp=0.2,
    transition_decay=0.95,
)
print(results)

  0%|          | 0/1000 [00:00<?, ?it/s]

{1: 399, -1: 275, 0: 326}


In [31]:
# 50000-episode checkpoint against the 45000-episode checkpoint.
results = evaluate_agents(
    model,
    filename1="model_eps_50000_episodes_core.pth",
    filename2="model_eps_45000_episodes_core.pth",
    num_games=1000,
    temp=0.2,
    transition_decay=0.95,
)
print(results)

  0%|          | 0/1000 [00:00<?, ?it/s]

{1: 420, -1: 283, 0: 297}
