# Curriculum Learning

In [1]:
import sys
sys.path.insert(0, '../../src/')

import numpy as np
import matplotlib.pyplot as plt
import pickle
import config
import torch
from tqdm.notebook import tqdm
from copy import copy, deepcopy
import cmath
import chess

from basis_gates import *
from agents import *
from environments import *
from models import *
%matplotlib inline

np.set_printoptions(precision = 3)

print("PyTorch version:", torch.__version__)  
print("CUDA toolkit version PyTorch was built with:", torch.version.cuda)  
print("cuDNN version:", torch.backends.cudnn.version()) 
print("cuda available:", torch.cuda.is_available())

torch.set_float32_matmul_precision('high')

PyTorch version: 2.8.0+cu126
CUDA toolkit version PyTorch was built with: 12.6
cuDNN version: 91002
cuda available: True


### FUMC

#### With Curriculum

### 10%

In [2]:
torch.manual_seed(42)
random.seed(42)
logger = Logger(sample_freq = 1000)

agent = Agent(board_logic = BoardLogic())


opt_list = [torch.optim.Adam(agent.online_net1.parameters(), lr=1e-6), 
            torch.optim.Adam(agent.online_net2.parameters(), lr=1e-6)]

model = Model(agent = agent,
               environment = Environment(max_num_moves=200,),
               mem_capacity = 1000000,
               batch_size = 4*1024,
               batch_size_min = 128,
               policy_update = 1,
               target_update = 25000,
               temp_constants = (2, 1, 1e-5, 100000),
               opt_list=opt_list,
               scaler=torch.amp.GradScaler("cuda")
             )

In [None]:
with torch.no_grad():
    board = chess.Board()
    board2 = chess.Board()
    board2.push(chess.Move.from_uci("e2e4"))
    board2.push(chess.Move.from_uci("e7e5"))

    board_logic = BoardLogic()
    state_dim = 64*76

    state1 = agent.board_logic.board_to_state(board).to(config.device)
    state2 = agent.board_logic.board_to_state(board2).to(config.device)

    state = torch.concat([state1, state2], dim=0)
    print(state.shape)
    Q = agent.forward(state)
    print(Q.shape)

    moves1 = torch.tensor([board_logic.move_to_action(m) for m in board.legal_moves], dtype=torch.long).to(config.device)
    moves2 = torch.tensor([board_logic.move_to_action(m) for m in board2.legal_moves], dtype=torch.long).to(config.device)

    mask = torch.zeros((2,state_dim), dtype=torch.bool).to(config.device)
    mask[0, moves1] = True
    mask[1, moves2] = True

    Q[~mask] = -1e9

    print(Q)

torch.Size([2, 12, 8, 8])
torch.Size([2, 4864])
tensor([[-1.0000e+09, -1.0000e+09, -1.0000e+09,  ..., -1.0000e+09,
         -1.0000e+09, -1.0000e+09],
        [-1.0000e+09, -1.0000e+09, -1.0000e+09,  ..., -1.0000e+09,
         -1.0000e+09, -1.0000e+09]])


In [None]:
model.train(num_episodes = 1000, logger = logger)

  0%|          | 0/1000 [00:00<?, ?it/s]

torch.Size([20, 4864])


/pytorch/aten/src/ATen/native/cuda/IndexKernelUtils.cu:16: vectorized_gather_kernel: block: [3,2,0], thread: [32,0,0] Assertion `ind >=0 && ind < ind_dim_size && "vectorized gather kernel index out of bounds"` failed.
/pytorch/aten/src/ATen/native/cuda/IndexKernelUtils.cu:16: vectorized_gather_kernel: block: [3,2,0], thread: [33,0,0] Assertion `ind >=0 && ind < ind_dim_size && "vectorized gather kernel index out of bounds"` failed.
/pytorch/aten/src/ATen/native/cuda/IndexKernelUtils.cu:16: vectorized_gather_kernel: block: [3,2,0], thread: [34,0,0] Assertion `ind >=0 && ind < ind_dim_size && "vectorized gather kernel index out of bounds"` failed.
/pytorch/aten/src/ATen/native/cuda/IndexKernelUtils.cu:16: vectorized_gather_kernel: block: [3,2,0], thread: [35,0,0] Assertion `ind >=0 && ind < ind_dim_size && "vectorized gather kernel index out of bounds"` failed.
/pytorch/aten/src/ATen/native/cuda/IndexKernelUtils.cu:16: vectorized_gather_kernel: block: [3,2,0], thread: [36,0,0] Assertion 

AcceleratorError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
