In [None]:
# Attach Google Drive to the Colab VM. The data, checkpoint and model paths
# used further down in this notebook all live under this mount point.
from google.colab import drive

drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
!cd drive/MyDrive/chess_ai_colab

In [None]:
!pip install torch torchvision

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [None]:
!pip install chess

Collecting chess
  Downloading chess-1.11.2.tar.gz (6.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.1/6.1 MB[0m [31m33.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: chess
  Building wheel for chess (setup.py) ... [?25l[?25hdone
  Created wheel for chess: filename=chess-1.11.2-py3-none-any.whl size=147775 sha256=0f91b5f2b5bb706ca6d6684d45414144f369c6c00c2b7c8cce68eceba49648d6
  Stored in directory: /root/.cache/pip/wheels/fb/5d/5c/59a62d8a695285e59ec9c1f66add6f8a9ac4152499a2be0113
Successfully built chess
Installing collected packages: chess
Successfully installed chess-1.11.2


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import chess
import numpy as np
from torch.utils.data import Dataset, DataLoader

In [None]:
INPUT_PATH = "/content/drive/MyDrive/chess_ai_colab/data/lichess_db_standard_rated_2015-08.pgn.zst"
OUTPUT_PGN = "/content/drive/MyDrive/chess_ai_colab/data/lichess_db_standard_rated_2015-08.pgn"

# Decompress .zst → .pgn
import os
import shutil

import zstandard as zstd

if os.path.exists(OUTPUT_PGN):
    # A previous run already produced the PGN: skip the expensive decompression
    # so the notebook can be re-run from the top cheaply.
    print("Đã có sẵn, bỏ qua giải nén:", OUTPUT_PGN)
else:
    # Stream into a temporary file and rename at the end, so an interrupted run
    # never leaves a truncated file at OUTPUT_PGN that later cells would read.
    partial_path = OUTPUT_PGN + ".part"
    with open(INPUT_PATH, 'rb') as compressed, open(partial_path, 'wb') as out:
        dctx = zstd.ZstdDecompressor()
        with dctx.stream_reader(compressed) as reader:
            shutil.copyfileobj(reader, out)
    os.replace(partial_path, OUTPUT_PGN)
    print("Giải nén xong:", OUTPUT_PGN)

Giải nén xong: /content/drive/MyDrive/chess_ai_colab/data/lichess_db_standard_rated_2015-08.pgn


In [None]:
# Hàm chuyển Board → tensor (12×8×8) và move → index (64×64 = 4096)
import chess
import numpy as np

def board_to_tensor(board: chess.Board):
    """Encode the piece placement of ``board`` as a (12, 8, 8) float32 array.

    Plane index is ``piece_type - 1`` for white pieces and ``piece_type - 1 + 6``
    for black pieces. Within a plane, row 0 is rank 8 and row 7 is rank 1
    (row = 7 - rank); columns follow the file index. A cell is 1.0 where a
    piece of that plane's kind stands and 0.0 elsewhere.

    Only piece placement is encoded; nothing else about the position is read.
    Despite the name, the return value is a NumPy array, not a torch tensor.
    """
    tensor = np.zeros((12, 8, 8), dtype=np.float32)
    for sq, pc in board.piece_map().items():
        colour_offset = 0 if pc.color == chess.WHITE else 6
        plane = colour_offset + (pc.piece_type - 1)
        row = 7 - chess.square_rank(sq)
        col = chess.square_file(sq)
        tensor[plane, row, col] = 1.0
    return tensor

def move_to_index(move: chess.Move):
    """Map a move to a flat index ``from_square * 64 + to_square``.

    Only the origin and destination squares are used, so the promotion piece
    is not encoded: all promotions between the same two squares share one index.
    """
    origin, target = move.from_square, move.to_square
    return origin * 64 + target

In [None]:
# Hàm chuyển Board → tensor (12×8×8) và move → index (64×64 = 4096)
import chess
import numpy as np

def board_to_tensor(board: chess.Board):
    """Return a (12, 8, 8) float32 one-hot encoding of the pieces on ``board``.

    NOTE: this re-defines (and shadows) the identical function from the
    previous cell.

    Planes 0-5 hold white pieces and planes 6-11 black pieces, each indexed by
    ``piece_type - 1``. Rows are flipped relative to ranks (row = 7 - rank) and
    columns follow the file index. The result is a NumPy array.
    """
    encoded = np.zeros((12, 8, 8), dtype=np.float32)
    occupied = board.piece_map()
    for square in occupied:
        piece = occupied[square]
        plane = piece.piece_type - 1
        if piece.color != chess.WHITE:
            plane += 6
        encoded[plane, 7 - chess.square_rank(square), chess.square_file(square)] = 1.0
    return encoded

def move_to_index(move: chess.Move):
    """Flatten a move into ``from_square * 64 + to_square``.

    NOTE: this re-defines (and shadows) the identical function from the
    previous cell. The promotion piece is not part of the index.
    """
    row_offset = move.from_square * 64
    return row_offset + move.to_square

# Read the PGN and build the supervised dataset
import chess.pgn

MAX_GAMES = 5000          # can be raised to 20000–50000 depending on resources
MAX_MOVES_PER_GAME = 40   # cap per game; counted per mainline move, i.e. in plies

states = []        # one (12,8,8) float32 array per sampled position
move_indices = []  # flat 64*from+to index of the move played in that position

with open(OUTPUT_PGN, 'r', encoding='utf-8', errors='ignore') as pgn_file:
    game_count = 0
    while game_count < MAX_GAMES:
        game = chess.pgn.read_game(pgn_file)
        if game is None:
            # End of file reached before MAX_GAMES games were read.
            break
        game_count += 1
        board = game.board()
        for move_count, move in enumerate(game.mainline_moves(), start=1):
            # The sample pairs the position *before* the move with the move itself.
            states.append(board_to_tensor(board))
            move_indices.append(move_to_index(move))
            board.push(move)
            if move_count >= MAX_MOVES_PER_GAME:
                break
        if game_count % 500 == 0:
            print(f"Đã xử lý {game_count} ván...")

# reshape keeps the documented rank even when no position was collected.
states_arr = np.asarray(states, dtype=np.float32).reshape(-1, 12, 8, 8)  # (N,12,8,8)

# Build the one-hot targets in a single allocation instead of keeping N separate
# 4096-float arrays in a list and copying them all again.
move_indices_arr = np.asarray(move_indices, dtype=np.int64)
policies_arr = np.zeros((move_indices_arr.size, 4096), dtype=np.float32)  # (N,4096)
policies_arr[np.arange(move_indices_arr.size), move_indices_arr] = 1.0

DATASET_PATH = "/content/drive/MyDrive/chess_ai_colab/data/lichess_dataset.npz"
# The targets are almost entirely zeros, so the compressed archive is far smaller;
# np.load reads compressed and uncompressed .npz files the same way.
np.savez_compressed(DATASET_PATH, states=states_arr, policies=policies_arr)
print("Lưu dataset xong:", DATASET_PATH)

Đã xử lý 500 ván...
Đã xử lý 1000 ván...
Đã xử lý 1500 ván...
Đã xử lý 2000 ván...
Đã xử lý 2500 ván...
Đã xử lý 3000 ván...
Đã xử lý 3500 ván...
Đã xử lý 4000 ván...
Đã xử lý 4500 ván...
Đã xử lý 5000 ván...
Lưu dataset xong: /content/drive/MyDrive/chess_ai_colab/data/lichess_dataset.npz


In [None]:
# ResidualBlock (unchanged; takes the channel count as a parameter)
class ResidualBlock(nn.Module):
    """Residual unit: conv3x3 → BN → ReLU → conv3x3 → BN, plus identity skip, → ReLU.

    Both convolutions keep the channel count and spatial size (padding=1) and
    have no bias term. The output has the same shape as the input.
    """

    def __init__(self, channels):
        super().__init__()
        conv_options = dict(kernel_size=3, padding=1, bias=False)
        self.conv1 = nn.Conv2d(channels, channels, **conv_options)
        self.bn1 = nn.BatchNorm2d(channels)
        self.conv2 = nn.Conv2d(channels, channels, **conv_options)
        self.bn2 = nn.BatchNorm2d(channels)

    def forward(self, x):
        # First conv stage with its own non-linearity.
        hidden = self.conv1(x)
        hidden = F.relu(self.bn1(hidden))
        # Second conv stage; the skip connection is added before the final ReLU.
        hidden = self.bn2(self.conv2(hidden))
        return F.relu(hidden + x)


In [None]:
# Deeper PolicyNet: 20 residual blocks, num_channels=256
class PolicyNet(nn.Module):
    """Convolutional policy network over a 12-plane 8x8 board encoding.

    Architecture: 3x3 input conv + BN + ReLU, a stack of ``ResidualBlock``s,
    then a policy head (1x1 conv to 32 channels + BN + ReLU, flatten, linear).
    ``forward`` returns raw logits of shape (batch, action_size); no softmax
    is applied.
    """

    def __init__(self, num_res_blocks=20, num_channels=256, action_size=4096):
        """
        - num_res_blocks: number of residual blocks in the tower (default 20)
        - num_channels: channel width of the tower (default 256)
        - action_size: number of output logits (default 4096, the 64×64 mapping)
        """
        super().__init__()

        # Input stem: 12 planes → num_channels
        self.conv_in = nn.Conv2d(12, num_channels, kernel_size=3, padding=1, bias=False)
        self.bn_in = nn.BatchNorm2d(num_channels)

        # Residual tower
        tower = []
        for _ in range(num_res_blocks):
            tower.append(ResidualBlock(num_channels))
        self.res_blocks = nn.ModuleList(tower)

        # Policy head: 1×1 conv down to 32 channels, then a linear layer → action_size
        self.conv_policy = nn.Conv2d(num_channels, 32, kernel_size=1)
        self.bn_policy = nn.BatchNorm2d(32)
        self.fc_policy = nn.Linear(32 * 8 * 8, action_size)

    def forward(self, x):
        # x: (batch, 12, 8, 8)
        features = self.conv_in(x)
        features = F.relu(self.bn_in(features))
        for res_block in self.res_blocks:
            features = res_block(features)

        head = self.conv_policy(features)
        head = F.relu(self.bn_policy(head))   # (batch, 32, 8, 8)
        flat = head.view(head.size(0), -1)    # (batch, 32*8*8)
        return self.fc_policy(flat)           # (batch, action_size)

In [None]:
# Load the dataset from the .npz file.
# np.load returns an NpzFile that keeps the archive open; the context manager
# closes it once both arrays have been read into memory.
with np.load(DATASET_PATH) as data:
    states = data["states"]      # (N,12,8,8)
    policies = data["policies"]  # (N,4096)

# Dataset class
class LichessDataset(Dataset):
    """Map-style dataset pairing each board encoding with its target policy vector.

    Both inputs are NumPy arrays with the sample index on the first axis; they
    are wrapped with ``torch.from_numpy`` (no copy is made).
    """

    def __init__(self, states, policies):
        self.states = torch.from_numpy(states)       # (N,12,8,8)
        self.policies = torch.from_numpy(policies)   # (N,4096)

    def __len__(self):
        # Number of samples = length of the first axis.
        return self.states.shape[0]

    def __getitem__(self, idx):
        board_planes = self.states[idx]
        target_policy = self.policies[idx]
        return board_planes, target_policy

# Wrap the loaded arrays in the dataset class.
dataset = LichessDataset(states, policies)

# Shuffled mini-batch loader with a larger batch size (e.g. 128), fed by two worker processes.
batch_size = 128
dataloader = DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)
print("Dataset size:", len(dataset), "Batch size:", batch_size)


Dataset size: 188098 Batch size: 128


In [None]:
# Initialise model, optimizer and loss
import os

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = PolicyNet(num_res_blocks=20, num_channels=256, action_size=4096).to(device)

learning_rate = 0.0005  # reduced from the initial 0.001
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-4)
criterion = nn.CrossEntropyLoss()

# Output locations. Create the directories up front: torch.save does not create
# missing parent directories, and failing at the first save would waste the run.
CHECKPOINT_DIR = "/content/drive/MyDrive/chess_ai_colab/checkpoints"
MODEL_PATH = "/content/drive/MyDrive/chess_ai_colab/models/policy_supervised_final.pt"
os.makedirs(CHECKPOINT_DIR, exist_ok=True)
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)

# Training loop
num_epochs = 20
start_epoch = 0

# Resume: set RESUME_FROM to a saved checkpoint path, e.g.
# "/content/drive/MyDrive/chess_ai_colab/checkpoints/ckpt_epoch_5.pt".
# None (the default) trains from scratch.
RESUME_FROM = None
if RESUME_FROM is not None:
    checkpoint = torch.load(RESUME_FROM, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    # Checkpoints store the number of completed epochs, so training continues from there.
    start_epoch = checkpoint['epoch']

for epoch in range(start_epoch, num_epochs):
    model.train()
    running_loss = 0.0
    for batch_states, batch_policies in dataloader:
        batch_states = batch_states.to(device)  # (B,12,8,8)
        # CrossEntropyLoss is fed class indices here: recover them from the one-hot targets.
        idxs = torch.argmax(batch_policies, dim=1).to(device)  # (B,)

        optimizer.zero_grad()
        logits = model(batch_states)  # (B,4096)
        loss = criterion(logits, idxs)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    # Mean of the per-batch mean losses over the epoch.
    avg_loss = running_loss / len(dataloader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")

    # Save a checkpoint every 5 epochs
    if (epoch + 1) % 5 == 0:
        ckpt_path = f"{CHECKPOINT_DIR}/ckpt_epoch_{epoch+1}.pt"
        torch.save({
            'epoch': epoch + 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': avg_loss
        }, ckpt_path)
        print("Saved checkpoint:", ckpt_path)

# Save the final model weights
torch.save(model.state_dict(), MODEL_PATH)
print("Đã lưu model cuối:", MODEL_PATH)


Epoch [1/20], Loss: 4.2104
Epoch [2/20], Loss: 3.0144
Epoch [3/20], Loss: 2.6079
Epoch [4/20], Loss: 2.3452
Epoch [5/20], Loss: 2.1418
Saved checkpoint: /content/drive/MyDrive/chess_ai_colab/checkpoints/ckpt_epoch_5.pt
Epoch [6/20], Loss: 1.9637
Epoch [7/20], Loss: 1.8119
Epoch [8/20], Loss: 1.6801
Epoch [9/20], Loss: 1.5595
Epoch [10/20], Loss: 1.4565
Saved checkpoint: /content/drive/MyDrive/chess_ai_colab/checkpoints/ckpt_epoch_10.pt
Epoch [11/20], Loss: 1.3604
Epoch [12/20], Loss: 1.2767
Epoch [13/20], Loss: 1.1978
Epoch [14/20], Loss: 1.1347
Epoch [15/20], Loss: 1.0695
Saved checkpoint: /content/drive/MyDrive/chess_ai_colab/checkpoints/ckpt_epoch_15.pt
Epoch [16/20], Loss: 1.0203
Epoch [17/20], Loss: 0.9738
Epoch [18/20], Loss: 0.9270
Epoch [19/20], Loss: 0.8896
Epoch [20/20], Loss: 0.8591
Saved checkpoint: /content/drive/MyDrive/chess_ai_colab/checkpoints/ckpt_epoch_20.pt
Đã lưu model cuối: /content/drive/MyDrive/chess_ai_colab/models/policy_supervised_final.pt
