In [2]:
!pip install sgfmill numpy

Collecting sgfmill
  Downloading sgfmill-1.1.1-py3-none-any.whl.metadata (1.7 kB)
Downloading sgfmill-1.1.1-py3-none-any.whl (27 kB)
Installing collected packages: sgfmill
Successfully installed sgfmill-1.1.1


In [3]:
import numpy as np
from sgfmill import sgf, boards

# Kích thước bàn cờ
BOARD_SIZE = 19
# Số lượng history planes (8 cho ta + 8 cho địch = 16)
HISTORY_LENGTH = 8

def make_input_planes(game_state, color, history_states):
    """
    Tạo ra tensor 17x19x19
    color: 'b' hoặc 'w' (người đang đi nước này)
    history_states: List các trạng thái bàn cờ trước đó
    """
    features = np.zeros((17, BOARD_SIZE, BOARD_SIZE), dtype=np.float32)

    current_player_color = color
    opponent_color = 'w' if color == 'b' else 'b'

    # --- 16 Kênh Lịch sử (8 Ta + 8 Địch) ---
    # Lấy tối đa 8 trạng thái gần nhất, nếu không đủ thì padding số 0
    recent_states = history_states[-HISTORY_LENGTH:]
    # Đảo ngược để trạng thái mới nhất nằm đầu
    recent_states = recent_states[::-1]

    for i, state in enumerate(recent_states):
        # Kênh quân Ta (0, 2, 4...)
        features[i] = (state == current_player_color).astype(np.float32)
        # Kênh quân Địch (8, 9, 10...) -> Logic trong AlphaGo Zero xếp xen kẽ hoặc tách khối
        # Ở đây ta xếp: 8 kênh Ta trước, 8 kênh Địch sau cho dễ hình dung
        features[i + 8] = (state == opponent_color).astype(np.float32)

    # --- Kênh thứ 17: Màu quân (Ai đi lượt này?) ---
    # Nếu Đen đi: toàn số 1. Nếu Trắng đi: toàn số 0.
    if color == 'b':
        features[16] = np.ones((BOARD_SIZE, BOARD_SIZE), dtype=np.float32)

    return features

def parse_move(move_coords, board_size=19):
    """Chuyển tọa độ (row, col) thành số nguyên 0-360. Pass là 361"""
    if move_coords is None:
        return board_size * board_size # Nước Pass
    row, col = move_coords
    return row * board_size + col

In [4]:
get_ipython().system('sudo apt-get update')

0% [Working]            Hit:1 https://cli.github.com/packages stable InRelease
0% [Connecting to archive.ubuntu.com (185.125.190.83)] [Connecting to security.                                                                               Get:2 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,632 B]
Get:3 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease [1,581 B]
Hit:4 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:5 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Get:6 https://r2u.stat.illinois.edu/ubuntu jammy InRelease [6,555 B]
Get:7 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Get:8 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  Packages [2,153 kB]
Get:9 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease [18.1 kB]
Get:10 http://archive.ubuntu.com/ubuntu jammy-backports InRelease [127 kB]
Hit:11 https://ppa.launchpadcontent.

In [5]:
get_ipython().system('sudo apt-get install -y p7zip-full')

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
p7zip-full is already the newest version (16.02+dfsg-8).
0 upgraded, 0 newly installed, 0 to remove and 54 not upgraded.


In [6]:
get_ipython().system('mkdir -p Datasets')

In [7]:
import glob

# Find all .7z files in the current directory
seven_zip_files = glob.glob('*.7z')

# Extract each .7z file into the Datasets directory
for file in seven_zip_files:
    print(f"Extracting {file} to Datasets/")
    get_ipython().system(f'7z x {file} -oDatasets/')
print("Extraction complete.")

Extracting Pro.7z to Datasets/

7-Zip [64] 16.02 : Copyright (c) 1999-2016 Igor Pavlov : 2016-05-21
p7zip Version 16.02 (locale=en_US.UTF-8,Utf16=on,HugeFiles=on,64 bits,2 CPUs Intel(R) Xeon(R) CPU @ 2.20GHz (406F0),ASM,AES-NI)

Scanning the drive for archives:
  0M Scan         1 file, 3657149 bytes (3572 KiB)

Extracting archive: Pro.7z
--
Path = Pro.7z
Type = 7z
Physical Size = 3657149
Headers Size = 137267
Method = LZMA2:16
Solid = +
Blocks = 1

  0%      4% 468 - Pro/3p/1380549114019999712.sgf                                           8% 877 - Pro/3p/1512834770010001313.sgf                                          16% 1553 - Pro/6p/1476180003019999348.sgf                                           22% 2153 - Pro/

In [8]:
import os
import glob
import numpy as np
from sgfmill import sgf, boards

# --- CẤU HÌNH ---
DATA_ROOT = './Datasets/Pro/'      # Thư mục gốc chứa 1p, 2p...
OUTPUT_DIR = './Datasets/processed/' # Nơi lưu file .npy
CHUNK_SIZE = 2000             # Cứ 2000 ván thì lưu ra 1 file (để tránh tràn RAM)
BOARD_SIZE = 19
HISTORY_LENGTH = 8            # 8 nước lịch sử

# Tạo thư mục output nếu chưa có
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

# --- HÀM HỖ TRỢ (GIỮ NGUYÊN) ---
def make_input_planes(game_state, color, history_states):
    features = np.zeros((17, BOARD_SIZE, BOARD_SIZE), dtype=np.float32)
    current_player_color = color
    opponent_color = 'w' if color == 'b' else 'b'

    # Lấy lịch sử, đảo ngược để mới nhất lên đầu
    recent_states = history_states[-HISTORY_LENGTH:][::-1]

    for i, state in enumerate(recent_states):
        # 8 kênh Ta
        features[i] = (state == current_player_color).astype(np.float32)
        # 8 kênh Địch
        features[i + 8] = (state == opponent_color).astype(np.float32)

    # Kênh 17: Màu quân đi
    if color == 'b':
        features[16] = np.ones((BOARD_SIZE, BOARD_SIZE), dtype=np.float32)

    return features

def parse_move(move_coords):
    if move_coords is None:
        return BOARD_SIZE * BOARD_SIZE # Pass
    row, col = move_coords
    return row * BOARD_SIZE + col

def save_chunk(features, policies, values, chunk_id):
    """Lưu dữ liệu tạm ra file"""
    print(f"--> Saving chunk {chunk_id} with {len(features)} moves...")
    np.save(os.path.join(OUTPUT_DIR, f"features_{chunk_id}.npy"), np.array(features, dtype=np.float32))
    np.save(os.path.join(OUTPUT_DIR, f"labels_policy_{chunk_id}.npy"), np.array(policies, dtype=np.int64))
    np.save(os.path.join(OUTPUT_DIR, f"labels_value_{chunk_id}.npy"), np.array(values, dtype=np.float32))

# --- HÀM XỬ LÝ CHÍNH ---
def process_all_folders():
    # Danh sách folder cần quét (từ 1p đến 9p)
    # Nếu tên folder của bạn khác (vd: '1p_games'), hãy sửa lại list này
    target_folders = [f"{i}p" for i in range(1, 10)]

    # Bộ đệm tạm thời
    buffer_features = []
    buffer_policies = []
    buffer_values = []

    chunk_counter = 0
    total_games_processed = 0

    # Duyệt qua từng folder 1p, 2p...
    for folder_name in target_folders:
        folder_path = os.path.join(DATA_ROOT, folder_name)

        # Tìm tất cả file .sgf trong folder này
        sgf_files = glob.glob(os.path.join(folder_path, "*.sgf"))
        print(f"Processing folder: {folder_name} - Found {len(sgf_files)} files")

        for file_path in sgf_files:
            try:
                with open(file_path, "rb") as f:
                    content = f.read()

                # SGFmill parse
                try:
                    game = sgf.Sgf_game.from_bytes(content)
                except ValueError:
                    continue # Bỏ qua file lỗi

                # Lọc dữ liệu: Chỉ lấy 19x19 và không chấp quân
                if game.get_size() != 19 or (game.get_handicap() is not None and game.get_handicap() > 0):
                    continue

                winner = game.get_winner()
                if winner is None: continue

                # Replay ván đấu
                board = boards.Board(19)
                current_numpy_board = np.zeros((19, 19), dtype=object)
                history_boards = [current_numpy_board.copy()]

                for node in game.get_main_sequence():
                    color, move_coords = node.get_move()
                    if color is None: continue

                    # 1. Tạo Feature
                    input_tensor = make_input_planes(current_numpy_board, color, history_boards)

                    # 2. Tạo Label
                    policy_target = parse_move(move_coords)
                    value_target = 1.0 if winner == color else -1.0

                    # Thêm vào buffer
                    buffer_features.append(input_tensor)
                    buffer_policies.append(policy_target)
                    buffer_values.append(value_target)

                    # 3. Apply move cho vòng lặp sau
                    if move_coords is not None:
                        row, col = move_coords
                        board.play(row, col, color)
                        # Sync sang numpy
                        new_numpy_board = np.zeros((19, 19), dtype=object)
                        for r in range(19):
                            for c in range(19):
                                p = board.get(r, c)
                                if p: new_numpy_board[r, c] = p
                        current_numpy_board = new_numpy_board

                    history_boards.append(current_numpy_board.copy())

                total_games_processed += 1

                # CƠ CHẾ CHUNKING: Kiểm tra nếu buffer đầy thì lưu
                # Ở đây ta check theo số ván (game), hoặc số moves
                # Nếu buffer đạt khoảng 100.000 mẫu (moves) thì lưu là vừa đẹp
                if len(buffer_features) >= 50000: # Khoảng 200-300 ván
                    save_chunk(buffer_features, buffer_policies, buffer_values, chunk_counter)
                    chunk_counter += 1
                    # Reset buffer để giải phóng RAM
                    buffer_features = []
                    buffer_policies = []
                    buffer_values = []
                    print(f"Cleared RAM. Total games so far: {total_games_processed}")

            except Exception as e:
                print(f"Skipping file {file_path}: {e}")

    # Lưu nốt phần còn lại trong buffer (nếu có)
    if len(buffer_features) > 0:
        save_chunk(buffer_features, buffer_policies, buffer_values, chunk_counter)

    print(f"=== COMPLETED ===")
    print(f"Total games processed: {total_games_processed}")
    print(f"Data saved to: {OUTPUT_DIR}")

if __name__ == "__main__":
    process_all_folders()

Processing folder: 1p - Found 293 files
--> Saving chunk 0 with 50053 moves...
Cleared RAM. Total games so far: 266
Processing folder: 2p - Found 151 files
Skipping file ./Datasets/Pro/2p/1384432811019999870.sgf: 
Processing folder: 3p - Found 446 files
Skipping file ./Datasets/Pro/3p/1440209016019999364.sgf: 
Skipping file ./Datasets/Pro/3p/1503025471019999928.sgf: 
Skipping file ./Datasets/Pro/3p/1489043325019999769.sgf: 
Skipping file ./Datasets/Pro/3p/1487833529019999156.sgf: 
Skipping file ./Datasets/Pro/3p/1490862288019999897.sgf: 
Skipping file ./Datasets/Pro/3p/1491466159019999355.sgf: 
--> Saving chunk 1 with 50177 moves...
Cleared RAM. Total games so far: 515
Skipping file ./Datasets/Pro/3p/1500612367019999290.sgf: 
Skipping file ./Datasets/Pro/3p/1482114863019999178.sgf: 
Skipping file ./Datasets/Pro/3p/1500519692019999167.sgf: 
Skipping file ./Datasets/Pro/3p/1491985248019999753.sgf: 
Skipping file ./Datasets/Pro/3p/1489648230019999778.sgf: 
Skipping file ./Datasets/Pro/3p/

In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class ResidualBlock(nn.Module):
    def __init__(self, num_filters=128):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(num_filters, num_filters, kernel_size=3, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(num_filters)
        self.conv2 = nn.Conv2d(num_filters, num_filters, kernel_size=3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(num_filters)

    def forward(self, x):
        residual = x
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += residual
        out = F.relu(out)
        return out

class GoResNet(nn.Module):
    def __init__(self, num_blocks=10, num_filters=128):
        super(GoResNet, self).__init__()
        self.conv_input = nn.Sequential(
            nn.Conv2d(17, num_filters, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(num_filters),
            nn.ReLU()
        )
        self.res_blocks = nn.ModuleList([ResidualBlock(num_filters) for _ in range(num_blocks)])

        # Policy Head
        self.policy_head = nn.Sequential(
            nn.Conv2d(num_filters, 2, kernel_size=1, bias=False),
            nn.BatchNorm2d(2),
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(2 * 19 * 19, 362)
        )

        # Value Head
        self.value_head = nn.Sequential(
            nn.Conv2d(num_filters, 1, kernel_size=1, bias=False),
            nn.BatchNorm2d(1),
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(19 * 19, 256),
            nn.ReLU(),
            nn.Linear(256, 1),
            nn.Tanh()
        )

    def forward(self, x):
        x = self.conv_input(x)
        for block in self.res_blocks:
            x = block(x)
        policy = self.policy_head(x)
        value = self.value_head(x)
        return policy, value

In [10]:
from torch.utils.data import Dataset, ConcatDataset
import numpy as np
import glob
import os

class GoChunkDataset(Dataset):
    def __init__(self, feature_path, policy_path, value_path):
        # mmap_mode='r' cực kỳ quan trọng trên Colab để không tràn RAM
        self.features = np.load(feature_path, mmap_mode='r')
        self.policies = np.load(policy_path, mmap_mode='r')
        self.values = np.load(value_path, mmap_mode='r')

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        # .copy() để chuyển data từ ổ cứng vào RAM
        return {
            'feature': torch.tensor(self.features[idx].copy(), dtype=torch.float32),
            'policy_target': torch.tensor(self.policies[idx].copy(), dtype=torch.long),
            'value_target': torch.tensor(self.values[idx].copy(), dtype=torch.float32)
        }

def get_dataset(data_dir):
    # Lưu ý: data_dir trên Colab sẽ là '/content/dataset'
    feature_files = sorted(glob.glob(os.path.join(data_dir, "features_*.npy")))
    datasets = []
    print(f"Scanning chunks in {data_dir}...")

    for f_path in feature_files:
        try:
            filename = os.path.basename(f_path)
            # Giả sử format tên file là features_{id}.npy
            chunk_id = filename.split('_')[1].split('.')[0]

            p_path = os.path.join(data_dir, f"labels_policy_{chunk_id}.npy")
            v_path = os.path.join(data_dir, f"labels_value_{chunk_id}.npy")

            if os.path.exists(p_path) and os.path.exists(v_path):
                ds = GoChunkDataset(f_path, p_path, v_path)
                datasets.append(ds)
        except Exception as e:
            print(f"Error loading chunk {f_path}: {e}")

    if not datasets:
        raise RuntimeError("No dataset found!")

    print(f"Loaded {len(datasets)} chunks.")
    return ConcatDataset(datasets)

In [11]:
!pip install torch torchvision



In [None]:
import torch.optim as optim
from torch.utils.data import DataLoader

# --- CẤU HÌNH ---
DATA_DIR = '/content/Datasets/processed' # Thư mục đã giải nén ở Cell 2
BATCH_SIZE = 256              # T4 GPU chịu được 128-256 mẫu
EPOCHS = 10
LEARNING_RATE = 0.001
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def train():
    print(f"Training on device: {DEVICE}")

    dataset = get_dataset(DATA_DIR)
    # num_workers=2 để load dữ liệu song song
    dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)

    model = GoResNet(num_blocks=10, num_filters=128).to(DEVICE)

    criterion_policy = nn.CrossEntropyLoss()
    criterion_value = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

    for epoch in range(EPOCHS):
        model.train()
        total_loss = 0

        print(f"Start Epoch {epoch+1}...")

        for batch_idx, batch in enumerate(dataloader):
            features = batch['feature'].to(DEVICE)
            target_policy = batch['policy_target'].to(DEVICE)
            target_value = batch['value_target'].to(DEVICE)

            optimizer.zero_grad()
            pred_policy, pred_value = model(features)

            loss_p = criterion_policy(pred_policy, target_policy)
            loss_v = criterion_value(pred_value.squeeze(), target_value)
            loss = loss_p + loss_v

            loss.backward()
            optimizer.step()

            total_loss += loss.item()

            if batch_idx % 50 == 0:
                print(f"  Batch {batch_idx} | Loss: {loss.item():.4f}")

        avg_loss = total_loss / len(dataloader)
        print(f"Epoch {epoch+1} Done. Avg Loss: {avg_loss:.4f}")

        # Lưu model vào Drive để không bị mất khi tắt Colab
        save_path = f"/content/go_model_epoch_{epoch+1}.pth"
        torch.save(model.state_dict(), save_path)
        print(f"Saved checkpoint to Drive: {save_path}")

# Gọi hàm train
train()

Training on device: cuda
Scanning chunks in /content/Datasets/processed...
Loaded 15 chunks.
Start Epoch 1...
  Batch 0 | Loss: 7.0288
  Batch 50 | Loss: 6.7111
  Batch 100 | Loss: 6.7103
  Batch 150 | Loss: 6.0325
  Batch 200 | Loss: 5.6108
  Batch 250 | Loss: 5.3915
  Batch 300 | Loss: 5.4478
  Batch 350 | Loss: 5.1579
  Batch 400 | Loss: 4.9802
  Batch 450 | Loss: 5.0842
  Batch 500 | Loss: 4.8453
  Batch 550 | Loss: 5.0354
  Batch 600 | Loss: 4.5062
  Batch 650 | Loss: 4.4162
  Batch 700 | Loss: 4.1753
  Batch 750 | Loss: 4.1625
  Batch 800 | Loss: 3.8793
  Batch 850 | Loss: 3.7574
  Batch 900 | Loss: 3.8575
  Batch 950 | Loss: 3.9408
  Batch 1000 | Loss: 3.4699
  Batch 1050 | Loss: 3.7500
  Batch 1100 | Loss: 3.5532
  Batch 1150 | Loss: 3.3970
  Batch 1200 | Loss: 3.4445
  Batch 1250 | Loss: 3.3177
  Batch 1300 | Loss: 3.3861
  Batch 1350 | Loss: 3.4579
  Batch 1400 | Loss: 3.3104
  Batch 1450 | Loss: 3.4620
  Batch 1500 | Loss: 3.5746
  Batch 1550 | Loss: 3.3582
  Batch 1600 | Lo

## FOR FINETUNE ONLY

In [None]:
import torch
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import DataLoader

# --- CẤU HÌNH FINE-TUNE ---
PRETRAINED_PATH = "/content/go_model_epoch_10.pth" # Model cũ
NEW_DATA_DIR = "/content/Datasets" # Folder chứa dataset mới (dạng .npy)
BATCH_SIZE = 256
EPOCHS = 5               # Fine-tune thường cần ít epoch hơn train mới
LEARNING_RATE = 0.0005   # <--- QUAN TRỌNG: Giảm nhỏ hơn lúc train gốc (thường là 1/10)
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def fine_tune():
    print(f"Fine-tuning on device: {DEVICE}")

    # 1. Khởi tạo kiến trúc mạng
    # LƯU Ý: num_blocks và num_filters PHẢI GIỐNG HỆT lúc train model cũ
    model = GoResNet(num_blocks=10, num_filters=128).to(DEVICE)

    # 2. Load trọng số cũ (Weights)
    print(f"Loading weights from {PRETRAINED_PATH}...")
    try:
        # map_location để đảm bảo load được dù train trên GPU khác hay CPU
        state_dict = torch.load(PRETRAINED_PATH, map_location=DEVICE)
        model.load_state_dict(state_dict)
        print("Weights loaded successfully!")
    except Exception as e:
        print(f"Error loading weights: {e}")
        return

    # 3. Chuẩn bị dữ liệu mới
    # Dùng hàm get_dataset cũ nhưng trỏ vào folder dữ liệu mới
    dataset = get_dataset(NEW_DATA_DIR)
    dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)

    # 4. Optimizer với Learning Rate thấp
    criterion_policy = nn.CrossEntropyLoss()
    criterion_value = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE) # LR nhỏ

    # 5. Vòng lặp Training (như cũ)
    for epoch in range(EPOCHS):
        model.train()
        total_loss = 0

        print(f"Start Fine-tuning Epoch {epoch+1}...")

        for batch_idx, batch in enumerate(dataloader):
            features = batch['feature'].to(DEVICE)
            target_policy = batch['policy_target'].to(DEVICE)
            target_value = batch['value_target'].to(DEVICE)

            optimizer.zero_grad()
            pred_policy, pred_value = model(features)

            loss_p = criterion_policy(pred_policy, target_policy)
            loss_v = criterion_value(pred_value.squeeze(), target_value)
            loss = loss_p + loss_v

            loss.backward()
            optimizer.step()

            total_loss += loss.item()

            if batch_idx % 100 == 0:
                print(f"  Batch {batch_idx} | Loss: {loss.item():.4f}")

        # Lưu model fine-tune với tên mới
        save_path = f"/content/drive/MyDrive/GoAI/finetuned_epoch_{epoch+1}.pth"
        torch.save(model.state_dict(), save_path)
        print(f"Saved finetuned model: {save_path}")

if __name__ == "__main__":
    fine_tune()