In [None]:
import torch
flag = torch.cuda.is_available()
print(flag)

if flag:
    print(torch.cuda.get_device_name(0))
    print(torch.rand(3,3).cuda()) 

True
NVIDIA RTX A5000
tensor([[0.4938, 0.5651, 0.5798],
        [0.5265, 0.5529, 0.2071],
        [0.1274, 0.2573, 0.5208]], device='cuda:0')


In [2]:
import sys
import os

# 确定项目根目录（假设当前工作目录是项目的根目录）
project_root = os.path.abspath("..")
sys.path.insert(0, project_root)

import pickle
import numpy as np
from ai import ai_battle, MinimaxAI, MCTSAI
from game.chess_game import ChessGame
from CelestialVault.instances import ExampleThreadManager
from time import strftime, localtime


class TrainDataThread(ExampleThreadManager):
    def get_args(self, obj: object):
        train_game = ChessGame((5, 5), 2)
        train_game.init_cfunc()
        train_game.init_history()
        return (mcts_ai_0, mcts_ai_0, train_game, False)
    
    def process_result(self):
        all_training_data = []
        result_dict = self.get_result_dict()
        for over_game in result_dict.values():
            history_board = over_game.history_board
            history_move = over_game.history_move
            for step in range(over_game.max_step-1):
                board = self.process_board(history_board[step], step)
                # if (board, history_move[step+1]) in all_training_data: # 这样效果并不好
                #     continue
                all_training_data.append((board, history_move[step+1]))
        return all_training_data
    
    def process_board(self, chess_board, step):
        color = 1 if step % 2 == 0 else -1
        color_channel = np.full((5, 5, 1), color)

        processed_board = np.concatenate((chess_board, color_channel), axis=2)
        
        for row in processed_board:
            for cell in row:
                if cell[0] == float("inf"):
                    cell[0] = 5
        return processed_board

def load_data(file_path):
    with open(file_path, "rb") as f:
        data = pickle.load(f)
    return data

def save_data(data):
    data_size = len(data)
    now_time = strftime("%m-%d-%H", localtime())
    pickle.dump(data, open(f"train_data/all_training_data({now_time})({data_size}).pkl", "wb"))

def train_data(train_num):
    train_data_threader.start(range(train_num), "serial")
    train_data_threader.handle_error()
    all_training_data = train_data_threader.process_result()

    save_data(all_training_data)
    
    return all_training_data

minimax_ai = MinimaxAI(5)
mcts_ai_0 = MCTSAI(1000, complate_mode=False)
mcts_ai_1 = MCTSAI(50000, complate_mode=False)

train_data_threader = TrainDataThread(
            ai_battle,
            thread_num=200,
            tqdm_desc='trainDataProcess',
            show_progress=True)


In [2]:
all_training_data = []
for _ in range(1):
    all_training_data += train_data(10000)

len(all_training_data)

trainDataProcess: 100%|██████████| 200/200 [01:25<00:00,  2.33it/s]
trainDataProcess: 100%|██████████| 200/200 [01:25<00:00,  2.34it/s]
trainDataProcess: 100%|██████████| 200/200 [01:27<00:00,  2.28it/s]
trainDataProcess: 100%|██████████| 200/200 [01:26<00:00,  2.31it/s]
trainDataProcess: 100%|██████████| 200/200 [01:17<00:00,  2.60it/s]
trainDataProcess: 100%|██████████| 200/200 [01:17<00:00,  2.58it/s]
trainDataProcess: 100%|██████████| 200/200 [01:20<00:00,  2.48it/s]
trainDataProcess: 100%|██████████| 200/200 [01:18<00:00,  2.54it/s]
trainDataProcess: 100%|██████████| 200/200 [01:19<00:00,  2.50it/s]
trainDataProcess: 100%|██████████| 200/200 [01:19<00:00,  2.51it/s]
trainDataProcess: 100%|██████████| 200/200 [01:18<00:00,  2.54it/s]
trainDataProcess: 100%|██████████| 200/200 [01:17<00:00,  2.58it/s]
trainDataProcess: 100%|██████████| 200/200 [01:18<00:00,  2.53it/s]
trainDataProcess: 100%|██████████| 200/200 [01:17<00:00,  2.59it/s]
trainDataProcess: 100%|██████████| 200/200 [01:1

136090

In [3]:
all_training_data[0]

(array([[[0., 0., 1.],
         [0., 0., 1.],
         [0., 0., 1.],
         [0., 0., 1.],
         [0., 0., 1.]],
 
        [[0., 0., 1.],
         [0., 0., 1.],
         [0., 0., 1.],
         [0., 0., 1.],
         [0., 0., 1.]],
 
        [[0., 0., 1.],
         [0., 0., 1.],
         [0., 0., 1.],
         [0., 0., 1.],
         [0., 0., 1.]],
 
        [[0., 0., 1.],
         [0., 0., 1.],
         [0., 0., 1.],
         [0., 0., 1.],
         [0., 0., 1.]],
 
        [[0., 0., 1.],
         [0., 0., 1.],
         [0., 0., 1.],
         [0., 0., 1.],
         [0., 0., 1.]]]),
 array([1, 2]))

In [3]:
all_training_data = load_data(r"train_data\train_data(06-22-17)(136090).pkl")

In [4]:
import torch
from torch.utils.data import DataLoader, Dataset

class ChessDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        board_state, move = self.data[idx]
        board_state = torch.tensor(board_state, dtype=torch.float32)
        move = move[0] * 5 + move[1]
        return board_state, move

dataset = ChessDataset(all_training_data)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

len(dataset)

136090

In [5]:
from time import strftime, localtime
import torch.nn as nn
import torch.optim as optim
from ai.deeplearning import ChessModel

# 设置CuDNN选项
torch.backends.cudnn.benchmark = True

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ChessModel().to(device) # 初始化模型，并将其移动到GPU上
criterion = nn.CrossEntropyLoss() # 定义交叉熵损失函数
optimizer = optim.Adam(model.parameters(), lr=0.001) # 定义Adam优化器

# 训练循环
num_epochs = 10  # 训练10个epoch
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, data in enumerate(dataloader, 0):
        inputs, labels = data
        # 调整输入的维度，并将其移动到GPU上
        # inputs 的原始形状是 (batch_size, height, width, channels)，也就是 (32, 5, 5, 3)
        # inputs.permute(0, 3, 1, 2) 会将 inputs 的维度从 (32, 5, 5, 3) 转换为 (32, 3, 5, 5)
        inputs = inputs.permute(0, 3, 1, 2).to(device)  # (batch_size, channels, height, width)
        labels = labels.to(device).to(torch.int64)

        # 清零梯度
        optimizer.zero_grad()

        # 前向传播
        outputs = model(inputs)
        # 计算损失
        loss = criterion(outputs, labels)
        # 反向传播
        loss.backward()
        # 更新参数
        optimizer.step()

        # 累积损失
        running_loss += loss.item()
        if i % 100 == 99:  # 每100个batch打印一次loss
            print(f'Epoch {epoch + 1}, Batch {i + 1}, Loss: {running_loss / 100:.3f}')
            running_loss = 0.0

print('Finished Training')
data_size = len(dataset)
now_time = strftime("%m-%d-%H-%M", localtime())
add_text = '32-64-128-256'
torch.save(model.state_dict(), f'models/dl_model({now_time})({data_size})({add_text}).pth')


Epoch 1, Batch 100, Loss: 3.136
Epoch 1, Batch 200, Loss: 3.050
Epoch 1, Batch 300, Loss: 3.003
Epoch 1, Batch 400, Loss: 2.915
Epoch 1, Batch 500, Loss: 2.831
Epoch 1, Batch 600, Loss: 2.754
Epoch 1, Batch 700, Loss: 2.690
Epoch 1, Batch 800, Loss: 2.654
Epoch 1, Batch 900, Loss: 2.587
Epoch 1, Batch 1000, Loss: 2.561
Epoch 1, Batch 1100, Loss: 2.505
Epoch 1, Batch 1200, Loss: 2.490
Epoch 1, Batch 1300, Loss: 2.403
Epoch 1, Batch 1400, Loss: 2.303
Epoch 1, Batch 1500, Loss: 2.285
Epoch 1, Batch 1600, Loss: 2.233
Epoch 1, Batch 1700, Loss: 2.215
Epoch 1, Batch 1800, Loss: 2.168
Epoch 1, Batch 1900, Loss: 2.125
Epoch 1, Batch 2000, Loss: 2.086
Epoch 1, Batch 2100, Loss: 2.052
Epoch 1, Batch 2200, Loss: 2.030
Epoch 1, Batch 2300, Loss: 1.962
Epoch 1, Batch 2400, Loss: 1.884
Epoch 1, Batch 2500, Loss: 1.872
Epoch 1, Batch 2600, Loss: 1.867
Epoch 1, Batch 2700, Loss: 1.837
Epoch 1, Batch 2800, Loss: 1.798
Epoch 1, Batch 2900, Loss: 1.795
Epoch 1, Batch 3000, Loss: 1.753
Epoch 1, Batch 3100

## Show the Result

In [6]:
import sys
import os

# 确定项目根目录（假设当前工作目录是项目的根目录）
project_root = os.path.abspath("..")
sys.path.insert(0, project_root)

from ai import MinimaxAI, MCTSAI
from ai.test_ai import get_model_score_by_mcts
from ai.deeplearning import DeepLearningAI
from game.chess_game import ChessGame


model_score_dict = {
    "MCTSAI(1000, complate_mode=False)": (990, ),
    "MCTSAI(100, complate_mode=False)": (110, {10: 0.79, 20: 0.71, 30: 0.68, 40: 0.595, 50: 0.565, 60: 0.585, 70: 0.615, 80: 0.54, 90: 0.555, 100: 0.435, 110: 0.53, 120: 0.415}),
    "MCTSAI(50, complate_mode=False)": (40, {10: 0.72, 20: 0.605, 30: 0.565, 40: 0.535, 50: 0.535}),
    "MinimaxAI(5, *game_state, complate_mode=False)": (0, {10: 0.495}),
    "MinimaxAI(5, *game_state, complate_mode=True)": (0, {10: 0.47}),
    "MinimaxAI(3, *game_state, complate_mode=False)": (0, {10: 0.44}),
    'DeepLearningAI("models/dl_model(06-22-16)(127)(16-32-64).pth")': (0, {10: 0.36}),
    'DeepLearningAI("models/dl_model(06-22-20)(136090)(16-32-64).pth")': (60, {10: 0.74, 20: 0.575, 30: 0.675, 40: 0.615, 50: 0.545, 60: 0.56, 70: 0.46}),
    'DeepLearningAI("models/dl_model(06-22-20)(136090)(32-64-128).pth")': (120, {10: 0.77, 20: 0.8, 30: 0.715, 40: 0.615, 50: 0.625, 60: 0.655, 70: 0.505, 80: 0.575, 90: 0.53, 100: 0.575, 110: 0.53, 120: 0.46, 130: 0.44}),
    'DeepLearningAI("models/dl_model(06-22-21-09)(136090)(32-64-64-128).pth", complate_mode=False)': (30, {10: 0.69, 20: 0.63, 30: 0.55, 40: 0.5}),
    'DeepLearningAI("models/dl_model(06-22-21-18)(136090)(32-64-128-256).pth", complate_mode=False)': (190, {10: 0.78, 20: 0.69, 30: 0.73, 40: 0.775, 50: 0.675, 60: 0.635, 70: 0.505, 80: 0.595, 90: 0.505, 100: 0.575, 110: 0.545, 120: 0.63, 130: 0.485, 140: 0.505, 150: 0.58, 160: 0.62, 170: 0.48, 180: 0.595, 190: 0.5, 200: 0.57}),
    

}

# 与其他AI算法进行对战
# ai_battle(deep_learning_ai_2, test_mcts_0, ChessGame((5, 5), 2), display=True)
game_state = ((5,5), 2)

In [2]:
print(get_model_score_by_mcts(MCTSAI(50, complate_mode=False), game_state))

TypeError: get_model_score_by_mcts() missing 1 required positional argument: 'game_state'

In [None]:
with MCTSAI(100, complate_mode=False) as test_ai:
    score,score_dict = get_model_score_by_mcts(test_ai)
    print(score,score_dict)


AttributeError: __enter__

In [7]:
print(get_model_score_by_mcts(DeepLearningAI("models/dl_model(06-22-21-18)(136090)(32-64-128-256).pth", complate_mode=False), game_state))

  2%|▏         | 19/999 [01:12<1:02:35,  3.83s/it]

(190, {10: 0.78, 20: 0.69, 30: 0.73, 40: 0.775, 50: 0.675, 60: 0.635, 70: 0.505, 80: 0.595, 90: 0.505, 100: 0.575, 110: 0.545, 120: 0.63, 130: 0.485, 140: 0.505, 150: 0.58, 160: 0.62, 170: 0.48, 180: 0.595, 190: 0.5, 200: 0.57})





In [6]:
deeplearning_model = DeepLearningAI(r"models\chess_ai_model(06-15-17)(15475).pth") 

len(deeplearning_model.model.conv1.weight.data)

32

In [2]:
for path in deep_learning_dict.keys():
    deep_learning_ai = DeepLearningAI(path)
    score, score_dict = get_model_score_by_mcts(deep_learning_ai)
    deep_learning_dict[path] = score
    print(f"{path} : {score_dict}", end = '\n')

deep_learning_dict

RuntimeError: Error(s) in loading state_dict for ChessModel:
	Missing key(s) in state_dict: "conv3.weight", "conv3.bias". 
	size mismatch for conv1.weight: copying a param with shape torch.Size([32, 3, 3, 3]) from checkpoint, the shape in current model is torch.Size([16, 3, 3, 3]).
	size mismatch for conv1.bias: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([16]).
	size mismatch for conv2.weight: copying a param with shape torch.Size([64, 32, 3, 3]) from checkpoint, the shape in current model is torch.Size([32, 16, 3, 3]).
	size mismatch for conv2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([32]).

## old

In [None]:
# 深度学习AI
deep_learning_dict = {
                      # {10: 0.7, 20: 0.68, 30: 0.47}
                      # 
                      "models/chess_ai_model(06-13-19).pth": 20, # 未知
                      # {10: 0.7, 20: 0.51, 30: 0.43}
                      # 
                      "models/chess_ai_model(06-15-17)(1506).pth": 20, # MCTSAI(1000, flag=True) 训练100轮
                      # {10: 0.75, 20: 0.65, 30: 0.47}
                      # 
                      'models/chess_ai_model(06-15-17)(1471).pth': 20, # MCTSAI(10000, flag=True) 训练100轮
                      # {10: 0.63, 20: 0.41}
                      'models/chess_ai_model(06-15-17)(936).pth': 10, # MCTSAI(10000, flag=True) 训练100轮并去重
                      # {10: 0.82, 20: 0.76, 30: 0.65, 40: 0.57, 50: 0.56, 60: 0.44}
                      # 
                      'models/chess_ai_model(06-15-17)(15475).pth': 50, # MCTSAI(1000, flag=True) 训练1000轮
                      # {10: 0.84, 20: 0.58, 30: 0.57, 40: 0.61, 50: 0.56, 60: 0.41}
                      'models/chess_ai_model(06-15-17)(9598).pth': 50, # MCTSAI(1000, flag=True) 训练1000轮并去重
                      # {10: 0.32}
                      "models/chess_ai_model(06-16-15)(1400).pth": 0, # MCTSAI(10000, flag=False) 训练100轮(没标错, 真是0分)
                      # {10: 0.55, 20: 0.48}
                      'models/chess_ai_model(06-16-20)(1542).pth': 10, # MCTSAI(1000, flag=False) 训练100轮

                      # {10: 0.82, 20: 0.73, 30: 0.67, 40: 0.52, 50: 0.46}
                      # 
                      'models/chess_ai_model(06-17-12)(15475).pth': 40, # MCTSAI(1000, flag=True) 训练1000轮, 使用三层32-64-128卷积层, 128-256-25全连接层
                      # {10: 0.72, 20: 0.39}
                      'models/chess_ai_model(06-17-12)(1506).pth': 10, # MCTSAI(1000, flag=True) 训练100轮, 使用三层32-64-128卷积层, 128-256-25全连接层
                      
                      # {10: 0.9, 20: 0.84, 30: 0.72, 40: 0.6, 50: 0.48}
                      # {10: 0.88, 20: 0.83, 30: 0.69, 40: 0.68, 50: 0.57, 60: 0.65, 70: 0.61, 80: 0.63, 90: 0.49}
                      # 
                      'models/chess_ai_model(06-17-13)(15475).pth': 80, # MCTSAI(1000, flag=True) 训练1000轮, 使用三层16-32-64卷积层, 64-128-25全连接层
                      }

                    
# deep_learning_ai_0 = DeepLearningAI("models/chess_ai_model(06-15-17)(936).pth") 

# 测试AI
test_minimax = MinimaxAI(6) # score: 50
test_mcts_0 = MCTSAI(1000) # 
test_mcts_1 = MCTSAI(100, flag=True) # score: 50
test_mcts_2 = MCTSAI(100, flag=False) # score: 80
test_mcts_3 = MCTSAI(60, flag=True) # 50, {10: 0.82, 20: 0.76, 30: 0.53, 40: 0.57, 50: 0.51, 60: 0.32}
test_mcts_4 = MCTSAI(60, flag=False) # 110, {10: 0.86, 20: 0.86, 30: 0.77, 40: 0.63, 50: 0.58, 60: 0.63, 70: 0.58, 80: 0.52, 90: 0.51, 100: 0.5, 110: 0.52, 120: 0.45}
test_mcts_5 = MCTSAI(10, flag=True) # 0, {10: 0.45}
test_mcts_6 = MCTSAI(10, flag=False) # 50, {10: 0.76, 20: 0.69, 30: 0.57, 40: 0.56, 50: 0.5, 60: 0.4}
test_mcts_7 = MCTSAI(500, flag=True)