In [None]:
import torch
flag = torch.cuda.is_available()
print(flag)

if flag:
    print(torch.cuda.get_device_name(0))
    print(torch.rand(3,3).cuda()) 

True
NVIDIA RTX A5000
tensor([[0.4938, 0.5651, 0.5798],
        [0.5265, 0.5529, 0.2071],
        [0.1274, 0.2573, 0.5208]], device='cuda:0')


In [1]:
import sys
import os

# 确定项目根目录（假设当前工作目录是项目的根目录）
project_root = os.path.abspath("..")
sys.path.insert(0, project_root)

import pickle, re, json
import numpy as np
from ai import ai_battle, MinimaxAI, MCTSAI
from game.chess_game import ChessGame
from CelestialVault.instances import ExampleThreadManager
from time import strftime, localtime


class TrainDataThread(ExampleThreadManager):
    def get_args(self, obj: object):
        train_game = ChessGame((5, 5), 2)
        train_game.init_cfunc()
        train_game.init_history()
        return (mcts_ai_0, mcts_ai_0, train_game, False)
    
    def process_result(self):
        all_training_data = []
        result_dict = self.get_result_dict()
        for over_game in result_dict.values():
            history_board = over_game.history_board
            history_move = over_game.history_move
            for step in range(over_game.max_step-1):
                board = self.process_board(history_board[step], step)
                # if (board, history_move[step+1]) in all_training_data: # 这样效果并不好
                #     continue
                all_training_data.append((board, history_move[step+1]))
        return all_training_data
    
    def process_board(self, chess_board, step):
        color = 1 if step % 2 == 0 else -1
        color_channel = np.full((5, 5, 1), color)

        processed_board = np.concatenate((chess_board, color_channel), axis=2)
        
        for row in processed_board:
            for cell in row:
                if cell[0] == float("inf"):
                    cell[0] = 5
        return processed_board

def load_data(file_path):
    with open(file_path, "rb") as f:
        data = pickle.load(f)
    return data

def save_data(data):
    data_size = len(data)
    now_time = strftime("%m-%d-%H", localtime())
    pickle.dump(data, open(f"train_data/all_training_data({now_time})({data_size}).pkl", "wb"))

def start_train_data(train_num):
    train_data_threader.start(range(train_num), "serial")
    train_data_threader.handle_error()
    all_training_data = train_data_threader.process_result()

    save_data(all_training_data)
    
    return all_training_data

def get_model_info_dict(path, train_data_path, model):
    model_info_dict = {}
    model_info_dict["path"] = path
    model_info_dict["train_data_path"] = train_data_path

    model_str = str(model)
    model_lines = re.split(r'\n', model_str)
    layer_dict = {}
    for line in model_lines[1:-1]:  # 跳过开头和结尾的行
        re_ = re.compile(r"\((.*?)\): (.*)")
        layer_name = re_.search(line).group(1)
        layer_args = re_.search(line).group(2)
        
        layer_dict[layer_name] = layer_args
    model_info_dict["layers"] = layer_dict


    return model_info_dict

def save_info_dict(info_dict, model_type):
    with open('model_score.json', 'r') as f:
        model_score = json.load(f)

    model_score[model_type].append(info_dict)

    with open('model_score.json', 'w') as f:
        json.dump(model_score, f, indent=2)

# minimax_ai = MinimaxAI(5)
mcts_ai_0 = MCTSAI(1000, complate_mode=False)
mcts_ai_1 = MCTSAI(50000, complate_mode=False)

train_data_threader = TrainDataThread(
            ai_battle,
            thread_num=200,
            tqdm_desc='trainDataProcess',
            show_progress=True)


In [8]:
all_training_data = []
for _ in range(1):
    all_training_data += train_data(1000)

len(all_training_data)

trainDataProcess: 100%|██████████| 200/200 [01:28<00:00,  2.26it/s]
trainDataProcess: 100%|██████████| 200/200 [01:28<00:00,  2.25it/s]
trainDataProcess: 100%|██████████| 200/200 [01:28<00:00,  2.25it/s]
trainDataProcess: 100%|██████████| 200/200 [01:28<00:00,  2.25it/s]
trainDataProcess: 100%|██████████| 200/200 [01:29<00:00,  2.23it/s]


13631

In [3]:
all_training_data[0]

(array([[[0., 0., 1.],
         [0., 0., 1.],
         [0., 0., 1.],
         [0., 0., 1.],
         [0., 0., 1.]],
 
        [[0., 0., 1.],
         [0., 0., 1.],
         [0., 0., 1.],
         [0., 0., 1.],
         [0., 0., 1.]],
 
        [[0., 0., 1.],
         [0., 0., 1.],
         [0., 0., 1.],
         [0., 0., 1.],
         [0., 0., 1.]],
 
        [[0., 0., 1.],
         [0., 0., 1.],
         [0., 0., 1.],
         [0., 0., 1.],
         [0., 0., 1.]],
 
        [[0., 0., 1.],
         [0., 0., 1.],
         [0., 0., 1.],
         [0., 0., 1.],
         [0., 0., 1.]]]),
 array([1, 2]))

In [2]:
train_data_path = r"train_data\train_data(06-22-17)(136090).pkl"

train_data = load_data(train_data_path)

In [3]:
import torch
from torch.utils.data import DataLoader, Dataset

class ChessDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        board_state, move = self.data[idx]
        board_state = torch.tensor(board_state, dtype=torch.float32)
        move = move[0] * 5 + move[1]
        return board_state, move

dataset = ChessDataset(train_data)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

len(dataset)

136090

In [4]:
from time import strftime, localtime
import torch.nn as nn
import torch.optim as optim
from ai.deeplearning import ChessPolicyModel


# 设置CuDNN选项
torch.backends.cudnn.benchmark = True

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ChessPolicyModel().to(device) # 初始化模型，并将其移动到GPU上
criterion = nn.CrossEntropyLoss() # 定义交叉熵损失函数
optimizer = optim.Adam(model.parameters(), lr=0.001) # 定义Adam优化器

train_log_text = []
# 训练循环
num_epochs = 10  # 训练10个epoch
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, data in enumerate(dataloader, 0):
        inputs, labels = data
        # 调整输入的维度，并将其移动到GPU上
        # inputs 的原始形状是 (batch_size, height, width, channels)，也就是 (32, 5, 5, 3)
        # inputs.permute(0, 3, 1, 2) 会将 inputs 的维度从 (32, 5, 5, 3) 转换为 (32, 3, 5, 5)
        inputs = inputs.permute(0, 3, 1, 2).to(device)  # (batch_size, channels, height, width)
        labels = labels.to(device).to(torch.int64)

        # 清零梯度
        optimizer.zero_grad()

        # 前向传播
        outputs = model(inputs)
        # 计算损失
        loss = criterion(outputs, labels)
        # 反向传播
        loss.backward()
        # 更新参数
        optimizer.step()

        # 累积损失
        running_loss += loss.item()
        if i % 100 == 99:  # 每100个batch打印一次loss
            log_text = f'Epoch {epoch + 1}, Batch {i + 1}, Loss: {running_loss / 100:.3f}'
            print(log_text)
            train_log_text.append(log_text)
            running_loss = 0.0

print('Finished Training')
data_size = len(dataset)
now_time = strftime("%m-%d-%H-%M", localtime())
model_path = f'models/dl_model({now_time})({data_size}).pth'
torch.save(model.state_dict(), model_path)

model_info_dict = get_model_info_dict(model_path, train_data_path, model)
save_info_dict(model_info_dict, "DeepLearningAI")

model_loss_path = f'models_loss/dl_model({now_time})({data_size}).txt'
with open(model_loss_path, 'w') as f:
    f.write('\n'.join(train_log_text))


Epoch 1, Batch 100, Loss: 3.245
Epoch 1, Batch 200, Loss: 3.245
Epoch 1, Batch 300, Loss: 3.245
Epoch 1, Batch 400, Loss: 3.250
Epoch 1, Batch 500, Loss: 3.252
Epoch 1, Batch 600, Loss: 3.239
Epoch 1, Batch 700, Loss: 3.243
Epoch 1, Batch 800, Loss: 3.244
Epoch 1, Batch 900, Loss: 3.246
Epoch 1, Batch 1000, Loss: 3.244
Epoch 1, Batch 1100, Loss: 3.248
Epoch 1, Batch 1200, Loss: 3.247
Epoch 1, Batch 1300, Loss: 3.253
Epoch 1, Batch 1400, Loss: 3.244
Epoch 1, Batch 1500, Loss: 3.242
Epoch 1, Batch 1600, Loss: 3.248
Epoch 1, Batch 1700, Loss: 3.248
Epoch 1, Batch 1800, Loss: 3.248
Epoch 1, Batch 1900, Loss: 3.249
Epoch 1, Batch 2000, Loss: 3.247
Epoch 1, Batch 2100, Loss: 3.246
Epoch 1, Batch 2200, Loss: 3.244
Epoch 1, Batch 2300, Loss: 3.248
Epoch 1, Batch 2400, Loss: 3.243
Epoch 1, Batch 2500, Loss: 3.248
Epoch 1, Batch 2600, Loss: 3.253
Epoch 1, Batch 2700, Loss: 3.242
Epoch 1, Batch 2800, Loss: 3.243
Epoch 1, Batch 2900, Loss: 3.248
Epoch 1, Batch 3000, Loss: 3.244
Epoch 1, Batch 3100

TypeError: Object of type ChessPolicyModel is not JSON serializable

In [10]:
from torchsummary import summary

summary(model, input_size=(3, 5, 5))  # 输入模型和输入tensor尺寸

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1             [-1, 32, 5, 5]             896
            Conv2d-2             [-1, 64, 5, 5]          18,496
            Conv2d-3            [-1, 128, 5, 5]          73,856
            Conv2d-4            [-1, 256, 5, 5]         295,168
            Linear-5                  [-1, 512]       3,277,312
           Dropout-6                  [-1, 512]               0
            Linear-7                   [-1, 25]          12,825
Total params: 3,678,553
Trainable params: 3,678,553
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.10
Params size (MB): 14.03
Estimated Total Size (MB): 14.13
----------------------------------------------------------------


## Show the Result

In [10]:
import sys
import os

# 确定项目根目录（假设当前工作目录是项目的根目录）
project_root = os.path.abspath("..")
sys.path.insert(0, project_root)

from tqdm import tqdm
from ai import MinimaxAI, MCTSAI
from ai.test_ai import get_model_score_by_mcts, ai_battle
from ai.deeplearning import DeepLearningAI
from game.chess_game import ChessGame


def get_best_c_param(game_state):
    best_c_param = 0.0
    c_param_dict = dict()
    best_mcts = MCTSAI(100, c_param=best_c_param, policy_net=policy_model, complate_mode=False)

    for param in tqdm(range(0, 11, 1)):
        win = 0
        test_mcts = MCTSAI(100, c_param=param/10, policy_net=policy_model, complate_mode=False)

        for _ in range(100):
            test_game = ChessGame(*game_state)
            test_game.init_history()
            over_game = ai_battle(best_mcts, test_mcts, test_game, display=False)
            winner = over_game.who_is_winner()
            if winner == 1:
                win += 1
            elif winner == 0:
                win += 0.5

        c_param_dict[f"{best_c_param} : {param/10}"] = win
        if win < 50:
            best_c_param = param/10
            best_mcts = MCTSAI(100, c_param=best_c_param, complate_mode=False)
        
    return best_c_param, c_param_dict

# 与其他AI算法进行对战
game_state = ((5,5), 2)

# policy_model = DeepLearningAI('models/dl_model(06-22-21-18)(136090)(32-64-128-256).pth', complate_mode=False)

In [7]:
get_best_c_param(game_state)

100%|██████████| 11/11 [44:03<00:00, 240.33s/it] 


(1.0,
 {'0 : 0.0': 47.0,
  '0.0 : 0.1': 70.0,
  '0.0 : 0.2': 60.0,
  '0.0 : 0.3': 63.5,
  '0.0 : 0.4': 40.0,
  '0.4 : 0.5': 49.5,
  '0.5 : 0.6': 55.0,
  '0.5 : 0.7': 55.0,
  '0.5 : 0.8': 53.5,
  '0.5 : 0.9': 53.0,
  '0.5 : 1.0': 47.0})

In [3]:
print(get_model_score_by_mcts(MCTSAI(100, 0.9, complate_mode=False), game_state))

  1%|          | 10/999 [00:41<1:08:30,  4.16s/it]

(100, {10: 0.73, 20: 0.72, 30: 0.66, 40: 0.685, 50: 0.585, 60: 0.575, 70: 0.59, 80: 0.48, 90: 0.55, 100: 0.48, 110: 0.525})





In [4]:
policy_model = DeepLearningAI('models/dl_model(06-22-21-18)(136090)(32-64-128-256).pth', complate_mode=False)
mcts_model = MCTSAI(100, c_param=0.5, policy_net=policy_model, complate_mode=False)

print(get_model_score_by_mcts(mcts_model, game_state))

  0%|          | 0/999 [00:00<?, ?it/s]

  1%|          | 8/999 [26:49<55:23:30, 201.22s/it]

(80, {10: 0.735, 20: 0.69, 30: 0.67, 40: 0.565, 50: 0.545, 60: 0.56, 70: 0.555, 80: 0.5, 90: 0.49})





In [11]:
print(get_model_score_by_mcts(DeepLearningAI(model_path, complate_mode=False), game_state))

  0%|          | 1/999 [00:03<55:17,  3.32s/it]

(10, {10: 0.63, 20: 0.54})





In [6]:
deeplearning_model = DeepLearningAI(r"models\chess_ai_model(06-15-17)(15475).pth") 

len(deeplearning_model.model.conv1.weight.data)

32

## old

In [None]:
# 深度学习AI
deep_learning_dict = {
                      # {10: 0.7, 20: 0.68, 30: 0.47}
                      # 
                      "models/chess_ai_model(06-13-19).pth": 20, # 未知
                      # {10: 0.7, 20: 0.51, 30: 0.43}
                      # 
                      "models/chess_ai_model(06-15-17)(1506).pth": 20, # MCTSAI(1000, flag=True) 训练100轮
                      # {10: 0.75, 20: 0.65, 30: 0.47}
                      # 
                      'models/chess_ai_model(06-15-17)(1471).pth': 20, # MCTSAI(10000, flag=True) 训练100轮
                      # {10: 0.63, 20: 0.41}
                      'models/chess_ai_model(06-15-17)(936).pth': 10, # MCTSAI(10000, flag=True) 训练100轮并去重
                      # {10: 0.82, 20: 0.76, 30: 0.65, 40: 0.57, 50: 0.56, 60: 0.44}
                      # 
                      'models/chess_ai_model(06-15-17)(15475).pth': 50, # MCTSAI(1000, flag=True) 训练1000轮
                      # {10: 0.84, 20: 0.58, 30: 0.57, 40: 0.61, 50: 0.56, 60: 0.41}
                      'models/chess_ai_model(06-15-17)(9598).pth': 50, # MCTSAI(1000, flag=True) 训练1000轮并去重
                      # {10: 0.32}
                      "models/chess_ai_model(06-16-15)(1400).pth": 0, # MCTSAI(10000, flag=False) 训练100轮(没标错, 真是0分)
                      # {10: 0.55, 20: 0.48}
                      'models/chess_ai_model(06-16-20)(1542).pth': 10, # MCTSAI(1000, flag=False) 训练100轮

                      # {10: 0.82, 20: 0.73, 30: 0.67, 40: 0.52, 50: 0.46}
                      # 
                      'models/chess_ai_model(06-17-12)(15475).pth': 40, # MCTSAI(1000, flag=True) 训练1000轮, 使用三层32-64-128卷积层, 128-256-25全连接层
                      # {10: 0.72, 20: 0.39}
                      'models/chess_ai_model(06-17-12)(1506).pth': 10, # MCTSAI(1000, flag=True) 训练100轮, 使用三层32-64-128卷积层, 128-256-25全连接层
                      
                      # {10: 0.9, 20: 0.84, 30: 0.72, 40: 0.6, 50: 0.48}
                      # {10: 0.88, 20: 0.83, 30: 0.69, 40: 0.68, 50: 0.57, 60: 0.65, 70: 0.61, 80: 0.63, 90: 0.49}
                      # 
                      'models/chess_ai_model(06-17-13)(15475).pth': 80, # MCTSAI(1000, flag=True) 训练1000轮, 使用三层16-32-64卷积层, 64-128-25全连接层
                      }

                    
# deep_learning_ai_0 = DeepLearningAI("models/chess_ai_model(06-15-17)(936).pth") 

# 测试AI
test_minimax = MinimaxAI(6) # score: 50
test_mcts_0 = MCTSAI(1000) # 
test_mcts_1 = MCTSAI(100, flag=True) # score: 50
test_mcts_2 = MCTSAI(100, flag=False) # score: 80
test_mcts_3 = MCTSAI(60, flag=True) # 50, {10: 0.82, 20: 0.76, 30: 0.53, 40: 0.57, 50: 0.51, 60: 0.32}
test_mcts_4 = MCTSAI(60, flag=False) # 110, {10: 0.86, 20: 0.86, 30: 0.77, 40: 0.63, 50: 0.58, 60: 0.63, 70: 0.58, 80: 0.52, 90: 0.51, 100: 0.5, 110: 0.52, 120: 0.45}
test_mcts_5 = MCTSAI(10, flag=True) # 0, {10: 0.45}
test_mcts_6 = MCTSAI(10, flag=False) # 50, {10: 0.76, 20: 0.69, 30: 0.57, 40: 0.56, 50: 0.5, 60: 0.4}
test_mcts_7 = MCTSAI(500, flag=True)