In [1]:
import torch
flag = torch.cuda.is_available()
print(flag)

if flag:
    print(torch.cuda.get_device_name(0))
    print(torch.rand(3,3).cuda()) 

True
NVIDIA RTX A5000
tensor([[0.6411, 0.3169, 0.9890],
        [0.3489, 0.2425, 0.1572],
        [0.1931, 0.4706, 0.4741]], device='cuda:0')


In [20]:
import sys
import os

# 确定项目根目录（假设当前工作目录是项目的根目录）
project_root = os.path.abspath("..")
sys.path.insert(0, project_root)

import pickle
from ai import ai_battle, MinimaxAI, MCTSAI
from game.chess_game import ChessGame
from CelestialVault.instances import ExampleThreadManager


class TrainDataThread(ExampleThreadManager):
    def get_args(self, obj: object):
        return (mcts_ai_0, mcts_ai_0, ChessGame((5, 5), 2), False)
    
    def process_result(self):
        all_training_data = []
        result_dict = self.get_result_dict()
        for d in result_dict:
            over_game = result_dict[d]
            history_board = over_game.history_board
            history_move = over_game.history_move
            for step in history_board:
                if step+1 not in history_board.keys():
                    continue
                board = self.process_board(history_board, step)
                # if (board, history_move[step+1]) in all_training_data: # 这样效果并不好
                #     continue
                all_training_data.append((board, history_move[step+1]))
        return all_training_data
    
    def process_board(self, history_board, step):
        color = 1 if step % 2 == 0 else -1
        
        processed_board = []
        for row in history_board[step]:
            processed_row = []
            for cell in row:
                processed_cell = cell + [color]
                if processed_cell[0] == float("inf"):
                    processed_cell[0] = 5
                processed_row.append(processed_cell)
            processed_board.append(processed_row)
        return processed_board



minimax_ai = MinimaxAI(5)
mcts_ai_0 = MCTSAI(1000, flag=True)
mcts_ai_1 = MCTSAI(1000, flag=False)

train_data_threader = TrainDataThread(
            ai_battle,
            thread_num=50,
            tqdm_desc='trainDataProcess',
            show_progress=True)


In [21]:

from time import strftime, localtime

def load_data(file_path):
    with open(file_path, "rb") as f:
        data = pickle.load(f)
    return data

def save_data(data):
    data_size = len(data)
    now_time = strftime("%m-%d-%H", localtime())
    pickle.dump(data, open(f"train_data/all_training_data({now_time})({data_size}).pkl", "wb"))

def train_data(train_num):
    train_data_threader.start(range(train_num), "serial")
    train_data_threader.handle_error()
    all_training_data = train_data_threader.process_result()

    save_data(all_training_data)
    
    return all_training_data

all_training_data = []
for _ in range(1):
    all_training_data += train_data(100)

len(all_training_data)

trainDataProcess:   0%|          | 0/50 [00:00<?, ?it/s]

trainDataProcess: 100%|██████████| 50/50 [36:57<00:00, 44.34s/it]
trainDataProcess: 100%|██████████| 50/50 [30:56<00:00, 37.12s/it]


1542

In [69]:
save_data(all_training_data)

In [85]:
all_training_data = load_data(r"G:\Project\Celestial-Chess\ai\train_data\all_training_data(06-13-23)(1506).pkl")

In [67]:
old_data = all_training_data[:]
all_training_data = []

for i in old_data:
    if i in all_training_data:
        continue
    all_training_data.append(i)

len(all_training_data)

9598

In [22]:
import torch
from torch.utils.data import DataLoader, Dataset

class ChessDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        board_state, move = self.data[idx]
        board_state = torch.tensor(board_state, dtype=torch.float32)
        move = move[0] * 5 + move[1]
        return board_state, move

dataset = ChessDataset(all_training_data)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

len(dataset)

1542

In [23]:
from time import strftime, localtime
import torch.nn as nn
import torch.optim as optim
from ai.deeplearning import ChessModel, DeepLearningAI

# 设置CuDNN选项
torch.backends.cudnn.benchmark = True

model = ChessModel().cuda() # 初始化模型，并将其移动到GPU上
criterion = nn.CrossEntropyLoss() # 定义交叉熵损失函数
optimizer = optim.Adam(model.parameters(), lr=0.001) # 定义Adam优化器

# 训练循环
num_epochs = 10  # 训练10个epoch
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, data in enumerate(dataloader, 0):
        inputs, labels = data
        # 调整输入的维度，并将其移动到GPU上
        # inputs 的原始形状是 (batch_size, height, width, channels)，也就是 (32, 5, 5, 3)
        # inputs.permute(0, 3, 1, 2) 会将 inputs 的维度从 (32, 5, 5, 3) 转换为 (32, 3, 5, 5)
        inputs = inputs.permute(0, 3, 1, 2).cuda()  # (batch_size, channels, height, width)
        labels = labels.cuda()

        # 清零梯度
        optimizer.zero_grad()

        # 前向传播
        outputs = model(inputs)
        # 计算损失
        loss = criterion(outputs, labels)
        # 反向传播
        loss.backward()
        # 更新参数
        optimizer.step()

        # 累积损失
        running_loss += loss.item()
        if i % 100 == 99:  # 每100个batch打印一次loss
            print(f'Epoch {epoch + 1}, Batch {i + 1}, Loss: {running_loss / 100:.3f}')
            running_loss = 0.0

print('Finished Training')
data_size = len(dataset)
now_time = strftime("%m-%d-%H", localtime())
torch.save(model.state_dict(), f'models/chess_ai_model({now_time})({data_size}).pth')


Finished Training


In [30]:
import sys
import os

# 确定项目根目录（假设当前工作目录是项目的根目录）
project_root = os.path.abspath("..")
sys.path.insert(0, project_root)

from tqdm import tqdm
from ai import ai_battle, MinimaxAI, MCTSAI
from ai.deeplearning import DeepLearningAI
from game.chess_game import ChessGame

def get_model_score_by_mcts(test_model):
    score_dict = dict()
    for i in tqdm(range(10, 1000, 10)):
        win = 0
        test_mcts = MCTSAI(i, flag=True)
        for _ in range(100):
            game = ai_battle(test_model, test_mcts, ChessGame((5, 5), 2), display=False)
            if game.who_is_winner() == 1:
                win += 1

        score_dict[i] = win / 100
        if score_dict[i] < 0.5:
            return i - 10, score_dict
        

# 深度学习AI
deep_learning_dict = {
                      # {10: 0.7, 20: 0.68, 30: 0.47}
                      "models/chess_ai_model(06-13-19).pth": 20, # 未知
                      # {10: 0.7, 20: 0.51, 30: 0.43}
                      "models/chess_ai_model(06-15-17)(1506).pth": 20, # MCTSAI(1000, flag=True) 训练100轮
                      # {10: 0.75, 20: 0.65, 30: 0.47}
                      'models/chess_ai_model(06-15-17)(1471).pth': 20, # MCTSAI(10000, flag=True) 训练100轮
                      # {10: 0.63, 20: 0.41}
                      'models/chess_ai_model(06-15-17)(936).pth': 10, # MCTSAI(10000, flag=True) 训练100轮并去重
                      # {10: 0.82, 20: 0.76, 30: 0.65, 40: 0.57, 50: 0.56, 60: 0.44}
                      'models/chess_ai_model(06-15-17)(15475).pth': 50, # MCTSAI(1000, flag=True) 训练1000轮
                      # {10: 0.84, 20: 0.58, 30: 0.57, 40: 0.61, 50: 0.56, 60: 0.41}
                      'models/chess_ai_model(06-15-17)(9598).pth': 50, # MCTSAI(1000, flag=True) 训练1000轮并去重
                      # {10: 0.32}
                      "models/chess_ai_model(06-16-15)(1400).pth": 0, # MCTSAI(10000, flag=False) 训练100轮(没标错, 真是0分)
                      # {10: 0.55, 20: 0.48}
                      'models/chess_ai_model(06-16-20)(1542).pth': 10, # MCTSAI(1000, flag=False) 训练100轮
                      }

                    
# deep_learning_ai_0 = DeepLearningAI("models/chess_ai_model(06-15-17)(936).pth") 

# 测试AI
test_minimax = MinimaxAI(6) # score: 50
test_mcts_0 = MCTSAI(1000, flag=True)
test_mcts_1 = MCTSAI(100, flag=True) # score: 50
test_mcts_2 = MCTSAI(100, flag=False) # score: 80
test_mcts_3 = MCTSAI(60, flag=True) # 50, {10: 0.82, 20: 0.76, 30: 0.53, 40: 0.57, 50: 0.51, 60: 0.32}
test_mcts_4 = MCTSAI(60, flag=False) # 110, {10: 0.86, 20: 0.86, 30: 0.77, 40: 0.63, 50: 0.58, 60: 0.63, 70: 0.58, 80: 0.52, 90: 0.51, 100: 0.5, 110: 0.52, 120: 0.45}
test_mcts_5 = MCTSAI(10, flag=True) # 0, {10: 0.45}
test_mcts_6 = MCTSAI(10, flag=False) # 50, {10: 0.76, 20: 0.69, 30: 0.57, 40: 0.56, 50: 0.5, 60: 0.4}
test_mcts_7 = MCTSAI(500, flag=True)

# 与其他AI算法进行对战
# ai_battle(deep_learning_ai_2, test_mcts_0, ChessGame((5, 5), 2), display=True)

In [31]:
get_model_score_by_mcts(test_mcts_7)

  7%|▋         | 7/99 [1:44:18<22:50:51, 894.04s/it]


KeyboardInterrupt: 

In [17]:
for path in deep_learning_dict.keys():
    deep_learning_ai = DeepLearningAI(path)
    score, score_dict = get_model_score_by_mcts(deep_learning_ai)
    deep_learning_dict[path] = score
    print(f"{path} : {score_dict}", end = '\n')

deep_learning_dict

  0%|          | 0/99 [00:00<?, ?it/s]

  2%|▏         | 2/99 [01:58<1:36:03, 59.42s/it]


models/chess_ai_model(06-13-19).pth : {10: 0.7, 20: 0.68, 30: 0.47}


  2%|▏         | 2/99 [01:58<1:36:08, 59.47s/it]


models/chess_ai_model(06-15-17)(1506).pth : {10: 0.7, 20: 0.51, 30: 0.43}


  2%|▏         | 2/99 [01:50<1:29:12, 55.18s/it]


models/chess_ai_model(06-15-17)(1471).pth : {10: 0.75, 20: 0.65, 30: 0.47}


  1%|          | 1/99 [00:51<1:24:09, 51.53s/it]


models/chess_ai_model(06-15-17)(936).pth : {10: 0.63, 20: 0.41}


  5%|▌         | 5/99 [08:03<2:31:38, 96.79s/it]


models/chess_ai_model(06-15-17)(15475).pth : {10: 0.82, 20: 0.76, 30: 0.65, 40: 0.57, 50: 0.56, 60: 0.44}


  5%|▌         | 5/99 [07:03<2:12:42, 84.70s/it]


models/chess_ai_model(06-15-17)(9598).pth : {10: 0.84, 20: 0.58, 30: 0.57, 40: 0.61, 50: 0.56, 60: 0.41}


  0%|          | 0/99 [00:20<?, ?it/s]

models/chess_ai_model(06-16-15)(1400).pth : {10: 0.32}





{'models/chess_ai_model(06-13-19).pth': 20,
 'models/chess_ai_model(06-15-17)(1506).pth': 20,
 'models/chess_ai_model(06-15-17)(1471).pth': 20,
 'models/chess_ai_model(06-15-17)(936).pth': 10,
 'models/chess_ai_model(06-15-17)(15475).pth': 50,
 'models/chess_ai_model(06-15-17)(9598).pth': 50,
 'models/chess_ai_model(06-16-15)(1400).pth': 0}