In [1]:
import torch
flag = torch.cuda.is_available()
print(flag)

ngpu= 1
# Decide which device we want to run on
device = torch.device("cuda:0" if (torch.cuda.is_available() and ngpu > 0) else "cpu")
print(device)
print(torch.cuda.get_device_name(0))
print(torch.rand(3,3).cuda()) 

True
cuda:0
NVIDIA RTX A5000
tensor([[0.9019, 0.5367, 0.5228],
        [0.7156, 0.9414, 0.0117],
        [0.4138, 0.0572, 0.3236]], device='cuda:0')


In [14]:
import sys
import os

# 确定项目根目录（假设当前工作目录是项目的根目录）
project_root = os.path.abspath("..")
sys.path.insert(0, project_root)

from tqdm import tqdm
from ai import ai_battle, MinimaxAI, MCTSAI
from game.chess_game import ChessGame


minimax_ai = MinimaxAI(3)
mcts_ai_0 = MCTSAI(1000, flag=True)
mcts_ai_1 = MCTSAI(1000, flag=False)

all_training_data = []

# 生成训练数据
for _ in tqdm(range(2)):  # 生成100场对战数据
    # all_training_data.append(("start", None))
    over_game = ai_battle(mcts_ai_0, mcts_ai_0, ChessGame((5, 5), 2), False)
    history_board = over_game.history_board
    history_move = over_game.history_move
    for step in history_board:
        if step == 0:
            continue
        all_training_data.append((history_board[step], history_move[step]))
    # all_training_data.append(("end", None))

all_training_data

100%|██████████| 2/2 [01:06<00:00, 33.14s/it]


[([[[0, 0], [0, 0], [0, 0], [0, 0], [0, 0]],
   [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0]],
   [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0]],
   [[0, 0], [0, 0], [0, 0], [1, 1], [2, 2]],
   [[0, 0], [0, 0], [0, 0], [0, 0], [1, 1]]],
  (3, 4)),
 ([[[0, 0], [0, 0], [0, 0], [0, 0], [0, 0]],
   [[0, 0], [-1, 1], [-2, 2], [-1, 1], [0, 0]],
   [[0, 0], [0, 0], [-1, 1], [0, 0], [0, 0]],
   [[0, 0], [0, 0], [0, 0], [1, 1], [2, 2]],
   [[0, 0], [0, 0], [0, 0], [0, 0], [1, 1]]],
  (1, 2)),
 ([[[0, 0], [0, 0], [0, 0], [0, 0], [0, 0]],
   [[0, 0], [-1, 1], [-2, 2], [-1, 1], [0, 0]],
   [[0, 0], [0, 0], [-1, 1], [0, 0], [0, 0]],
   [[0, 0], [1, 1], [2, 2], [2, 2], [2, 2]],
   [[0, 0], [0, 0], [1, 1], [0, 0], [1, 1]]],
  (3, 2)),
 ([[[0, 0], [0, 0], [-1, 1], [-2, 2], [-1, 1]],
   [[0, 0], [-1, 1], [-2, 2], [-2, 2], [0, 0]],
   [[0, 0], [0, 0], [-1, 1], [0, 0], [0, 0]],
   [[0, 0], [1, 1], [2, 2], [2, 2], [2, 2]],
   [[0, 0], [0, 0], [1, 1], [0, 0], [1, 1]]],
  (0, 3)),
 ([[[0, 0], [0, 0], [-1, 1], [-2, 2],

In [15]:
import torch
from torch.utils.data import DataLoader, Dataset

class ChessDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        board_state, move = self.data[idx]
        if move is None:
            return board_state, move  # 返回标记
        board_state = torch.tensor(board_state, dtype=torch.float32)
        move = move[0] * 5 + move[1]
        return board_state, move

dataset = ChessDataset(all_training_data)

# 过滤掉 None 数据
filtered_data = [item for item in dataset if item[0] is not None]

dataloader = DataLoader(filtered_data, batch_size=32, shuffle=True)

In [12]:
dataloader

<torch.utils.data.dataloader.DataLoader at 0x1db6d3cc3d0>

In [17]:
import torch.nn as nn
import torch.optim as optim

class ChessModel(nn.Module):
    def __init__(self):
        super(ChessModel, self).__init__()
        self.conv1 = nn.Conv2d(2, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(64 * 5 * 5, 128)
        self.fc2 = nn.Linear(128, 25)

    def forward(self, x):
        x = torch.relu(self.conv1(x))
        x = torch.relu(self.conv2(x))
        x = x.reshape(-1, 64 * 5 * 5)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

model = ChessModel().cuda()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(10):
    model.train()
    running_loss = 0.0
    for i, data in enumerate(dataloader, 0):
        inputs, labels = data
        inputs = inputs.permute(0, 3, 1, 2).cuda()
        labels = labels.cuda()

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 100 == 99:
            print(f'Epoch {epoch + 1}, Batch {i + 1}, Loss: {running_loss / 100:.3f}')
            running_loss = 0.0

print('Finished Training')
torch.save(model.state_dict(), 'chess_ai_model.pth')


Finished Training


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


In [18]:
import numpy as np
from ai.ai_algorithm import AIAlgorithm

class DeepLearningAI(AIAlgorithm):
    def __init__(self, model_path):
        self.model = ChessModel().cuda()
        self.model.load_state_dict(torch.load(model_path))
        self.model.eval()

    def find_best_move(self, game: ChessGame):
        board_state = np.array(game.chessboard).reshape(1, 5, 5, 2)
        board_state = torch.tensor(board_state, dtype=torch.float32).permute(0, 3, 1, 2).cuda()
        with torch.no_grad():
            outputs = self.model(board_state)
            move_index = torch.argmax(outputs).item()
            move = (move_index // 5, move_index % 5)
        return move

# 加载深度学习AI
deep_learning_ai = DeepLearningAI('chess_ai_model.pth')

# 与其他AI算法进行对战
# ai_battle(deep_learning_ai, minimax_ai, display=True)
ai_battle(deep_learning_ai, mcts_ai_0, display=True)


游戏开始！
蓝方AI: DeepLearningAI
红方AI: MCTSAI

[
  [[2, 2], [1, 1], [0, 0], [0, 0], [0, 0]],
  [[1, 1], [0, 0], [0, 0], [0, 0], [0, 0]],
  [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0]],
  [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0]],
  [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0]]
]
第1步: 蓝方 落子在 (0, 0)
获胜概率: 0.00%
分数: 1.5
用时: 0.03s

[
  [[ 2,  2], [ 1,  1], [ 0,  0], [ 0,  0], [ 0,  0]],
  [[ 0,  2], [-2,  2], [-1,  1], [ 0,  0], [ 0,  0]],
  [[ 0,  0], [-1,  1], [ 0,  0], [ 0,  0], [ 0,  0]],
  [[ 0,  0], [ 0,  0], [ 0,  0], [ 0,  0], [ 0,  0]],
  [[ 0,  0], [ 0,  0], [ 0,  0], [ 0,  0], [ 0,  0]]
]
第2步: 红方 落子在 (1, 1)
获胜概率: 68.00%
分数: -3.5
用时: 2.08s



AssertionError: 须在值为0处落子, (0,0)为2