In [1]:
!pip install cshogi



In [2]:
import cshogi
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pickle
from tqdm import tqdm

In [3]:
class ShogiFeatures:
    def __init__(self):
        self.piece_planes = 14  # 駒の種類数
        self.board_size = 81    # 9x9

    def position_to_features(self, board):
        # 盤面の特徴量（14種類の駒 x 2プレイヤー）
        features = np.zeros((28, 9, 9), dtype=np.float32)

        # 盤上の駒の配置
        for square in range(81):
            piece = board.piece(square)
            if piece != 0:
                piece_type = piece & 0b1111
                is_black = (piece & 0b10000) == 0
                row = square // 9
                col = square % 9
                if is_black:
                    features[piece_type-1][row][col] = 1
                else:
                    features[piece_type+13][row][col] = 1

        # 持ち駒の情報を追加
        for color in range(2):
            for piece_type in range(1, 8):
                count = board.pieces_in_hand[color][piece_type]
                if color == 0:  # 先手
                    features[piece_type+27][0][0] = count
                else:  # 後手
                    features[piece_type+34][0][0] = count

        return features

In [4]:
class ShogiNet(nn.Module):
    def __init__(self):
        super(ShogiNet, self).__init__()
        self.conv1 = nn.Conv2d(28, 256, 3, padding=1)
        self.conv2 = nn.Conv2d(256, 256, 3, padding=1)
        self.conv3 = nn.Conv2d(256, 256, 3, padding=1)

        self.policy_conv = nn.Conv2d(256, 256, 1)
        self.policy_fc = nn.Linear(256 * 81, 2187)  # 最大の合法手数

        self.value_conv = nn.Conv2d(256, 1, 1)
        self.value_fc1 = nn.Linear(81, 256)
        self.value_fc2 = nn.Linear(256, 1)

    def forward(self, x):
        x = torch.relu(self.conv1(x))
        x = torch.relu(self.conv2(x))
        x = torch.relu(self.conv3(x))

        # Policy head
        policy = torch.relu(self.policy_conv(x))
        policy = policy.view(-1, 256 * 81)
        policy = self.policy_fc(policy)

        # Value head
        value = torch.relu(self.value_conv(x))
        value = value.view(-1, 81)
        value = torch.relu(self.value_fc1(value))
        value = torch.tanh(self.value_fc2(value))

        return policy, value

In [5]:
class ShogiDataset(Dataset):
    def __init__(self, positions, moves, outcomes):
        self.positions = positions
        self.moves = moves
        self.outcomes = outcomes
        self.features = ShogiFeatures()

    def __len__(self):
        return len(self.positions)

    def __getitem__(self, idx):
        features = self.features.position_to_features(self.positions[idx])
        return (torch.FloatTensor(features),
                torch.LongTensor([self.moves[idx]]),
                torch.FloatTensor([self.outcomes[idx]]))

In [6]:
def train_model(model, train_loader, optimizer, epochs=10):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    policy_criterion = nn.CrossEntropyLoss()
    value_criterion = nn.MSELoss()

    for epoch in range(epochs):
        model.train()
        total_loss = 0

        for batch_idx, (data, moves, outcomes) in enumerate(tqdm(train_loader)):
            data, moves, outcomes = data.to(device), moves.to(device), outcomes.to(device)

            optimizer.zero_grad()
            policy_out, value_out = model(data)

            policy_loss = policy_criterion(policy_out, moves.squeeze())
            value_loss = value_criterion(value_out, outcomes)
            loss = policy_loss + value_loss

            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        print(f'Epoch {epoch+1}, Average loss: {total_loss / len(train_loader)}')

In [7]:
class DeepShogiAI:
    def __init__(self, model):
        self.model = model
        self.features = ShogiFeatures()

    def select_move(self, board):
        self.model.eval()
        features = self.features.position_to_features(board)
        features_tensor = torch.FloatTensor(features).unsqueeze(0)

        with torch.no_grad():
            policy_out, _ = self.model(features_tensor)
            policy = torch.softmax(policy_out, dim=1)

        legal_moves = list(board.legal_moves)
        move_probs = policy[0][legal_moves].numpy()

        # 確率に基づいて手を選択
        selected_idx = np.random.choice(len(legal_moves), p=move_probs/move_probs.sum())
        return legal_moves[selected_idx]

In [8]:
def play_test_game(ai, num_moves=10):
    board = cshogi.Board()
    print("Initial position:")
    print(board)

    for i in range(num_moves):
        move = ai.select_move(board)
        board.push(move)
        print(f"\nMove {i+1}:")
        print(board)

In [10]:
# モデルとAIの初期化
model = ShogiNet()
optimizer = optim.Adam(model.parameters())

# データセットの作成と学習
#dataset = ShogiDataset(positions, moves, outcomes)
#train_loader = DataLoader(dataset, batch_size=32, shuffle=True)
#train_model(model, train_loader, optimizer)

# テスト対局
ai = DeepShogiAI(model)
play_test_game(ai)

Initial position:
'  9  8  7  6  5  4  3  2  1
P1-KY-KE-GI-KI-OU-KI-GI-KE-KY
P2 * -HI *  *  *  *  * -KA * 
P3-FU-FU-FU-FU-FU-FU-FU-FU-FU
P4 *  *  *  *  *  *  *  *  * 
P5 *  *  *  *  *  *  *  *  * 
P6 *  *  *  *  *  *  *  *  * 
P7+FU+FU+FU+FU+FU+FU+FU+FU+FU
P8 * +KA *  *  *  *  * +HI * 
P9+KY+KE+GI+KI+OU+KI+GI+KE+KY
+



IndexError: index 28 is out of bounds for axis 0 with size 28

In [4]:
import cshogi
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import Dataset, DataLoader

class ShogiDataset(Dataset):
    def __init__(self, kifu_list):
        self.board_states = []
        self.moves = []

        for kifu in kifu_list:
            board = cshogi.Board()
            for move in kifu:
                # 盤面を特徴量に変換
                features = self.board_to_features(board)
                self.board_states.append(features)
                self.moves.append(move)
                board.push(move)

    def board_to_features(self, board):
        # 14種類の駒 × 2プレイヤー + 持ち駒の特徴
        features = np.zeros((30, 9, 9), dtype=np.float32)

        # 盤上の駒の配置
        for square in range(81):
            piece = board.piece(square)
            if piece != 0:
                piece_type = piece & 0b1111
                is_black = (piece & 0b10000) == 0
                row = square // 9
                col = square % 9
                channel = piece_type - 1 + (0 if is_black else 14)
                features[channel][row][col] = 1

        # 持ち駒の特徴
        for color in range(2):
            for piece_type in range(7):
                count = board.pieces_in_hand[color][piece_type + 1]
                if count > 0:
                    channel = 28 + color
                    features[channel] += count / 18.0  # 正規化

        return features

    def __len__(self):
        return len(self.board_states)

    def __getitem__(self, idx):
        return self.board_states[idx], self.moves[idx]

class ShogiNet(nn.Module):
    def __init__(self):
        super(ShogiNet, self).__init__()
        self.conv1 = nn.Conv2d(30, 256, 3, padding=1)
        self.conv2 = nn.Conv2d(256, 256, 3, padding=1)
        self.conv3 = nn.Conv2d(256, 256, 3, padding=1)
        self.conv4 = nn.Conv2d(256, 256, 3, padding=1)
        self.conv5 = nn.Conv2d(256, 256, 3, padding=1)
        self.conv6 = nn.Conv2d(256, 256, 3, padding=1)
        self.policy_conv = nn.Conv2d(256, 27, 1)  # 移動の種類に応じた出力チャンネル
        self.bn1 = nn.BatchNorm2d(256)
        self.bn2 = nn.BatchNorm2d(256)
        self.bn3 = nn.BatchNorm2d(256)
        self.bn4 = nn.BatchNorm2d(256)
        self.bn5 = nn.BatchNorm2d(256)
        self.bn6 = nn.BatchNorm2d(256)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.relu(self.bn2(self.conv2(x)))
        x = self.relu(self.bn3(self.conv3(x)))
        x = self.relu(self.bn4(self.conv4(x)))
        x = self.relu(self.bn5(self.conv5(x)))
        x = self.relu(self.bn6(self.conv6(x)))
        policy = self.policy_conv(x)
        return policy

class DeepLearningAI:
    def __init__(self, model_path=None):
        self.


SyntaxError: invalid syntax (<ipython-input-4-a70ae8546f56>, line 83)

In [3]:
import cshogi
import numpy as np
from collections import defaultdict

class SimpleAI:
    def __init__(self):
        self.piece_values = {
            1: 100,   # 歩
            2: 400,   # 香
            3: 300,   # 桂
            4: 450,   # 銀
            5: 600,   # 金
            6: 900,   # 角
            7: 1000,  # 飛
            8: 15000, # 玉
            9: 200,   # と
            10: 500,  # 成香
            11: 500,  # 成桂
            12: 550,  # 成銀
            13: 1000, # 馬
            14: 1200, # 龍
        }

    def evaluate_position(self, board):
        score = 0
        pieces = defaultdict(int)

        # 盤上の駒の評価
        for square in range(81):
            piece = board.piece(square)
            if piece != 0:
                piece_type = piece & 0b1111
                is_black = (piece & 0b10000) == 0
                value = self.piece_values[piece_type]
                pieces[piece_type] += 1 if is_black else -1
                score += value if is_black else -value

        # 持ち駒の評価
        for piece_type in range(1, 8):
            if piece_type < len(board.pieces_in_hand[0]) and piece_type < len(board.pieces_in_hand[1]):
                count_black = board.pieces_in_hand[0][piece_type]
                count_white = board.pieces_in_hand[1][piece_type]
                value = self.piece_values[piece_type]
                score += value * count_black - value * count_white

        return score

    def search_best_move(self, board, depth=3):
        def alpha_beta(board, depth, alpha, beta, maximizing_player):
            if depth == 0:
                return self.evaluate_position(board)

            if maximizing_player:
                max_eval = float('-inf')
                moves = list(board.legal_moves)
                for move in moves:
                    board.push(move)
                    eval = alpha_beta(board, depth - 1, alpha, beta, False)
                    board.pop()
                    max_eval = max(max_eval, eval)
                    alpha = max(alpha, eval)
                    if beta <= alpha:
                        break
                return max_eval
            else:
                min_eval = float('inf')
                moves = list(board.legal_moves)
                for move in moves:
                    board.push(move)
                    eval = alpha_beta(board, depth - 1, alpha, beta, True)
                    board.pop()
                    min_eval = min(min_eval, eval)
                    beta = min(beta, eval)
                    if beta <= alpha:
                        break
                return min_eval

        best_move = None
        best_value = float('-inf')
        moves = list(board.legal_moves)

        for move in moves:
            board.push(move)
            value = alpha_beta(board, depth - 1, float('-inf'), float('inf'), False)
            board.pop()

            if value > best_value:
                best_value = value
                best_move = move

        return best_move

# テスト用のコード
def test_ai():
    board = cshogi.Board()
    ai = SimpleAI()

    print("初期局面:")
    print(board)

    # 5手指してみる
    for i in range(5):
        move = ai.search_best_move(board, depth=3)
        board.push(move)
        print(f"\n{i+1}手目:")
        print(board)
        print(f"評価値: {ai.evaluate_position(board)}")

if __name__ == "__main__":
    test_ai()


初期局面:
'  9  8  7  6  5  4  3  2  1
P1-KY-KE-GI-KI-OU-KI-GI-KE-KY
P2 * -HI *  *  *  *  * -KA * 
P3-FU-FU-FU-FU-FU-FU-FU-FU-FU
P4 *  *  *  *  *  *  *  *  * 
P5 *  *  *  *  *  *  *  *  * 
P6 *  *  *  *  *  *  *  *  * 
P7+FU+FU+FU+FU+FU+FU+FU+FU+FU
P8 * +KA *  *  *  *  * +HI * 
P9+KY+KE+GI+KI+OU+KI+GI+KE+KY
+


1手目:
'  9  8  7  6  5  4  3  2  1
P1-KY-KE-GI-KI-OU-KI-GI-KE-KY
P2 * -HI *  *  *  *  * -KA * 
P3-FU-FU-FU-FU-FU-FU-FU-FU-FU
P4 *  *  *  *  *  *  *  *  * 
P5 *  *  *  *  *  *  *  *  * 
P6 *  * +FU *  *  *  *  *  * 
P7+FU+FU * +FU+FU+FU+FU+FU+FU
P8 * +KA *  *  *  *  * +HI * 
P9+KY+KE+GI+KI+OU+KI+GI+KE+KY
-

評価値: 0

2手目:
'  9  8  7  6  5  4  3  2  1
P1-KY-KE-GI-KI-OU-KI-GI-KE-KY
P2 * -HI *  *  *  *  * -KA * 
P3-FU-FU-FU-FU-FU-FU-FU-FU * 
P4 *  *  *  *  *  *  *  * -FU
P5 *  *  *  *  *  *  *  *  * 
P6 *  * +FU *  *  *  *  *  * 
P7+FU+FU * +FU+FU+FU+FU+FU+FU
P8 * +KA *  *  *  *  * +HI * 
P9+KY+KE+GI+KI+OU+KI+GI+KE+KY
+

評価値: 0

3手目:
'  9  8  7  6  5  4  3  2  1
P1-KY-KE-GI-KI-OU-KI-GI-KE-