In [23]:
import tensorflow as tf
from keras import layers

class ActorCritic(tf.keras.Model):
    """Actor-critic network with one shared hidden layer and two output heads.

    The shared trunk is a single 512-unit ReLU layer. The actor head produces a
    softmax distribution over ``action_dim`` actions and the critic head
    produces one linear scalar per input row.

    Args:
        input_dim: Number of features in one flattened observation.
        action_dim: Number of discrete actions (width of the actor output).
    """

    def __init__(self, input_dim, action_dim):
        super().__init__()
        # Shared feature extractor used by both heads.
        trunk_layers = [
            layers.Input(shape=(input_dim,)),
            layers.Dense(512, activation='relu'),
        ]
        self.common = tf.keras.Sequential(trunk_layers)
        # Policy head: probabilities over the action table.
        self.actor = layers.Dense(action_dim, activation='softmax')
        # Value head: a single unbounded estimate.
        self.critic = layers.Dense(1, activation='linear')

    def call(self, inputs):
        """Run a forward pass.

        Args:
            inputs: Batch of observations, one row of ``input_dim`` features each.

        Returns:
            A ``(action_probabilities, state_value)`` tuple, both computed from
            the same shared features.
        """
        shared_features = self.common(inputs)
        action_probabilities = self.actor(shared_features)
        state_value = self.critic(shared_features)
        return action_probabilities, state_value

In [28]:
import chess
import chess.engine
import chess.pgn
import tensorflow as tf
import numpy as np
from itertools import product

# ===== Load All UCI Moves =====
def generate_all_possible_moves():
    """Build the fixed, sorted table of UCI-style move strings.

    Every ordered pair of the 64 square names is included (source and target
    may be the same square), both as a plain four-character move and with each
    of the four promotion suffixes ``q``, ``r``, ``b``, ``n`` appended. No
    legality filtering is applied.

    Returns:
        A lexicographically sorted list of unique move strings
        (64 * 64 plain moves plus 64 * 64 * 4 promotion moves).
    """
    square_names = [chess.square_name(index) for index in range(64)]
    # All source/target combinations, e.g. "e2e4".
    plain = [src + dst for src, dst in product(square_names, repeat=2)]
    # The same combinations with a promotion piece letter appended, e.g. "e7e8q".
    promoted = [pair + piece for pair, piece in product(plain, 'qrbn')]
    unique_moves = set(plain)
    unique_moves.update(promoted)
    return sorted(unique_moves)

# Fixed action table (index -> move string) and its inverse (move string -> index).
ALL_MOVES = generate_all_possible_moves()
MOVE_IDX = dict(zip(ALL_MOVES, range(len(ALL_MOVES))))

# ===== Convert Board to State (8x8x12 -> 768) =====
def board_to_state(board):
    """Encode a board as a flat one-hot vector of length 768.

    An ``8 x 8 x 12`` float32 array is filled with a 1 at
    ``[square // 8, square % 8, channel]`` for every occupied square, where the
    channel is chosen by the piece symbol (``P N B R Q K`` map to 0-5 and
    ``p n b r q k`` map to 6-11). The array is then flattened.

    Args:
        board: Object exposing ``piece_at(square)``; the returned piece must
            provide ``symbol()``.

    Returns:
        A new 1-D ``numpy.float32`` array with 768 entries.
    """
    # Channel index is the position of the symbol in this string.
    channel_of = {symbol: channel for channel, symbol in enumerate('PNBRQKpnbrqk')}
    planes = np.zeros((8, 8, 12), dtype=np.float32)
    for square in chess.SQUARES:
        occupant = board.piece_at(square)
        if not occupant:
            continue  # empty square stays all-zero
        row_idx = square // 8
        col_idx = square % 8
        planes[row_idx, col_idx, channel_of[occupant.symbol()]] = 1
    return planes.flatten()

# ===== Build Actor-Critic Model =====
# NOTE(review): no weights are loaded anywhere in this cell, so the policy used
# below comes from a freshly initialised network. Restore a trained checkpoint
# here before treating the game as an evaluation of the agent.
model = ActorCritic(input_dim=768, action_dim=len(ALL_MOVES))

# ===== Configuration =====
STOCKFISH_PATH = "stockfish\\stockfish-windows-x86-64-avx2.exe"  # Replace with actual path
ENGINE_LIMIT = chess.engine.Limit(time=0.1)  # thinking time per Stockfish move
PGN_PATH = "agent_vs_stockfish.pgn"


def _sample_legal_move(board, policy):
    """Sample one legal move according to the policy, restricted to legal moves.

    Args:
        board: Current position; ``board.legal_moves`` must yield moves that
            expose ``uci()``.
        policy: 1-D array-like of action weights indexed by ``MOVE_IDX`` values.

    Returns:
        The chosen move object, or ``None`` when no legal move has an entry in
        the action table.
    """
    candidates = [m for m in board.legal_moves if m.uci() in MOVE_IDX]
    if not candidates:
        return None

    indices = [MOVE_IDX[m.uci()] for m in candidates]
    # Renormalise in float64 over legal moves only, so the probabilities sum
    # to 1 as np.random.choice requires.
    weights = np.asarray(policy, dtype=np.float64)[indices]
    total = weights.sum()
    if np.isfinite(total) and total > 0:
        probabilities = weights / total
    else:
        # The policy puts no usable mass on legal moves: fall back to uniform
        # instead of letting np.random.choice raise.
        probabilities = np.full(len(candidates), 1.0 / len(candidates))

    chosen = np.random.choice(len(candidates), p=probabilities)
    return candidates[chosen]


# ===== Start Engine =====
# NOTE(review): the error recorded for this cell is a NotImplementedError with
# an empty message. That is consistent with asyncio subprocess support being
# unavailable on the active event loop (seen on Windows under Jupyter) -
# confirm against the full traceback before changing the launch code.
engine = chess.engine.SimpleEngine.popen_uci(STOCKFISH_PATH)

try:
    # ===== Start Game =====
    board = chess.Board()
    game = chess.pgn.Game()
    node = game

    print(board, "\n")

    while not board.is_game_over():
        if board.turn == chess.WHITE:
            # === Agent's Turn ===
            state = board_to_state(board)
            state_tensor = tf.convert_to_tensor([state], dtype=tf.float32)
            policy, _ = model(state_tensor)

            move = _sample_legal_move(board, policy.numpy()[0])
            if move is None:
                print("Agent has no legal move in its action table")
                break
            board.push(move)
            print(f"Agent (White): {move.uci()}")
        else:
            # === Stockfish's Turn ===
            move = engine.play(board, ENGINE_LIMIT).move
            if move is None:
                # The engine returned no move; stop instead of pushing None.
                print("Stockfish returned no move")
                break
            board.push(move)
            print(f"Stockfish (Black): {move}")

        node = node.add_variation(move)

        print(board)
        print("-" * 40)

    # ===== Game Over =====
    result = board.result()  # "*" when the loop was left before the game ended
    print("Game over. Result:", result)

    # Save PGN, with the Result header matching the final board state.
    game.headers["Result"] = result
    with open(PGN_PATH, "w") as f:
        print(game, file=f)
finally:
    # Always shut the engine process down, even if the game loop raised.
    engine.quit()


NotImplementedError: 