Imports

In [None]:
import os
import chess
import chess.engine
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout, Conv2D, Flatten, Input
from collections import deque
import random
import csv
import h5py
import time
import threading


Model core

In [None]:
# Hyper-parameters shared by the model, the agent and the search.

# Not referenced in the code shown here; presumably the discount factor used
# during training -- TODO confirm.
GAMMA = 0.95
# Optimizer settings passed to AdamW wherever the model is compiled.
LEARNING_RATE = 0.001
WEIGHT_DECAY = 1e-4
# Maximum length of the agent's `memory` deque.
MEMORY_SIZE = 10000
# Initial softmax temperature of a new ChessAgent.
TAU_MAX = 1.0
# Not referenced in the code shown here; presumably the floor and per-step
# decay of the temperature -- TODO confirm.
TAU_MIN = 0.1
TAU_DECAY = 0.9995
# Default cap on the number of random moves played in one rollout.
MAX_DEPTH = 16
# Default cap on concurrently running simulation threads.
NUM_THREADS = 4
# Default search budget per move, in seconds.
TIME_LIMIT = 2.0

def build_model(input_shape=(8, 8, 12)):
    """Build and compile the convolutional position-evaluation network.

    Args:
        input_shape: Shape of a single encoded position fed to the network.

    Returns:
        A compiled Keras ``Sequential`` model with one linear output unit,
        trained with mean squared error and the AdamW optimizer.
    """
    stack = [
        Input(shape=input_shape),
        Conv2D(64, kernel_size=3, activation='relu', padding='same'),
        Conv2D(64, kernel_size=3, activation='relu', padding='same'),
        Flatten(),
        Dense(256, activation='relu'),
        Dropout(0.3),
        # Single unbounded scalar: the position value.
        Dense(1, activation='linear'),
    ]
    network = Sequential()
    for layer in stack:
        network.add(layer)

    network.compile(
        optimizer=tf.keras.optimizers.AdamW(
            learning_rate=LEARNING_RATE,
            weight_decay=WEIGHT_DECAY,
        ),
        loss='mean_squared_error',
    )
    return network

class Node:
    """One position in the Monte Carlo search tree.

    Each node owns a private copy of its board, a list of child nodes (one
    per move tried so far) and a lock guarding its mutable statistics.
    """

    def __init__(self, board, parent=None, move=None, agent=None):
        """Create a node for *board*.

        Args:
            board: Position this node represents; it is copied, so the
                caller's board is never modified through the node.
            parent: Parent node, or ``None`` for the root.
            move: Move that led from the parent's position to this one.
            agent: Object providing ``softmax(values)``, used by
                ``best_child``.
        """
        self.board = board.copy()
        self.parent = parent
        self.move = move
        # Children are appended one at a time by expand().
        self.children = []
        # Visit count and accumulated value; updated from outside the class.
        self.visits = 0
        self.value = 0.0
        self.agent = agent
        self.lock = threading.Lock()

    def is_fully_expanded(self):
        """Return True when every legal move already has a child node."""
        return len(self.children) == len(list(self.board.legal_moves))

    def best_child(self):
        """Sample a child with probability given by the softmax of mean values.

        Returns:
            One of ``self.children``. Falls back to a uniform random pick
            when the agent's softmax yields nothing.
        """
        with self.lock:
            # The epsilon keeps unvisited children from dividing by zero.
            q_values = np.array([child.value / (child.visits + 1e-8) for child in self.children])
            probabilities = self.agent.softmax(q_values)
            if probabilities is None or len(probabilities) == 0:
                return random.choice(self.children)
            # Sample an index rather than the objects themselves, so NumPy
            # never has to coerce Node instances into an array.
            index = np.random.choice(len(self.children), p=probabilities)
            return self.children[index]

    def expand(self):
        """Create and return a child for the first legal move not yet tried.

        Returns:
            The new child node, or ``None`` when every legal move already
            has a child.
        """
        with self.lock:
            tried_moves = {child.move for child in self.children}
            for move in self.board.legal_moves:
                if move in tried_moves:
                    continue
                # Node.__init__ copies the board; pushing on the child's own
                # copy avoids a second, redundant copy.
                child_node = Node(self.board, parent=self, move=move, agent=self.agent)
                child_node.board.push(move)
                self.children.append(child_node)
                return child_node
            return None

class ChessAgent:
    """Chess agent that picks moves with a value network and threaded MCTS.

    The Keras model scores positions reached by random rollouts; moves are
    sampled through a temperature-controlled softmax.
    """

    def __init__(self):
        """Create an agent with a freshly built model and empty caches."""
        # Bounded buffer; not touched by any method of this class.
        self.memory = deque(maxlen=MEMORY_SIZE)
        # Softmax temperature.
        self.tau = TAU_MAX
        self.model = build_model()
        # FEN string -> cached model evaluation (filled by evaluate_state).
        self.transposition_table = {}
        # Not used by any method of this class.
        self.tree_lock = threading.Lock()

    def softmax(self, x):
        """Return temperature-scaled softmax probabilities for *x*.

        Args:
            x: 1-D array or plain sequence of scores.

        Returns:
            An array of probabilities with one entry per score, ``None``
            when *x* is empty, or a uniform distribution when the
            computation produces NaN or infinity.
        """
        if len(x) == 0:
            return None
        # Accept lists/tuples as well as arrays.
        scores = np.asarray(x)
        # Shift by the maximum so the largest exponent is exp(0).
        shifted = scores - np.max(scores)
        exp_scores = np.exp(shifted / self.tau)
        probabilities = exp_scores / exp_scores.sum()
        if np.any(np.isnan(probabilities)) or np.any(np.isinf(probabilities)):
            return np.ones_like(exp_scores) / len(exp_scores)
        return probabilities

    def act_with_mcts_and_softmax(self, board, time_limit=TIME_LIMIT, num_threads=NUM_THREADS, max_depth=MAX_DEPTH):
        """Search from *board* for about *time_limit* seconds and sample a move.

        Args:
            board: Position to move from; it is copied, not modified.
            time_limit: Search budget in seconds.
            num_threads: Maximum number of simulations running at once.
            max_depth: Maximum number of random moves per rollout.

        Returns:
            A move sampled from the softmax of the root children's visit
            counts, or a uniformly random legal move when the search
            produced no children.

        Raises:
            IndexError: If the search produced no children and *board* has
                no legal moves to fall back on.
        """
        root = Node(board, agent=self)
        root.visits = 1
        # Monotonic clock: the budget must not be stretched or cut short by
        # wall-clock adjustments.
        deadline = time.monotonic() + time_limit

        def run_simulation():
            # Selection: descend while every move of the node has a child.
            node = root
            while node.is_fully_expanded() and not node.board.is_game_over():
                node = node.best_child()
            # Expansion: add one new child unless the position is terminal.
            if not node.board.is_game_over():
                node = node.expand()
                if node is None:
                    # Another thread finished expanding this node first.
                    return
            result = self.simulate(node.board, max_depth=max_depth)
            self.backpropagate(node, result)

        threads = []
        while time.monotonic() < deadline:
            if len(threads) < num_threads:
                thread = threading.Thread(target=run_simulation)
                thread.start()
                threads.append(thread)
            threads = [t for t in threads if t.is_alive()]
            time.sleep(0.01)

        # Let simulations still in flight finish before reading statistics.
        for thread in threads:
            thread.join()

        if len(root.children) == 0:
            return random.choice(list(board.legal_moves))
        visits = np.array([child.visits for child in root.children], dtype=np.float32)
        probabilities = self.softmax(visits)
        if probabilities is None:
            return random.choice(list(board.legal_moves))
        # Sample an index rather than the Node objects themselves.
        chosen = np.random.choice(len(root.children), p=probabilities)
        return root.children[chosen].move

    def backpropagate(self, node, result):
        """Add one visit and *result* to *node* and each of its ancestors."""
        while node is not None:
            with node.lock:
                node.visits += 1
                node.value += result
            node = node.parent

    def simulate(self, board, max_depth=MAX_DEPTH):
        """Play up to *max_depth* random moves from *board* and evaluate the result.

        Args:
            board: Starting position; it is copied, not modified.
            max_depth: Maximum number of random moves to play.

        Returns:
            The value ``evaluate_state`` gives to the final position.
        """
        current_board = board.copy()
        depth = 0
        while not current_board.is_game_over() and depth < max_depth:
            legal_moves = list(current_board.legal_moves)
            if not legal_moves:
                break
            current_board.push(random.choice(legal_moves))
            depth += 1
        return self.evaluate_state(current_board)

    def evaluate_state(self, board):
        """Return the model's value for *board*, cached by its FEN string."""
        board_fen = board.fen()
        cached = self.transposition_table.get(board_fen)
        if cached is not None:
            return cached
        input_state = self.state_to_input(board)
        value = self.model.predict(input_state, verbose=0)[0][0]
        self.transposition_table[board_fen] = value
        return value

    def state_to_input(self, board):
        """Encode *board* as a one-hot array of shape ``(1, 8, 8, 12)``.

        Channels 0-5 hold White's pieces and channels 6-11 Black's, ordered
        by ``piece_type - 1``. Row 0 corresponds to rank 8 and column 0 to
        the a-file.
        """
        planes = np.zeros((8, 8, 12))
        # piece_map() yields only occupied squares, so empty squares are
        # never probed.
        for square, piece in board.piece_map().items():
            color_offset = 0 if piece.color == chess.WHITE else 6
            row = 7 - chess.square_rank(square)
            col = chess.square_file(square)
            planes[row, col, piece.piece_type - 1 + color_offset] = 1
        return np.expand_dims(planes, axis=0)

def material_count(board):
    """Return the material balance of *board*: White's total minus Black's.

    Pieces are weighted pawn=1, knight=3, bishop=3, rook=5, queen=9, king=0.
    """
    weights = {
        chess.PAWN: 1,
        chess.KNIGHT: 3,
        chess.BISHOP: 3,
        chess.ROOK: 5,
        chess.QUEEN: 9,
        chess.KING: 0,
    }
    occupants = (board.piece_at(sq) for sq in chess.SQUARES)
    # White pieces add their weight, Black pieces subtract it.
    return sum(
        weights[p.piece_type] if p.color == chess.WHITE else -weights[p.piece_type]
        for p in occupants
        if p
    )

def get_reward(board, previous_material_count):
    """Compute a shaped reward for the position on *board*.

    Args:
        board: Position to score.
        previous_material_count: Material balance (as returned by
            ``material_count``) to compare the current balance against.

    Returns:
        ``1`` or ``-1`` on checkmate (``1`` when Black is the side to move),
        ``0`` on stalemate or insufficient material, otherwise a sum of
        material-change, centre-occupation, mobility and check terms.

    NOTE(review): the centre, mobility and check terms use ``board.turn``,
    i.e. the side to move on the board as passed in. ``play_game`` calls this
    right after pushing a move, so those terms then describe the side that
    did not just move -- confirm this is intended.
    """
    if board.is_checkmate():
        return 1 if board.turn == chess.BLACK else -1
    if board.is_stalemate() or board.is_insufficient_material():
        return 0

    shaped = 0
    # Change in material balance since the previous count.
    shaped += 0.1 * (material_count(board) - previous_material_count)
    # Bonus per central square held by the side to move.
    for centre in (chess.D4, chess.E4, chess.D5, chess.E5):
        occupant = board.piece_at(centre)
        if occupant and occupant.color == board.turn:
            shaped += 0.05
    # Mobility of the side to move.
    shaped += 0.01 * len(list(board.legal_moves))
    # Penalty when the side to move is in check.
    if board.is_check():
        shaped -= 0.3
    return shaped

def load_model_from_path(agent, model_path):
    """Load a saved model into *agent* and recompile it for this project.

    Args:
        agent: Object whose ``model`` attribute is replaced.
        model_path: Path of the saved model file.

    Raises:
        FileNotFoundError: If *model_path* does not exist.
    """
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"Nie znaleziono modelu: {model_path}")

    print(f"Wczytywanie modelu z: {model_path}")
    # Load without the stored compile state, then attach the optimizer and
    # loss configured in this file.
    agent.model = load_model(model_path, compile=False)
    agent.model.compile(
        optimizer=tf.keras.optimizers.AdamW(
            learning_rate=LEARNING_RATE,
            weight_decay=WEIGHT_DECAY,
        ),
        loss='mean_squared_error',
    )


Game logic

In [None]:
def _play_agent_turn(agent, board, previous_material_count):
    """Have *agent* choose and push one move; return (reward, material count after it)."""
    action = agent.act_with_mcts_and_softmax(
        board,
        time_limit=TIME_LIMIT,
        num_threads=NUM_THREADS,
        max_depth=MAX_DEPTH,
    )
    board.push(action)
    return get_reward(board, previous_material_count), material_count(board)


def play_game(agent_white, agent_black, stockfish_path=None, stockfish_skill=None):
    """Play one full game and summarise it.

    White is always played by *agent_white*. Black is played by a UCI engine
    when both *stockfish_path* and *stockfish_skill* are given, otherwise by
    *agent_black*.

    Args:
        agent_white: Agent playing White.
        agent_black: Agent playing Black; may be ``None`` when an engine
            plays Black.
        stockfish_path: Path of the engine executable, or ``None``.
        stockfish_skill: Value for the engine's "Skill Level" option, or
            ``None``. A skill level of 0 is accepted.

    Returns:
        A tuple ``(result_str, moves_in_game, avg_reward)`` where
        *result_str* is ``'Win_White'``, ``'Win_Black'`` or ``'Draw'``,
        *moves_in_game* counts only moves made by agents (engine moves are
        not counted), and *avg_reward* is the mean reward over those moves
        (0 when there were none).
    """
    board = chess.Board()
    total_reward = 0
    moves_in_game = 0
    previous_material_count = material_count(board)

    # Compare the skill against None explicitly: 0 is falsy but still a
    # request to play against the engine.
    use_engine = bool(stockfish_path) and stockfish_skill is not None
    engine = chess.engine.SimpleEngine.popen_uci(stockfish_path) if use_engine else None

    try:
        if engine is not None:
            engine.configure({"Skill Level": stockfish_skill})

        while not board.is_game_over():
            if board.turn == chess.BLACK and engine is not None:
                result = engine.play(board, chess.engine.Limit(time=0.1))
                board.push(result.move)
                continue

            mover = agent_white if board.turn == chess.WHITE else agent_black
            reward, previous_material_count = _play_agent_turn(
                mover, board, previous_material_count
            )
            total_reward += reward
            moves_in_game += 1
    finally:
        # Always shut the engine process down, even if a move fails.
        if engine is not None:
            engine.quit()

    outcome = board.result()
    if outcome == '1-0':
        result_str = 'Win_White'
    elif outcome == '0-1':
        result_str = 'Win_Black'
    else:
        result_str = 'Draw'

    avg_reward = total_reward / moves_in_game if moves_in_game > 0 else 0
    return result_str, moves_in_game, avg_reward


Model vs Stockfish

In [None]:
def evaluate_agent_vs_stockfish(model_path, stockfish_path, skill_level, games=10, stats_path="eval_stats_vs_stockfish.csv"):
    """Play a series of games as White against a UCI engine and log the results.

    Args:
        model_path: Saved model loaded into the agent.
        stockfish_path: Path of the engine executable.
        skill_level: Engine skill level, also written to the stats file.
        games: Number of games to play.
        stats_path: CSV file the per-game rows are appended to; a header row
            is written first when the file does not exist yet.
    """
    agent = ChessAgent()
    load_model_from_path(agent, model_path)

    header_needed = not os.path.exists(stats_path)
    if header_needed:
        with open(stats_path, mode='a', newline='') as stats_file:
            csv.writer(stats_file).writerow(['Game', 'Result', 'Moves', 'StockfishSkill'])

    for game_number in range(1, games + 1):
        outcome, move_count, _avg_reward = play_game(
            agent,
            None,
            stockfish_path=stockfish_path,
            stockfish_skill=skill_level,
        )
        # Append after every game so finished results survive an interruption.
        with open(stats_path, mode='a', newline='') as stats_file:
            csv.writer(stats_file).writerow([game_number, outcome, move_count, skill_level])
        print(f"Gra {game_number}/{games}, Wynik: {outcome}, Ruchy: {move_count}")


Model vs model

In [None]:
def evaluate_two_agents(model_path_white, model_path_black, games=10, stats_path="eval_stats_model_vs_model.csv"):
    """Play a series of games between two saved models and log the results.

    Args:
        model_path_white: Saved model loaded into the agent playing White.
        model_path_black: Saved model loaded into the agent playing Black.
        games: Number of games to play.
        stats_path: CSV file the per-game rows are appended to; a header row
            is written first when the file does not exist yet.
    """
    agent_white = ChessAgent()
    agent_black = ChessAgent()
    load_model_from_path(agent_white, model_path_white)
    load_model_from_path(agent_black, model_path_black)

    # No engine takes part in these games; the column is kept so the file has
    # the same layout as the engine evaluation stats.
    no_engine_skill = 'N/A'

    if not os.path.exists(stats_path):
        with open(stats_path, mode='a', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(['Game', 'Result', 'Moves', 'StockfishSkill'])

    for g in range(games):
        result_str, moves_in_game, _avg_reward = play_game(agent_white, agent_black)
        with open(stats_path, mode='a', newline='') as file:
            writer = csv.writer(file)
            writer.writerow([g + 1, result_str, moves_in_game, no_engine_skill])
        print(f"Gra {g+1}/{games}, Wynik: {result_str}, Ruchy: {moves_in_game}")

Example tests

In [None]:
# Example: 20 games with the saved_model/10000.h5 checkpoint as White against
# the Stockfish binary at skill level 5; rows go to the default stats CSV.
evaluate_agent_vs_stockfish("saved_model/10000.h5", "stockfish/stockfish-windows-x86-64.exe", skill_level=5, games=20)

In [None]:
# Example: 20 games, saved_model/10000.h5 as White against
# saved_model_without_pretrained/10000.h5 as Black.
evaluate_two_agents("saved_model/10000.h5", "saved_model_without_pretrained/10000.h5", games=20)

In [None]:
# Example: 20 games, pretrained_model.h5 as White against
# saved_model_without_pretrained/10000.h5 as Black.
evaluate_two_agents("pretrained_model.h5", "saved_model_without_pretrained/10000.h5", games=20)