In [None]:
#ULTRA SUPER SIGMA CHESS AI

In [None]:
# Notebook shell command: install the `chess` package, which is imported as `chess` further down.
!pip install chess



In [None]:
# Mount Google Drive at /content/drive so the paths under "/content/drive/My Drive" used below resolve.
from google.colab import drive
drive.mount('/content/drive')

# Make the Stockfish binary stored on Drive executable; play_vs_stockfish() launches it as a UCI engine.
!chmod +x "/content/drive/My Drive/stockfish-ubuntu-x86-64-avx2"


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import math

import chess

from collections import deque

import tensorflow as tf

from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.initializers import HeNormal

import chess.engine
from tensorflow.keras.callbacks import ModelCheckpoint

import threading
from concurrent.futures import ThreadPoolExecutor, as_completed

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

class GameState:
    """Wrap a python-chess board and translate it to and from network tensors.

    Positions are encoded as an (8, 8, 119) stack of planes (see
    ``get_current_state``). Moves are encoded as a single integer
    ``from_square * 73 + move_type`` where ``move_type`` is:

    * 0-55: ray moves, ``direction * 7 + (distance - 1)``. Directions are
      numbered by square-index step: 0=+8, 1=+9, 2=+1, 3=-7, 4=-8, 5=-9,
      6=-1, 7=+7. Queen promotions are encoded as ordinary one-step ray moves.
    * 56-63: the eight knight jumps (see ``_KNIGHT_STEPS``).
    * 64-72: under-promotions, ``direction * 3 + piece`` where direction is
      0=straight, 1=the 9-step diagonal, 2=the 7-step diagonal (sign follows
      the side to move) and piece follows ``promotion_indexes``.
    """

    row = 8
    col = 8
    # Under-promotion piece -> offset inside each group of three under-promotion planes.
    promotion_indexes = {
        chess.KNIGHT: 0,
        chess.ROOK: 1,
        chess.BISHOP: 2
    }

    # Square-index change for a single step along each of the eight ray directions.
    _RAY_STEPS = (8, 9, 1, -7, -8, -9, -1, 7)
    # Square-index change for each of the eight knight planes.
    _KNIGHT_STEPS = (17, 10, -6, -15, -17, -10, 6, 15)
    # Square-index change for each under-promotion direction, from White's side
    # (negated when Black is to move).
    _UNDERPROMOTION_STEPS = (8, 9, 7)
    # Piece for each under-promotion offset; the inverse of promotion_indexes.
    _UNDERPROMOTION_PIECES = (chess.KNIGHT, chess.ROOK, chess.BISHOP)
    # Plane offset of each piece type inside one 14-plane history step.
    _PIECE_PLANES = {
        chess.PAWN: 0,
        chess.KNIGHT: 1,
        chess.BISHOP: 2,
        chess.ROOK: 3,
        chess.QUEEN: 4,
        chess.KING: 5
    }

    def __init__(self) -> None:
        self.board = chess.Board()
        self.repetition_count = 0
        self.player_color: chess.Color = chess.WHITE

    def get_initial_state(self):
        """Reset the board to the starting position and return its encoding."""
        self.board.reset()

        return self.get_current_state()

    def get_current_state(self, T=8):
        """Encode the current position plus up to ``T - 1`` earlier positions.

        Plane layout (history step ``t`` = number of plies back, 0 = current):

        * ``t*14 + 0..5``: white pawn, knight, bishop, rook, queen, king.
        * ``t*14 + 6..11``: the same piece order for black.
        * ``t*14 + 12`` / ``t*14 + 13``: high / low bit of the repetition count
          (capped at 3) of that position.
        * 112: side to move (0 = white, 1 = black).
        * 113: 1 once at least one move has been played.
        * 114 / 115: white queenside / kingside castling rights.
        * 116 / 117: black queenside / kingside castling rights.
        * 118: 1 when ``board.is_fifty_moves()`` is true.

        The first tensor axis is the rank (``square // 8``) and the second the
        file (``square % 8``). The 119-plane layout only has room for T <= 8.

        Returns:
            uint8 array of shape (1, 8, 8, 119).
        """
        input_tensor = np.zeros((8, 8, 119), dtype=np.uint8)
        plies_played = len(self.board.move_stack)

        # Walk from the oldest requested position to the newest, skipping
        # history steps that reach back before the start of the game.
        for plies_back in range(T - 1, -1, -1):
            if plies_played < plies_back:
                continue

            self.create_input(input_tensor, plies_back)

        input_tensor[:, :, 112] = 0 if self.board.turn == chess.WHITE else 1
        input_tensor[:, :, 113] = plies_played > 0

        input_tensor[:, :, 114] = int(self.board.has_queenside_castling_rights(chess.WHITE))
        input_tensor[:, :, 115] = int(self.board.has_kingside_castling_rights(chess.WHITE))
        input_tensor[:, :, 116] = int(self.board.has_queenside_castling_rights(chess.BLACK))
        input_tensor[:, :, 117] = int(self.board.has_kingside_castling_rights(chess.BLACK))

        input_tensor[:, :, 118] = int(self.board.is_fifty_moves())

        return np.expand_dims(input_tensor, axis=0)

    def get_next_state(self, action: int):
        """Decode ``action``, play it on the board and return the result.

        A pawn move onto the last rank that is not an explicit under-promotion
        is played as a queen promotion.

        Args:
            action: move index in ``[0, 4672)`` as described on the class.

        Returns:
            Tuple ``(move, state)`` with the played ``chess.Move`` and the
            encoding of the resulting position.
        """
        source_index, move_type = divmod(action, 73)
        promotion = None

        if move_type < 56:
            direction, distance = divmod(move_type, 7)
            destination_index = source_index + self._RAY_STEPS[direction] * (distance + 1)
        elif move_type < 64:
            destination_index = source_index + self._KNIGHT_STEPS[move_type - 56]
        else:
            direction, promotion_index = divmod(move_type - 64, 3)
            promotion = self._UNDERPROMOTION_PIECES[promotion_index]

            # Pawns advance towards higher square indexes for White, lower for Black.
            forward = 1 if self.board.turn == chess.WHITE else -1
            destination_index = source_index + self._UNDERPROMOTION_STEPS[direction] * forward

        from_square = chess.Square(source_index)
        to_square = chess.Square(destination_index)

        promotion_rank = 7 if self.board.turn == chess.WHITE else 0

        if promotion is None:
            if self.board.piece_type_at(from_square) == chess.PAWN and chess.square_rank(to_square) == promotion_rank:
                promotion = chess.QUEEN

        move = chess.Move(from_square, to_square, promotion)

        self.apply_action(move)

        return move, self.get_current_state()

    def apply_action(self, move: chess.Move):
        """Push ``move`` onto the board.

        Raises:
            Exception: if ``board.push`` fails. The legal moves are printed for
                debugging and the original error is attached as ``__cause__``.
        """
        try:
            self.board.push(move)
        except Exception as e:
            print(list(self.board.legal_moves))
            print(self.get_valid_moves())

            print(e)

            # Chain the cause so the real traceback is not lost.
            raise Exception("Error") from e

    def create_input(self, input_tensor: np.ndarray, t: int):
        """Fill the 14 planes of history step ``t`` in ``input_tensor`` in place.

        Args:
            input_tensor: (8, 8, 119) array being assembled.
            t: how many plies to go back from the current position.
        """
        # Work on a copy so the live board's move stack is left untouched.
        board = self.board.copy()
        for _ in range(t):
            board.pop()

        transposition_key = board._transposition_key()
        base_plane = t * 14

        for square in chess.SQUARES:
            piece = board.piece_at(square)

            if piece is None:
                continue

            color_offset = 0 if piece.color == chess.WHITE else 6
            plane = base_plane + color_offset + self._PIECE_PLANES[piece.piece_type]
            input_tensor[square // 8, square % 8, plane] = 1

        # Count earlier occurrences of this position by unwinding the copied
        # board until an irreversible move is reached or three are found.
        repetition_count = 0
        while board.move_stack:
            move = board.pop()
            # After the pop the board holds the position the move was played from.
            if board.is_irreversible(move):
                break

            if board._transposition_key() == transposition_key:
                repetition_count += 1

            if repetition_count == 3:
                break

        repetition_count = min(repetition_count, 3)

        # Two-bit encoding: high bit first, then low bit.
        input_tensor[:, :, base_plane + 12] = repetition_count >> 1
        input_tensor[:, :, base_plane + 13] = repetition_count & 1

    def get_valid_moves(self):
        """Return the action index of every legal move in the current position."""
        legal_moves = []

        for valid_move in self.board.legal_moves:
            s_row, s_col, from_square_index = self.index_of_square(valid_move.from_square)
            d_row, d_col, to_square_index = self.index_of_square(valid_move.to_square)
            base_index = from_square_index * 73

            if valid_move.promotion:
                direction = self.direction_of_move_for_ray_directions(s_row, s_col, d_row, d_col)

                if valid_move.promotion == chess.QUEEN:
                    # Queen promotions share the plane of the one-step ray move.
                    legal_moves.append(base_index + (direction * 7))
                else:
                    promotion_index = self.promotion_indexes[valid_move.promotion]

                    # Fold the ray direction into the three under-promotion
                    # directions (0 = straight, 1 = 9-step, 2 = 7-step diagonal).
                    if direction > 2 and direction < 6:
                        direction = 0 if direction == 4 else (1 if direction == 5 else 2)
                    elif direction == 7:
                        direction = 2

                    legal_moves.append(base_index + ((direction * 3) + promotion_index + 64))
            elif self.board.piece_type_at(valid_move.from_square) == chess.KNIGHT:
                direction = self.direction_of_move_for_knights(s_row, s_col, d_row, d_col)

                legal_moves.append(base_index + direction + 56)
            else:
                direction = self.direction_of_move_for_ray_directions(s_row, s_col, d_row, d_col)
                count_of_square = self.count_of_square_for_movement(s_row, s_col, d_row, d_col) - 1

                legal_moves.append(base_index + ((direction * 7) + count_of_square))

        return legal_moves

    def index_of_square(self, square: chess.Square):
        """Return ``(rank, file, rank * 8 + file)`` for ``square``."""
        row = chess.square_rank(square)
        col = chess.square_file(square)
        index = (row * 8) + col

        return row, col, index

    def direction_of_move_for_ray_directions(self, s_row: int, s_col: int, d_row: int, d_col: int):
        """Return the ray direction (0-7, matching ``_RAY_STEPS``) of a move."""
        delta_x = d_col - s_col
        delta_y = d_row - s_row

        if delta_x == 0:
            return 0 if delta_y > 0 else 4

        if delta_y == 0:
            return 2 if delta_x > 0 else 6

        if delta_x < 0:
            return 7 if delta_y > 0 else 5

        return 1 if delta_y > 0 else 3

    def direction_of_move_for_knights(self, s_row: int, s_col: int, d_row: int, d_col: int):
        """Return the knight plane (0-7, matching ``_KNIGHT_STEPS``) of a jump."""
        delta_x = d_col - s_col
        delta_y = d_row - s_row

        if delta_x == 1:
            return 0 if delta_y > 0 else 3

        if delta_x == 2:
            return 1 if delta_y > 0 else 2

        if delta_x == -1:
            return 7 if delta_y > 0 else 4

        return 6 if delta_y > 0 else 5

    def count_of_square_for_movement(self, s_row: int, s_col: int, d_row: int, d_col: int):
        """Return how many squares a ray move travels (Chebyshev distance)."""
        delta_x = d_col - s_col
        delta_y = d_row - s_row

        return max(abs(delta_x), abs(delta_y))

    def get_winner(self):
        """Return ``chess.WHITE``, ``chess.BLACK``, or 2 for a draw or unfinished game."""
        result = self.board.result()

        if result == "1-0":
            return chess.WHITE

        if result == "0-1":
            return chess.BLACK

        return 2

    def is_terminal(self):
        """Return True when ``board.is_game_over()`` reports the game has ended."""
        return self.board.is_game_over()

    def clone(self):
        """Return an independent copy of this state, including its bookkeeping fields."""
        cloned_state = GameState()
        cloned_state.board = self.board.copy()
        # Carry the remaining attributes over too, so a clone is a faithful copy
        # rather than silently reverting them to their defaults.
        cloned_state.player_color = self.player_color
        cloned_state.repetition_count = self.repetition_count

        return cloned_state

class Node:
    """A single search-tree node: a game state plus its visit statistics."""

    def __init__(self, state, parent=None, prior_prob=1.0):
        self.parent = parent
        self.state = state
        self.prior_prob = prior_prob
        self.children = {}
        self.is_expanded = False
        self.visits = 0
        self.value_sum = 0

    @property
    def value(self):
        """Mean backed-up value; the 1e-5 term keeps an unvisited node at 0."""
        return self.value_sum / (self.visits + 1e-5)

    def expand(self, action_probs):
        """Create one child per action with positive probability.

        Does nothing when the node already has children. Each child holds a
        clone of this node's state advanced by the corresponding action.
        """
        if self.children:
            return

        for action, prob in enumerate(action_probs):
            if not prob > 0:
                continue
            successor = self.state.clone()
            successor.get_next_state(action)
            self.children[action] = Node(successor, parent=self, prior_prob=prob)

        self.is_expanded = len(self.children) > 0

    def select(self, c_puct=1.0):
        """Return the ``(action, child)`` pair with the highest UCB score.

        Returns ``(None, None)`` when the node has no children.
        """
        if not self.children:
            return None, None

        chosen = (None, None)
        top_score = -float('inf')

        for action, child in self.children.items():
            exploration = c_puct * child.prior_prob * (math.sqrt(self.visits) / (1 + child.visits))
            score = child.value + exploration
            if score > top_score:
                top_score = score
                chosen = (action, child)

        return chosen

    def backup(self, value):
        """Add ``value`` to this node and walk up to the root, flipping the sign at each level."""
        node = self
        while True:
            node.visits += 1
            node.value_sum += value
            if not node.parent:
                break
            node, value = node.parent, -value

class MCTS:
    """Monte Carlo tree search guided by a policy/value network.

    Value convention: the network value and the terminal value are both taken
    from the point of view of the side to move in the evaluated position.
    ``Node.select`` compares ``child.value`` from the parent's side, so the
    negated value is backed up from the evaluated node. Every child then holds
    how good the move leading to it is for the player who made that move.
    """

    # Length of the flat move-probability vector (64 squares * 73 move types).
    ACTION_SPACE_SIZE = 4672

    def __init__(self, model, c_puct=1.0, simulations=300):
        self.model = model
        self.c_puct = c_puct
        self.simulations = simulations

    def add_dirichlet_noise(self, node, valid_moves):
        """Mix Dirichlet(0.3) noise into the priors of ``node``'s children.

        Each listed action's prior becomes ``0.75 * prior + 0.25 * noise``.
        Actions without a child are skipped; an empty move list is a no-op.
        """
        if not valid_moves:
            return

        noise = np.random.dirichlet([0.3] * len(valid_moves))
        for idx, action in enumerate(valid_moves):
            child = node.children.get(action)
            if child is not None:
                child.prior_prob = 0.75 * child.prior_prob + 0.25 * noise[idx]

    def run(self, initial_state, temperature=1.0):
        """Search from ``initial_state`` and return a move distribution.

        Args:
            initial_state: game state to search from; it is cloned, not mutated.
            temperature: passed to ``get_action_probs``.

        Returns:
            Array of length ``ACTION_SPACE_SIZE`` with visit-based probabilities.
        """
        root = Node(initial_state)

        # Expand the root with the network priors, then perturb them for exploration.
        action_probs, _ = self.evaluate(initial_state)
        valid_moves = initial_state.get_valid_moves()

        for action in valid_moves:
            next_state = initial_state.clone()
            next_state.get_next_state(action)
            root.children[action] = Node(next_state, parent=root, prior_prob=action_probs[action])

        root.is_expanded = bool(root.children)
        self.add_dirichlet_noise(root, valid_moves)

        for _ in range(self.simulations):
            node = root

            # Selection: descend until a node without children or a finished game.
            while node.children and not node.state.is_terminal():
                _, child_node = node.select(self.c_puct)

                if child_node is None:
                    break

                node = child_node

            # Evaluation (and expansion for non-terminal leaves).
            if node.state.is_terminal():
                value = self._terminal_value(node.state)
            else:
                action_probs, value = self.evaluate(node.state)
                node.expand(action_probs)

            # `value` is for the side to move at `node`; the node itself stores
            # the result for the player who moved into it, hence the negation.
            node.backup(-value)

        return self.get_action_probs(root, temperature)

    @staticmethod
    def _terminal_value(state):
        """Return the finished game's result for the side to move: 1, 0 or -1."""
        winner = state.get_winner()
        if winner == 2:
            return 0

        return 1 if winner == state.board.turn else -1

    def evaluate(self, state):
        """Run the network on ``state`` and restrict its policy to legal moves.

        Returns:
            Tuple ``(policy, value)``. ``policy`` is renormalized over the legal
            moves (uniform over them if the network gave them zero mass, all
            zeros if there are no legal moves). ``value`` is a Python float so
            that accumulated node statistics do not inherit the network's
            output dtype.
        """
        state_tensor = state.get_current_state()

        policy, value = self.model.predict(state_tensor, verbose=0)

        # Mask invalid moves
        valid_moves = state.get_valid_moves()
        mask = np.zeros(policy.shape[1])
        mask[valid_moves] = 1

        policy = policy[0] * mask

        # Normalize
        sum_policy = np.sum(policy)
        if sum_policy > 0:
            policy = policy / sum_policy
        elif valid_moves:
            # The network put no mass on any legal move: fall back to uniform.
            policy = mask / np.sum(mask)

        return policy, float(value[0][0])

    def get_action_probs(self, root, temperature=1.0):
        """Turn the root's child visit counts into a full probability vector.

        Args:
            root: searched root node.
            temperature: 0 puts all mass on the most visited move; otherwise
                probabilities are proportional to ``visits ** (1 / temperature)``.

        Returns:
            Array of length ``ACTION_SPACE_SIZE``; all zeros if the root has no
            children.
        """
        full_probs = np.zeros(self.ACTION_SPACE_SIZE)
        if not root.children:
            return full_probs

        actions = list(root.children.keys())
        visits = np.array([child.visits for child in root.children.values()], dtype=np.float64)

        if temperature == 0:  # Pure exploitation
            probs = np.zeros_like(visits)
            probs[np.argmax(visits)] = 1
        elif visits.sum() == 0:
            # No simulation reached any child: avoid 0/0 and spread mass evenly.
            probs = np.full(len(actions), 1.0 / len(actions))
        else:
            # Scale by the maximum first so small temperatures cannot overflow;
            # the normalized result is mathematically unchanged.
            scaled = (visits / visits.max()) ** (1 / temperature)
            probs = scaled / np.sum(scaled)

        full_probs[actions] = probs

        return full_probs

class ReplayBuffer:
    """Bounded FIFO store of ``(state, policy, value)`` training samples."""

    def __init__(self, maxlen=500000):
        self.buffer = deque(maxlen=maxlen)
        self.current_size = 0
        # Guards bulk inserts coming from concurrent game threads.
        self.lock = threading.Lock()

    def store(self, state, policy, value):
        """Store a single game state transition"""
        self.buffer.append({
            'state': state,
            'policy': policy,
            'value': value
        })
        self.current_size = len(self.buffer)

    def store_multiple_data(self, states, policies, value):
        """Store every ``(state, policy)`` pair of one game with the same ``value``.

        Args:
            states: sequence of encoded positions.
            policies: sequence of move distributions, aligned with ``states``.
            value: the single game outcome attached to every stored position.
        """
        with self.lock:
            # The outcome is one scalar for the whole game, so pair it with each
            # position instead of zipping against a one-element list (which
            # would keep only the first position).
            for s, p in zip(states, policies):
                self.store(s, p, value)

    def sample(self, batch_size):
        """Sample a batch with augmentations

        Draws up to ``batch_size`` stored samples with replacement and returns
        every augmentation of each one, so the arrays can be several times
        longer than ``batch_size``.

        Returns:
            Tuple ``(states, policies, values)`` of NumPy arrays.
        """
        if self.current_size < batch_size:
            batch_size = self.current_size

        indices = np.random.choice(self.current_size, batch_size)
        states, policies, values = [], [], []

        for idx in indices:
            sample = self.buffer[idx]
            # Get augmented samples
            aug_states, aug_policies = self._augment_sample(
                sample['state'],
                sample['policy']
            )

            # Add all augmentations
            states.extend(aug_states)
            policies.extend(aug_policies)
            values.extend([sample['value']] * len(aug_states))

        return np.array(states), np.array(policies), np.array(values)

    def _augment_sample(self, state, policy):
        """Generate valid augmentations for a single sample

        NOTE(review): the flipped/transposed boards are paired with the
        unchanged policy vector. Unless the policy is remapped to match each
        transform, these extra samples carry labels for the original board -
        confirm whether that is intended before relying on them.
        """
        # Remove batch dimension if present
        if len(state.shape) == 4:
            state = np.squeeze(state, axis=0)

        augmented_states = [state]
        augmented_policies = [policy]

        # Horizontal flip
        flip_h = np.flip(state, axis=1)
        augmented_states.append(flip_h)
        augmented_policies.append(policy)  # Policy needs game-specific mapping

        # Vertical flip
        flip_v = np.flip(state, axis=0)
        augmented_states.append(flip_v)
        augmented_policies.append(policy)  # Policy needs game-specific mapping

        # Diagonal flip (only if shape allows)
        if state.shape[0] == state.shape[1]:
            diag = np.transpose(state, (1, 0, 2))
            augmented_states.append(diag)
            augmented_policies.append(policy)  # Policy needs game-specific mapping

        return augmented_states, augmented_policies

    def __len__(self):
        return self.current_size

# tpu = tf.distribute.cluster_resolver.TPUClusterResolver()

# tf.tpu.experimental.initialize_tpu_system(tpu)
# tpu_strategy = tf.distribute.TPUStrategy(tpu)

# Show which GPU devices TensorFlow has registered.
print("All devices: ", tf.config.list_logical_devices('GPU'))

# Make 'mixed_float16' the global Keras dtype policy.
policy = tf.keras.mixed_precision.Policy('mixed_float16')
tf.keras.mixed_precision.set_global_policy(policy)

All devices:  [LogicalDevice(name='/device:GPU:0', device_type='GPU')]


In [None]:
import numpy as np
import threading
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint
from concurrent.futures import ThreadPoolExecutor, as_completed


def EfficientNet_block(inputs, filters=256, expansion_factor=6, stride=1, se_ratio=0.25):
    """
    EfficientNet-style MBConv block.

    Args:
        inputs: Input tensor.
        filters: Number of output filters.
        expansion_factor: Expansion ratio used for the bottleneck expansion.
        stride: Stride of the depthwise convolution (e.g. 1 or 2).
        se_ratio: Squeeze-and-Excitation (SE) ratio; a falsy value skips the SE step.

    Returns:
        Output tensor of the MBConv block.
    """
    in_channels = inputs.shape[-1]
    expanded_channels = in_channels * expansion_factor

    # 1. Expand: 1x1 convolution widening the channels.
    features = layers.Conv2D(
        expanded_channels, (1, 1), strides=1, padding="same",
        kernel_initializer="he_normal", use_bias=False,
    )(inputs)
    features = layers.BatchNormalization()(features)
    features = layers.ReLU()(features)

    # 2. Depthwise 3x3 convolution.
    features = layers.DepthwiseConv2D(
        (3, 3), strides=stride, padding="same",
        depthwise_initializer="he_normal", use_bias=False,
    )(features)
    features = layers.BatchNormalization()(features)
    features = layers.ReLU()(features)

    # 3. Squeeze-and-Excitation: per-channel gates computed from pooled features.
    if se_ratio:
        gate_channels = features.shape[-1]
        gates = layers.GlobalAveragePooling2D()(features)
        gates = layers.Reshape((1, 1, gate_channels))(gates)
        gates = layers.Conv2D(
            int(in_channels * se_ratio), (1, 1),
            activation="relu", kernel_initializer="he_normal",
        )(gates)
        gates = layers.Conv2D(
            gate_channels, (1, 1),
            activation="sigmoid", kernel_initializer="he_normal",
        )(gates)
        features = layers.Multiply()([features, gates])

    # 4. Project: 1x1 convolution down to the requested number of filters.
    features = layers.Conv2D(
        filters, (1, 1), strides=1, padding="same",
        kernel_initializer="he_normal", use_bias=False,
    )(features)
    features = layers.BatchNormalization()(features)

    # 5. Skip connection, only when input and output shapes line up.
    can_add_shortcut = stride == 1 and inputs.shape[-1] == filters
    if can_add_shortcut:
        features = layers.Add()([inputs, features])

    return features



def sigmachess_network(input_shape=(8, 8, 119)):
    """Build the (uncompiled) SigmaChess policy/value network.

    A 3x3 convolutional stem is followed by 19 ``EfficientNet_block`` stages
    and two heads:

    * ``policy_output``: softmax over the flattened 8 x 8 x 73 move planes.
    * ``value_output``: a single tanh-activated scalar.

    Both output layers are pinned to float32. This file enables the
    ``mixed_float16`` global policy, under which they would otherwise compute
    the wide softmax and the tanh value in half precision.

    Args:
        input_shape: shape of one encoded position.

    Returns:
        A ``keras.Model`` with outputs ``[policy, value]``.
    """
    inputs = layers.Input(shape=input_shape)

    x = layers.Conv2D(256, (3, 3), strides=1, padding="same", kernel_initializer="he_normal", use_bias=False)(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)

    for _ in range(19):
        x = EfficientNet_block(x)

    # Policy head: 73 move-type planes per square, flattened square-major.
    policy = layers.Conv2D(256, (3, 3), strides=1, padding="same", kernel_initializer="he_normal", use_bias=False)(x)
    policy = layers.BatchNormalization()(policy)
    policy = layers.ReLU()(policy)
    policy = layers.Conv2D(73, (1, 1), strides=1, padding="same", kernel_initializer="he_normal")(policy)
    policy = layers.Flatten()(policy)
    policy = layers.Softmax(name="policy_output", dtype="float32")(policy)

    # Value head.
    value = layers.Conv2D(1, (1, 1), strides=1, padding="same", kernel_initializer="he_normal", use_bias=False)(x)
    value = layers.BatchNormalization()(value)
    value = layers.ReLU()(value)
    value = layers.Flatten()(value)
    value = layers.Dense(256, activation="relu", kernel_initializer="he_normal")(value)
    value = layers.Dense(1, activation="tanh", name="value_output", kernel_initializer="he_normal",
                         dtype="float32")(value)

    model = models.Model(inputs=inputs, outputs=[policy, value])

    return model

def create_model():
    """Build the SigmaChess network and compile it for joint policy/value training.

    Uses Adam with a learning rate of 1e-5, categorical cross-entropy on the
    policy head and mean squared error on the value head.
    """
    network = sigmachess_network()

    head_losses = {
        "policy_output": "categorical_crossentropy",
        "value_output": "mean_squared_error"
    }
    head_metrics = {
        "policy_output": "accuracy",
        "value_output": "mse"
    }

    network.compile(
        optimizer=Adam(learning_rate=0.00001),
        loss=head_losses,
        metrics=head_metrics
    )

    return network

def play_vs_stockfish(model, game, replay_buffer,
                      engine_path=r"/content/drive/My Drive/stockfish-ubuntu-x86-64-avx2"):
    """Play one game of the model (via MCTS) against Stockfish and record it.

    The model's colour is chosen at random. For each of its moves the encoded
    position and the MCTS move distribution are collected. After the game they
    are stored in ``replay_buffer`` together with the outcome from the model's
    point of view (1 win, 0 draw, -1 loss).

    Args:
        model: policy/value network used by the search.
        game: index of the game; games 0-4 use temperature 1.0, later ones 0.1.
        replay_buffer: buffer receiving the recorded positions.
        engine_path: path of the UCI engine executable to play against.
    """
    state = GameState()
    temperature = 1.0 if game < 5 else 0.1

    w_states, w_policies = [], []
    player = np.random.choice([chess.WHITE, chess.BLACK])

    # The searcher keeps no state between runs, so one instance serves the whole game.
    mcts = MCTS(model, 1.0, 10)

    engine = chess.engine.SimpleEngine.popen_uci(engine_path)
    try:
        while not state.is_terminal():
            if state.board.turn == player:
                action_probs = mcts.run(state, temperature)

                w_states.append(state.get_current_state())
                w_policies.append(action_probs)

                action = np.random.choice(len(action_probs), p=action_probs)
                state.get_next_state(action)
            else:
                result = engine.play(state.board, chess.engine.Limit(time=0.04))
                state.apply_action(result.move)
    finally:
        # Always shut the engine process down, even if a move or the search raised.
        engine.close()

    winner = state.get_winner()
    w_value = 1 if winner == player else (0 if winner == 2 else -1)

    print(player, state.board.board_fen())

    replay_buffer.store_multiple_data(w_states, w_policies, w_value)

def self_play(model, num_games=100, max_workers=5):
    """Play ``num_games`` games against Stockfish on a thread pool.

    Args:
        model: network shared by every game thread.
        num_games: number of games to schedule.
        max_workers: size of the thread pool.

    Returns:
        A fresh ``ReplayBuffer`` filled by the finished games.
    """
    replay_buffer = ReplayBuffer(maxlen=500000)

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        pending = []
        for game_index in range(num_games):
            pending.append(executor.submit(play_vs_stockfish, model, game_index, replay_buffer))

        # result() re-raises anything a game thread raised.
        for finished in as_completed(pending):
            finished.result()

    print("Self-play completed")

    return replay_buffer

def prepare_validation_data(replay_buffer=None, filename="validation_data.npy", num_actions=4672):
    """Load cached validation data, or build and cache a new set.

    If ``filename`` exists it is loaded and returned unchanged. Otherwise the
    data comes from ``replay_buffer.sample(100)`` when a buffer is given, or
    from random synthetic positions with one-hot policies when it is not, and
    the result is saved to ``filename``.

    Args:
        replay_buffer: optional source of real samples (needs ``sample(n)``
            returning ``(states, policies, values)``).
        filename: ``.npy`` cache file.
        num_actions: width of the policy target; must match the model's policy
            output (8 * 8 * 73 = 4672).

    Returns:
        Tuple ``(x_val, targets)`` where ``targets`` maps ``"policy_output"``
        and ``"value_output"`` to arrays.
    """
    try:
        # First, try to load existing validation data.
        # NOTE: allow_pickle=True unpickles the file - only load caches you created yourself.
        data = np.load(filename, allow_pickle=True).item()
    except (FileNotFoundError, IOError):
        data = None

    if data is not None:
        print(f"Loaded existing validation data from {filename}")
        return data['x_val'], {
            "policy_output": data['y_val_policy'],
            "value_output": data['y_val_value']
        }

    # If no existing data, create new validation dataset
    if replay_buffer is None:
        # If no replay buffer provided, generate synthetic data
        sample_count = 100
        x_val = np.random.random((sample_count, 8, 8, 119))
        move_ids = np.random.randint(0, num_actions, size=sample_count)
        # One-hot encode without materializing a num_actions x num_actions identity matrix.
        y_val_policy = np.zeros((sample_count, num_actions))
        y_val_policy[np.arange(sample_count), move_ids] = 1.0
        y_val_value = np.random.random((sample_count, 1)) * 2 - 1  # Values between -1 and 1
    else:
        # Use replay buffer to generate validation data
        states, policies, rewards = replay_buffer.sample(100)
        x_val = np.asarray(states)
        if x_val.ndim == 5:
            # Drop a leftover per-sample batch axis: (N, 1, 8, 8, C) -> (N, 8, 8, C).
            x_val = np.squeeze(x_val, axis=1)
        # Buffer policies are already full move distributions; use them as-is.
        y_val_policy = np.asarray(policies)
        y_val_value = np.array(rewards).reshape(-1, 1)

    # Prepare data dictionary
    validation_data = {
        "x_val": x_val,
        "y_val_policy": y_val_policy,
        "y_val_value": y_val_value
    }

    # Save the validation data
    np.save(filename, validation_data)
    print(f"Created and saved new validation data to {filename}")

    return x_val, {
        "policy_output": y_val_policy,
        "value_output": y_val_value
    }

def create_callbacks(checkpoint_path="/content/drive/My Drive/sigma_checkpoint.weights.h5"):
    """Return the Keras callbacks used during training.

    Currently a single ``ModelCheckpoint`` that writes the weights to
    ``checkpoint_path`` at the end of an epoch whenever the training loss
    improves.
    """
    checkpoint_options = dict(
        filepath=checkpoint_path,
        save_weights_only=True,
        monitor="loss",
        mode="min",
        save_best_only=True,
        save_freq="epoch",
        verbose=1,
    )
    return [ModelCheckpoint(**checkpoint_options)]

def train_model(model, replay_buffer: ReplayBuffer, batch_size=256, epochs=3, checkpoint_path="/content/drive/My Drive/sigma_checkpoint.weights.h5"):
    """Fit ``model`` on freshly sampled replay data, one sample draw per epoch.

    Args:
        model: compiled network with ``policy_output`` and ``value_output`` heads.
        replay_buffer: source of ``(states, policies, values)`` batches.
        batch_size: number of buffer entries drawn per epoch, also the fit batch size.
        epochs: number of sample-and-fit rounds.
        checkpoint_path: where the checkpoint callback writes weights.

    Returns:
        Tuple ``(avg_policy_loss, avg_value_loss)`` averaged over the epochs.
    """
    # The validation set is loaded (or created and cached) here but is not passed to fit().
    _x_val, _y_val = prepare_validation_data()

    callbacks = create_callbacks(checkpoint_path)

    policy_losses = []
    value_losses = []

    for _ in range(epochs):
        states, policies, values = replay_buffer.sample(batch_size)

        states = np.squeeze(states)
        if states.ndim == 3:
            states = np.expand_dims(states, -1)

        targets = {
            "policy_output": policies,
            "value_output": np.array(values).reshape(-1, 1),
        }

        history = model.fit(
            states,
            targets,
            batch_size=batch_size,
            epochs=1,
            callbacks=callbacks,
            verbose=1
        )

        policy_losses.append(history.history['policy_output_loss'][0])
        value_losses.append(history.history['value_output_loss'][0])

    avg_policy_loss = sum(policy_losses) / len(policy_losses)
    avg_value_loss = sum(value_losses) / len(value_losses)

    print(f"\nAverage Policy Output Loss: {avg_policy_loss}")
    print(f"Average Value Output Loss: {avg_value_loss}")

    return avg_policy_loss, avg_value_loss

# Stop flag: stop() sets it to True, and train_sigmachess() checks it after each iteration.
is_stop = False

def train_sigmachess(model, num_iterations=100, num_games_per_iteration=100):
    """Alternate game generation and training, then save the full model.

    Each iteration plays ``num_games_per_iteration`` games, trains on the
    resulting buffer and accumulates the reported losses. Running averages are
    printed every five iterations. The loop ends early once ``is_stop`` is set,
    and the model is saved to Drive in either case.
    """
    global is_stop

    # Running totals of the per-iteration losses.
    loss_totals = {"policy": 0, "value": 0}

    for iteration in range(num_iterations):
        print(f"Iteration {iteration + 1}/{num_iterations}")

        replay_buffer = self_play(model, num_games_per_iteration)
        policy_loss, value_loss = train_model(model, replay_buffer)

        # Accumulate the losses.
        loss_totals["policy"] += policy_loss
        loss_totals["value"] += value_loss
        iteration_count = iteration + 1

        # Print the running averages every 5 iterations.
        if iteration_count % 5 == 0:
            avg_policy_loss = loss_totals["policy"] / iteration_count
            avg_value_loss = loss_totals["value"] / iteration_count
            print(f"\n[After {iteration_count} Iterations]")
            print(f"Average Policy Output Loss: {avg_policy_loss}")
            print(f"Average Value Output Loss: {avg_value_loss}")

        if is_stop:
            break

    model.save("/content/drive/My Drive/full_model.keras")


def stop():
    """Block on console input until "stop" is entered, then raise the stop flag."""
    global is_stop

    # Keep reading lines until the exact word "stop" arrives.
    while input("") != "stop":
        pass

    is_stop = True
    print("After the iteration is completed, the training will be stopped and the model will be saved!")

# Run stop() in a background daemon thread so typing "stop" can end training after the current iteration.
t = threading.Thread(target=stop, daemon=True)
t.start()

# Build a newly compiled model and start the training loop.
model = create_model()
train_sigmachess(model, num_iterations=7000, num_games_per_iteration=15)


Iteration 1/7000




False r1bQ1knr/ppp2ppp/8/4p3/4P3/N4N2/PPP2PPP/R1B1KB1R
True r3k1nr/pppb1ppp/2n5/2b1p2q/8/PP5P/2PPB3/RNBK3q
True rn2k2r/pp2bppp/6b1/1P2p3/2ppn3/P1P1P3/1R1PKqPP/2BQ1BNR
True r3kb1r/1p3ppp/p7/3pp3/PP1n2n1/6Pb/2qK3P/RNB1Q2R
True r4rk1/1p3pbp/2n2np1/p1p5/P1pp4/1P4Pb/RBP2P1P/5qKR
True r4b1r/pp1k1ppp/2n1b3/2p2q2/1P2K1n1/P2PP1P1/2P2P1P/R1B2BR1
False 3k1bnr/3Q1p2/2B1p2p/p4bp1/8/2N5/PPPB1PPP/R3K1NR
True rnb1kb1r/ppp1pppp/8/8/P5n1/6P1/1PPP1p1P/RNBKqBNR
True rnb1k1nr/pppp1ppp/8/4p3/8/2P2Pq1/PP1PP3/R1BQKBN1
False rn1k1Qnr/1b1p3p/1p1P1Np1/p4p2/2p1P3/2B2N2/PPP1BPPP/R3R1K1
False r1bqkQ2/pppp1p2/7B/4n3/3PP3/8/PPP2PPP/RN2KBNR
True 4rbnr/pp1k1ppp/8/3p4/1p3PbP/P3P1P1/2n1qK2/RNB3R1
False 2r2k1Q/5p1p/1pp3p1/p1P3B1/8/2N2N1P/PPP2PBP/R3K2R
False r2k1bnr/1b1Qp3/p1p3B1/1pP1N2p/1P6/2N5/1PP2PPP/R1B1R1K1
False 1n1Q3R/1b1k4/2pp4/8/3PPN2/4B3/PPP2PPK/4R2R
Self-play completed
Loaded existing validation data from validation_data.npy
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94s/step - loss: 12.3022 -

In [None]:
'''
def run(self, initial_state, temperature=1.0):
    root = Node(initial_state)

    # First evaluate and expand root
    action_probs, value = self.evaluate(initial_state)
    valid_moves = initial_state.get_valid_moves()

    # Add Dirichlet noise to root (alpha=0.3 for chess)
    noise = np.random.dirichlet([0.3] * len(valid_moves))

    # Expand with noisy priors
    for idx, action in enumerate(valid_moves):
        prob = action_probs[action]
        noisy_prob = 0.75 * prob + 0.25 * noise[idx]
        next_state = initial_state.clone()
        next_state.get_next_state(action)
        root.children[action] = Node(next_state, parent=root, prior_prob=noisy_prob)

    for _ in range(self.simulations):
        node = root

        # Selection with additional safety checks
        while node.is_expanded and not node.state.is_terminal():
            action, child_node = node.select(self.c_puct)

            # Safety check to prevent NoneType errors
            if child_node is None:
                break

            node = child_node

        # Expansion and Evaluation
        if not node.state.is_terminal():
            # Ensure node has valid children before expansion
            if not node.is_expanded and len(node.children) == 0:
                action_probs, value = self.evaluate(node.state)
                valid_moves = node.state.get_valid_moves()
                node.expand(action_probs)

            if node.is_expanded and len(node.children) > 0:
                # Randomly select an unexpanded child if possible
                unexpanded_children = [child for child in node.children.values() if not child.is_expanded]
                if unexpanded_children:
                    node = np.random.choice(unexpanded_children)

                action_probs, value = self.evaluate(node.state)
                valid_moves = node.state.get_valid_moves()
                node.expand(action_probs)
            else:
                # Fallback: re-evaluate the current node
                value = node.state.get_winner()
                value = 1 if value == node.state.player_color else (0 if value == 2 else -1)
        else:
            value = node.state.get_winner()
            value = 1 if value == node.state.player_color else (0 if value == 2 else -1)

        # Backup
        node.backup(value)

    return self.get_action_probs(root, temperature)
'''
# Revised version of run (kept above as a disabled string literal)

In [None]:
#!!!   What to do if the error prevention above is not effective: when neither expansion nor selection is possible on the current node, move on to another child node. This is done by picking a random child from the unexpanded_children list.
'''
for _ in range(self.simulations):
    node = root

    # Seçim aşamasında NoneType kontrolü
    while node.is_expanded and not node.state.is_terminal():
        action, child_node = node.select(self.c_puct)

        # Eğer child_node None ise, bu düğüm üzerinde işlem yapılmaz, döngü devam eder
        if child_node is None:
            break

        node = child_node

    # Genişletme ve Değerlendirme
    if not node.state.is_terminal():
        # Düğümün çocukları yoksa ve genişletilemiyorsa
        if not node.is_expanded and len(node.children) == 0:
            action_probs, value = self.evaluate(node.state)
            valid_moves = node.state.get_valid_moves()

            # Eğer geçerli hareketler varsa, genişletme işlemi yapılır
            if valid_moves:
                node.expand(action_probs)
            else:
                # Geçerli hareket yoksa, yedekleme yap
                value = node.state.get_winner()
                value = 1 if value == node.state.player_color else (0 if value == 2 else -1)
                node.backup(value)
                continue  # Diğer bir düğüme geçmek için döngüye devam et

        # Çocuk düğümlerinin varlığı kontrol ediliyor
        if node.is_expanded and len(node.children) > 0:
            unexpanded_children = [child for child in node.children.values() if not child.is_expanded]
            if unexpanded_children:
                # Rastgele bir genişletilmemiş çocuk seç
                node = np.random.choice(unexpanded_children)

            action_probs, value = self.evaluate(node.state)
            valid_moves = node.state.get_valid_moves()
            node.expand(action_probs)
        else:
            # Eğer hiçbir şey yapılamazsa, fallback değeriyle yedekleme yap
            value = node.state.get_winner()
            value = 1 if value == node.state.player_color else (0 if value == 2 else -1)
            node.backup(value)
            continue  # Geçerli düğüm üzerinde işlem yapamıyorsak, başka bir düğüme geç
    else:
        # Eğer düğüm terminalse, kazananı belirle
        value = node.state.get_winner()
        value = 1 if value == node.state.player_color else (0 if value == 2 else -1)

    # Değeri yedekle
    node.backup(value)
'''