# Purpose of the notebook

In this notebook there is the code and the results of the experiment I performed to see whether ChatGPT, to be more precise the curently free trial version GPT-4-Turbo, or DeepSeek-V3 would create a better tic-tac-toe agent using supervised learning. This experiment was conducted out of pure curiosity and with the aim of incorporating it into a broader work in the field of Intelligent Agents, more specifically building a tic-tac-toe agent that perfects the game, testing them against each other and against a perfect model

# DeepSeek Agents

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from collections import deque
import random

# Define the Tic Tac Toe environment
def initialize_board():
    """Initialize an empty 3x3 Tic Tac Toe board."""
    return np.zeros((3, 3), dtype=int)

def is_winner(board, player):
    """Check if the given player has won the game."""
    for i in range(3):
        if all(board[i, :] == player) or all(board[:, i] == player):
            return True
    if all([board[i, i] == player for i in range(3)]) or all([board[i, 2 - i] == player for i in range(3)]):
        return True
    return False

def is_draw(board):
    """Check if the game is a draw."""
    return np.all(board != 0)

def available_moves(board):
    """Get a list of available moves on the board."""
    return [(i, j) for i in range(3) for j in range(3) if board[i, j] == 0]

def make_move(board, move, player):
    """Make a move on the board for the given player."""
    board[move] = player

def generate_similar_games():
    """Generate game states similar to scenarios where the model struggled."""
    games = []
    for _ in range(100):
        board = initialize_board()
        # Scenario: Opponent is about to win unless blocked
        board[0, 2], board[1, 1], board[2, 0] = -1, -1, 0
        games.append((board, -1))  # -1's turn

        board = initialize_board()
        # Scenario: A winning move is available
        board[0, 0], board[1, 1], board[0, 1] = 1, 1, 0
        games.append((board, 1))  # 1's turn

    return games

# Define the improved model
def create_improved_model():
    """Create a CNN-based model for Tic Tac Toe."""
    model = Sequential([
        Conv2D(64, (2, 2), activation="relu", input_shape=(3, 3, 1)),  # Convolutional layer
        Flatten(),  # Flatten the output
        Dense(128, activation="relu"),  # Fully connected layer
        Dense(64, activation="relu"),   # Another fully connected layer
        Dense(9, activation="linear")   # Output layer for 9 possible moves
    ])
    model.compile(optimizer=Adam(learning_rate=0.001), loss="mse")  # Mean Squared Error loss
    return model

# Encode the board state
def encode_board(board):
    """Encode the board state into a 3x3x1 tensor."""
    return board.reshape(3, 3, 1)

# Choose a move based on the model's prediction
def choose_move(model, board, epsilon=0.1):
    """Choose a move using an epsilon-greedy strategy."""
    if np.random.rand() < epsilon:
        return random.choice(available_moves(board))  # Explore: choose a random move
    predictions = model.predict(np.expand_dims(encode_board(board), axis=0), verbose=0)
    sorted_moves = np.argsort(predictions[0])[::-1]  # Sort moves by predicted value
    for move in sorted_moves:
        x, y = divmod(move, 3)  # Convert flat index to (x, y) coordinates
        if (x, y) in available_moves(board):
            return (x, y)  # Exploit: choose the best valid move

# Experience replay buffer
class ReplayBuffer:
    """A buffer to store and sample past experiences for training."""
    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)

    def add(self, experience):
        """Add an experience to the buffer."""
        self.buffer.append(experience)

    def sample(self, batch_size):
        """Sample a batch of experiences from the buffer."""
        return random.sample(self.buffer, batch_size)

    def __len__(self):
        """Get the current size of the buffer."""
        return len(self.buffer)

# Train the model with experience replay
def train_model_with_replay(model, buffer, batch_size=32, epochs=10):
    """Train the model using experiences sampled from the replay buffer."""
    if len(buffer) < batch_size:
        return  # Not enough experiences to sample

    samples = buffer.sample(batch_size)
    x_train, y_train = [], []

    for board, move, reward in samples:
        x_train.append(encode_board(board))  # Encode the board state
        target = model.predict(np.expand_dims(encode_board(board), axis=0), verbose=0)[0]
        target[move[0] * 3 + move[1]] = reward  # Update the target for the chosen move
        y_train.append(target)

    x_train = np.array(x_train)
    y_train = np.array(y_train)
    model.fit(x_train, y_train, epochs=epochs, verbose=1)  # Train the model

# Self-play with experience replay
def self_play_training_with_replay(model, buffer, iterations=100):
    """Train the model through self-play and store experiences in the replay buffer."""
    for _ in range(iterations):
        state = initialize_board()
        player = 1
        history = []

        while True:
            move = choose_move(model, state)  # Choose a move
            make_move(state, move, player)  # Apply the move
            history.append((state.copy(), move, player))  # Store the state and move

            winner = is_winner(state, player)
            if winner or is_draw(state):
                # Assign rewards based on the game outcome
                reward = 1 if winner else 0.5
                for past_state, past_move, past_player in reversed(history):
                    buffer.add((past_state, past_move, reward))  # Add experience to the buffer
                    reward *= 0.9  # Discount factor for earlier moves
                break

            player = -player  # Switch players

# Main function
def main():
    """Main function to train and save the improved Tic Tac Toe model."""
    model = create_improved_model()
    buffer = ReplayBuffer(capacity=10000)

    # Targeted training based on scenarios where the model struggled
    similar_games = generate_similar_games()
    for board, player in similar_games:
        moves = available_moves(board)
        for move in moves:
            temp_board = board.copy()
            make_move(temp_board, move, player)
            reward = 1 if is_winner(temp_board, player) else 0.5
            buffer.add((board, move, reward))  # Add experiences to the buffer

    train_model_with_replay(model, buffer, epochs=20)  # Train the model

    # Self-play to refine strategies
    self_play_training_with_replay(model, buffer, iterations=100)

    # Save the updated model
    model.save("DeepSeekRNN.h5")

if __name__ == "__main__":
    main()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - loss: 0.0413
Epoch 2/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step - loss: 0.0367
Epoch 3/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step - loss: 0.0345
Epoch 4/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step - loss: 0.0334
Epoch 5/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step - loss: 0.0328
Epoch 6/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step - loss: 0.0325
Epoch 7/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - loss: 0.0324
Epoch 8/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step - loss: 0.0324
Epoch 9/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step - loss: 0.0324
Epoch 10/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step - loss: 0.0323
Epoch 11/20
[1m1/1[



In [None]:
import numpy as np
import random
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Dropout, Flatten, Input, Conv2D, MaxPooling2D
from sklearn.model_selection import train_test_split
import math
from functools import lru_cache

# Constants
class Config:
    BOARD_SIZE = 3
    NUM_TRAINING_SAMPLES = 2000
    PLAYERS = {'X': 1, 'O': -1}
    DEPTH = 5

# Create an empty board
def create_board():
    return np.zeros((Config.BOARD_SIZE, Config.BOARD_SIZE), dtype=int)

# Get valid moves
def get_valid_moves(board):
    return [(i, j) for i in range(Config.BOARD_SIZE) for j in range(Config.BOARD_SIZE) if board[i, j] == 0]

# Apply a move to the board
def apply_move(board, move, player):
    if move not in get_valid_moves(board):
        raise ValueError(f"Invalid move: {move}")
    board[move[0], move[1]] = player

# Check for a winner
def check_winner(board):
    for i in range(Config.BOARD_SIZE):
        if abs(sum(board[i, :])) == Config.BOARD_SIZE:  # Row check
            return np.sign(sum(board[i, :]))
        if abs(sum(board[:, i])) == Config.BOARD_SIZE:  # Column check
            return np.sign(sum(board[:, i]))

    # Diagonals
    if abs(sum([board[i, i] for i in range(Config.BOARD_SIZE)])) == Config.BOARD_SIZE:
        return np.sign(sum([board[i, i] for i in range(Config.BOARD_SIZE)]))
    if abs(sum([board[i, Config.BOARD_SIZE - i - 1] for i in range(Config.BOARD_SIZE)])) == Config.BOARD_SIZE:
        return np.sign(sum([board[i, Config.BOARD_SIZE - i - 1] for i in range(Config.BOARD_SIZE)]))

    if 0 not in board:
        return 0  # Draw

    return None  # Game ongoing

# Minimax with alpha-beta pruning and memoization
@lru_cache(maxsize=None)
def minimax(board_tuple, depth, maximizing_player, alpha=-math.inf, beta=math.inf):
    # Convert the tuple back to a numpy array for processing
    board = np.array(board_tuple).reshape(Config.BOARD_SIZE, Config.BOARD_SIZE)

    winner = check_winner(board)
    if winner is not None or depth == 0:
        return winner or 0  # 1 for win, -1 for loss, 0 for draw

    valid_moves = get_valid_moves(board)

    if maximizing_player:
        max_eval = -math.inf
        for move in valid_moves:
            temp_board = board.copy()
            apply_move(temp_board, move, 1)
            # Convert the board to a tuple before passing it to minimax
            eval = minimax(tuple(temp_board.ravel()), depth - 1, False, alpha, beta)
            max_eval = max(max_eval, eval)
            alpha = max(alpha, eval)
            if beta <= alpha:
                break
        return max_eval
    else:
        min_eval = math.inf
        for move in valid_moves:
            temp_board = board.copy()
            apply_move(temp_board, move, -1)
            # Convert the board to a tuple before passing it to minimax
            eval = minimax(tuple(temp_board.ravel()), depth - 1, True, alpha, beta)
            min_eval = min(min_eval, eval)
            beta = min(beta, eval)
            if beta <= alpha:
                break
        return min_eval

def minimax_move(board, player, depth=Config.DEPTH):
    valid_moves = get_valid_moves(board)
    best_move = None
    best_value = -math.inf if player == 1 else math.inf

    for move in valid_moves:
        temp_board = board.copy()
        apply_move(temp_board, move, player)
        # Convert the board to a tuple before passing it to minimax
        move_value = minimax(tuple(temp_board.ravel()), depth - 1, player == -1)

        if (player == 1 and move_value > best_value) or (player == -1 and move_value < best_value):
            best_value = move_value
            best_move = move

    return best_move
# Random player
def random_move(board, player):
    valid_moves = get_valid_moves(board)
    return random.choice(valid_moves) if valid_moves else None

# Heuristic player
def heuristic_move(board, player):
    valid_moves = get_valid_moves(board)
    if not valid_moves:
        return None

    # Try to win immediately
    for move in valid_moves:
        temp_board = board.copy()
        apply_move(temp_board, move, player)
        if check_winner(temp_board) == player:
            return move

    # Try to block opponent's winning move
    opponent = -player
    for move in valid_moves:
        temp_board = board.copy()
        apply_move(temp_board, move, opponent)
        if check_winner(temp_board) == opponent:
            return move

    # Take the center if available
    if (1, 1) in valid_moves:
        return (1, 1)

    # Take a corner if available
    corners = [(0, 0), (0, 2), (2, 0), (2, 2)]
    for corner in corners:
        if corner in valid_moves:
            return corner

    # Take any available move
    return random.choice(valid_moves)

# Generate training data
def generate_training_data(num_samples):
    X_data, y_data = [], []

    for _ in range(num_samples):
        board = create_board()
        moves = []
        player = 1 if random.random() < 0.5 else -1
        winner = None

        while True:
            valid_moves = get_valid_moves(board)

            # Choose player strategy
            player_type = random.choice(["minimax", "random", "heuristic"])

            if player_type == "minimax":
                move = minimax_move(board, player, depth=3)
            elif player_type == "random":
                move = random_move(board, player)
            elif player_type == "heuristic":
                move = heuristic_move(board, player)

            if move is None:
                break

            apply_move(board, move, player)
            moves.append((board.copy(), move, player))

            winner = check_winner(board)
            if winner is not None:
                break
            player *= -1

        for board_state, move, player in moves:
            X_data.append(board_state)
            y_data.append(move[0] * Config.BOARD_SIZE + move[1])

    X_data = np.array(X_data).reshape(-1, Config.BOARD_SIZE, Config.BOARD_SIZE, 1)
    y_data_moves = tf.keras.utils.to_categorical(y_data, num_classes=Config.BOARD_SIZE**2)
    return X_data, y_data_moves

# Build the model
def build_model():
    inputs = Input(shape=(Config.BOARD_SIZE, Config.BOARD_SIZE, 1))
    x = Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
    x = MaxPooling2D((2, 2))(x)
    x = Flatten()(x)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.3)(x)
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.3)(x)
    move_output = Dense(Config.BOARD_SIZE**2, activation='softmax', name='move_output')(x)

    model = Model(inputs=inputs, outputs=move_output)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Predict move
def predict_move(model, board, player):
    board_input = board.copy() * player
    board_input = board_input.reshape(1, Config.BOARD_SIZE, Config.BOARD_SIZE, 1)
    predictions = model.predict(board_input, verbose=0)[0]

    valid_moves = get_valid_moves(board)
    move_probs = np.zeros_like(predictions)
    for move in valid_moves:
        idx = move[0] * Config.BOARD_SIZE + move[1]
        move_probs[idx] = predictions[idx]

    best_move_idx = np.argmax(move_probs)
    move = (best_move_idx // Config.BOARD_SIZE, best_move_idx % Config.BOARD_SIZE)

    if move not in valid_moves:
        raise ValueError(f"Model predicted an invalid move: {move}")

    return move

# Main function
if __name__ == "__main__":
    print("Generating training data...")
    X, y_moves = generate_training_data(Config.NUM_TRAINING_SAMPLES)
    print(f"Training data generated: {X.shape[0]} samples")

    # Split data into training and validation sets
    X_train, X_val, y_train, y_val = train_test_split(X, y_moves, test_size=0.2, random_state=42)

    # Build and train the model
    model = build_model()
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    print("Training the model...")
    model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=10,
        batch_size=64,
        callbacks=[early_stopping]
    )

    # Save the model
    model.save("DeepSeek CNN.h5")
    print("Model trained and saved successfully!")

Generating training data...
Training data generated: 15097 samples
Training the model...
Epoch 1/10
[1m189/189[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 7ms/step - accuracy: 0.2100 - loss: 2.1260 - val_accuracy: 0.4175 - val_loss: 1.7276
Epoch 2/10
[1m189/189[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.3990 - loss: 1.6973 - val_accuracy: 0.4583 - val_loss: 1.5042
Epoch 3/10
[1m189/189[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.4582 - loss: 1.5292 - val_accuracy: 0.4917 - val_loss: 1.3941
Epoch 4/10
[1m189/189[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.4953 - loss: 1.4164 - val_accuracy: 0.5199 - val_loss: 1.3059
Epoch 5/10
[1m189/189[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.5318 - loss: 1.3212 - val_accuracy: 0.5530 - val_loss: 1.2455
Epoch 6/10
[1m189/189[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.5



Model trained and saved successfully!


In [None]:
import numpy as np
import random
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Dropout, Flatten, Input, BatchNormalization
from sklearn.model_selection import train_test_split
import math

# Constants
BOARD_SIZE = 3
NUM_TRAINING_SAMPLES = 2000

# Create an empty board
def create_board():
    return np.zeros((BOARD_SIZE, BOARD_SIZE), dtype=int)

# Get valid moves
def get_valid_moves(board):
    return [(i, j) for i in range(BOARD_SIZE) for j in range(BOARD_SIZE) if board[i, j] == 0]

# Apply a move to the board
def apply_move(board, move, player):
    if move in get_valid_moves(board):
        board[move[0], move[1]] = player
    else:
        raise ValueError(f"Invalid move: {move}")

# Check for a winner
def check_winner(board):
    for i in range(BOARD_SIZE):
        if abs(sum(board[i, :])) == BOARD_SIZE:  # Row check
            return np.sign(sum(board[i, :]))
        if abs(sum(board[:, i])) == BOARD_SIZE:  # Column check
            return np.sign(sum(board[:, i]))

    # Diagonals
    if abs(sum([board[i, i] for i in range(BOARD_SIZE)])) == BOARD_SIZE:
        return np.sign(sum([board[i, i] for i in range(BOARD_SIZE)]))
    if abs(sum([board[i, BOARD_SIZE - i - 1] for i in range(BOARD_SIZE)])) == BOARD_SIZE:
        return np.sign(sum([board[i, BOARD_SIZE - i - 1] for i in range(BOARD_SIZE)]))

    if 0 not in board:
        return 0  # Draw

    return None  # Game ongoing

# Minimax with alpha-beta pruning
def minimax(board, depth, maximizing_player, alpha=-math.inf, beta=math.inf):
    winner = check_winner(board)
    if winner is not None or depth == 0:
        return winner or 0  # 1 for win, -1 for loss, 0 for draw

    valid_moves = get_valid_moves(board)

    if maximizing_player:
        max_eval = -math.inf
        for move in valid_moves:
            temp_board = board.copy()
            apply_move(temp_board, move, 1)
            eval = minimax(temp_board, depth - 1, False, alpha, beta)
            max_eval = max(max_eval, eval)
            alpha = max(alpha, eval)
            if beta <= alpha:
                break
        return max_eval
    else:
        min_eval = math.inf
        for move in valid_moves:
            temp_board = board.copy()
            apply_move(temp_board, move, -1)
            eval = minimax(temp_board, depth - 1, True, alpha, beta)
            min_eval = min(min_eval, eval)
            beta = min(beta, eval)
            if beta <= alpha:
                break
        return min_eval

# Choose move using minimax
def minimax_move(board, player, depth=5):
    valid_moves = get_valid_moves(board)
    best_move = None
    best_value = -math.inf if player == 1 else math.inf

    for move in valid_moves:
        temp_board = board.copy()
        apply_move(temp_board, move, player)
        move_value = minimax(temp_board, depth - 1, player == -1)

        if (player == 1 and move_value > best_value) or (player == -1 and move_value < best_value):
            best_value = move_value
            best_move = move

    return best_move

# Random player
def random_move(board, player):
    valid_moves = get_valid_moves(board)
    return random.choice(valid_moves) if valid_moves else None

# Heuristic player
def heuristic_move(board, player):
    valid_moves = get_valid_moves(board)
    if not valid_moves:
        return None

    # Try to win immediately
    for move in valid_moves:
        temp_board = board.copy()
        apply_move(temp_board, move, player)
        if check_winner(temp_board) == player:
            return move

    # Try to block opponent's winning move
    opponent = -player
    for move in valid_moves:
        temp_board = board.copy()
        apply_move(temp_board, move, opponent)
        if check_winner(temp_board) == opponent:
            return move

    # Take the center if available
    if (1, 1) in valid_moves:
        return (1, 1)

    # Take a corner if available
    corners = [(0, 0), (0, 2), (2, 0), (2, 2)]
    for corner in corners:
        if corner in valid_moves:
            return corner

    # Take any available move
    return random.choice(valid_moves)

# Generate training data
def generate_training_data(num_samples):
    X_data, y_data = [], []

    for _ in range(num_samples):
        board = create_board()
        moves = []
        player = 1 if random.random() < 0.5 else -1
        winner = None

        while True:
            valid_moves = get_valid_moves(board)

            # Choose player strategy
            player_type = random.choice(["minimax", "random", "heuristic"])

            if player_type == "minimax":
                move = minimax_move(board, player, depth=3)
            elif player_type == "random":
                move = random_move(board, player)
            elif player_type == "heuristic":
                move = heuristic_move(board, player)

            if move is None:
                break

            apply_move(board, move, player)
            moves.append((board.copy(), move, player))

            winner = check_winner(board)
            if winner is not None:
                break
            player *= -1

        for board_state, move, player in moves:
            X_data.append(board_state)
            y_data.append(move[0] * BOARD_SIZE + move[1])

    X_data = np.array(X_data).reshape(-1, BOARD_SIZE, BOARD_SIZE, 1)
    y_data_moves = tf.keras.utils.to_categorical(y_data, num_classes=BOARD_SIZE**2)
    return X_data, y_data_moves

# Build the model
def build_model():
    inputs = Input(shape=(BOARD_SIZE, BOARD_SIZE, 1))
    x = Flatten()(inputs)
    x = Dense(512, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.4)(x)
    x = Dense(256, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.4)(x)
    x = Dense(128, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.4)(x)
    move_output = Dense(BOARD_SIZE**2, activation='softmax', name='move_output')(x)

    model = Model(inputs=inputs, outputs=move_output)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Predict move
def predict_move(model, board, player):
    board_input = board.copy() * player
    board_input = board_input.reshape(1, BOARD_SIZE, BOARD_SIZE, 1)
    predictions = model.predict(board_input, verbose=0)[0]

    valid_moves = get_valid_moves(board)
    move_probs = np.zeros_like(predictions)
    for move in valid_moves:
        idx = move[0] * BOARD_SIZE + move[1]
        move_probs[idx] = predictions[idx]

    best_move_idx = np.argmax(move_probs)
    move = (best_move_idx // BOARD_SIZE, best_move_idx % BOARD_SIZE)

    if move not in valid_moves:
        raise ValueError(f"Model predicted an invalid move: {move}")

    return move

# Main function
if __name__ == "__main__":
    print("Generating training data...")
    X, y_moves = generate_training_data(NUM_TRAINING_SAMPLES)
    print(f"Training data generated: {X.shape[0]} samples")

    # Split data into training and validation sets
    X_train, X_val, y_train, y_val = train_test_split(X, y_moves, test_size=0.2, random_state=42)

    # Build and train the model
    model = build_model()
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6)

    print("Training the model...")
    model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=20,
        batch_size=64,
        callbacks=[early_stopping, lr_scheduler]
    )

    # Save the model
    model.save("DeepSeek NN.h5")
    print("Model trained and saved successfully!")


Generating training data...
Training data generated: 15129 samples
Training the model...
Epoch 1/20
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 11ms/step - accuracy: 0.2703 - loss: 2.4741 - val_accuracy: 0.3493 - val_loss: 1.8025 - learning_rate: 0.0010
Epoch 2/20
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 10ms/step - accuracy: 0.5005 - loss: 1.4935 - val_accuracy: 0.6186 - val_loss: 1.2162 - learning_rate: 0.0010
Epoch 3/20
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 13ms/step - accuracy: 0.5612 - loss: 1.2927 - val_accuracy: 0.6824 - val_loss: 0.9152 - learning_rate: 0.0010
Epoch 4/20
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 17ms/step - accuracy: 0.6014 - loss: 1.1529 - val_accuracy: 0.6933 - val_loss: 0.8404 - learning_rate: 0.0010
Epoch 5/20
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 10ms/step - accuracy: 0.6180 - loss: 1.0991 - val_accuracy: 0.6943 - val_loss: 0.8192 



Model trained and saved successfully!


DeepSeek TOURNAMENT

In [None]:
import numpy as np
from tensorflow.keras.models import load_model
import time
from tensorflow.keras.losses import MeanSquaredError
BOARD_SIZE = 3
model1 = load_model("DeepSeek NN.h5", custom_objects={"mse": MeanSquaredError()})
model2 = load_model("DeepSeek CNN.h5", custom_objects={"mse": MeanSquaredError()})
model3 = load_model("DeepSeekRNN.h5", custom_objects={"mse": MeanSquaredError()})
model_names = {
    model1: "DeepSeekNN",
    model2: "DeepSeekCNN",
    model3: "DeepSeekRNN"
}
def display_board(board):
    symbols = {0: '.', 1: 'X', -1: 'O'}
    for row in board:
        print(" ".join(symbols[cell] for cell in row))
    print("\n")

def get_valid_moves(board):
    return [(i, j) for i in range(BOARD_SIZE) for j in range(BOARD_SIZE) if board[i, j] == 0]
def apply_move(board, move, player):
    board[move[0], move[1]] = player
def check_winner(board):
    for i in range(BOARD_SIZE):
        if abs(sum(board[i, :])) == BOARD_SIZE:
            return np.sign(sum(board[i, :]))
        if abs(sum(board[:, i])) == BOARD_SIZE:
            return np.sign(sum(board[:, i]))


    if abs(sum([board[i, i] for i in range(BOARD_SIZE)])) == BOARD_SIZE:
        return np.sign(sum([board[i, i] for i in range(BOARD_SIZE)]))
    if abs(sum([board[i, BOARD_SIZE - i - 1] for i in range(BOARD_SIZE)])) == BOARD_SIZE:
        return np.sign(sum([board[i, BOARD_SIZE - i - 1] for i in range(BOARD_SIZE)]))

    if 0 not in board:
        return 0

    return None
def predict_move(model, board, player):
    board_input = board.copy() * player

    if len(model.input_shape) == 4 and model.input_shape[1:] == (3, 3, 1):
        board_input = board_input.reshape(1, BOARD_SIZE, BOARD_SIZE, 1).astype("float32")
    elif len(model.input_shape) == 2 and model.input_shape[1] == 9:
        board_input = board_input.reshape(1, BOARD_SIZE * BOARD_SIZE).astype("float32")
    elif len(model.input_shape) == 3 and model.input_shape[1:] == (9, 1):
        board_input = board_input.reshape(1, BOARD_SIZE * BOARD_SIZE, 1).astype("float32")
    else:
        raise ValueError(f"Unexpected input shape for the model: {model.input_shape}")

    valid_moves = get_valid_moves(board)
    for move in valid_moves:
        temp_board = board.copy()
        apply_move(temp_board, move, player)
        if check_winner(temp_board) == player:
            return move

    predictions = model.predict(board_input, verbose=0)[0].flatten()
    move_probs = np.zeros_like(predictions)
    for move in valid_moves:
        idx = move[0] * BOARD_SIZE + move[1]
        move_probs[idx] = predictions[idx]

    best_move_idx = np.argmax(move_probs)
    return (best_move_idx // BOARD_SIZE, best_move_idx % BOARD_SIZE)
def play_game(model_a, model_b, game_num):
    board = np.zeros((BOARD_SIZE, BOARD_SIZE), dtype=int)
    player = 1

    print(f"Game {game_num} starts:")
    while True:
        print(f"Player {player}'s turn:")
        display_board(board)

        move = predict_move(model_a if player == 1 else model_b, board, player)
        apply_move(board, move, player)
        winner = check_winner(board)
        if winner is not None:
            print("\nFinal Board:")
            display_board(board)
            return winner

        player *= -1
def conduct_tournament(num_games=3):
    models = [model1, model2, model3]
    scores = {model_names[m]: 0 for m in models}
    scores["Draw"] = 0

    game_num = 1
    for i in range(len(models)):
        for j in range(i + 1, len(models)):
            model_a, model_b = models[i], models[j]
            name_a, name_b = model_names[model_a], model_names[model_b]

            for _ in range(num_games):
                print(f"{name_a} vs {name_b}")
                winner = play_game(model_a, model_b, game_num)
                if winner == 1:
                    print(f"{name_a} wins!\n")
                    scores[name_a] += 1
                elif winner == -1:
                    print(f"{name_b} wins!\n")
                    scores[name_b] += 1
                else:
                    print("It's a draw!\n")
                    scores["Draw"] += 1
                game_num += 1
                time.sleep(1)

            for _ in range(num_games):
                print(f"{name_b} vs {name_a}")
                winner = play_game(model_b, model_a, game_num)
                if winner == 1:
                    print(f"{name_b} wins!\n")
                    scores[name_b] += 1
                elif winner == -1:
                    print(f"{name_a} wins!\n")
                    scores[name_a] += 1
                else:
                    print("It's a draw!\n")
                    scores["Draw"] += 1
                game_num += 1
                time.sleep(1)

    print("Tournament Results:")
    for model, score in scores.items():
        print(f"{model}: {score}")

if __name__ == "__main__":
    conduct_tournament(num_games=1)




DeepSeekNN vs DeepSeekCNN
Game 1 starts:
Player 1's turn:
. . .
. . .
. . .


Player -1's turn:
X . .
. . .
. . .


Player 1's turn:
X . .
. O .
. . .


Player -1's turn:
X . .
X O .
. . .


Player 1's turn:
X . .
X O .
O . .


Player -1's turn:
X . X
X O .
O . .


Player 1's turn:
X O X
X O .
O . .


Player -1's turn:
X O X
X O X
O . .



Final Board:
X O X
X O X
O O .


DeepSeekCNN wins!

DeepSeekCNN vs DeepSeekNN
Game 2 starts:
Player 1's turn:
. . .
. . .
. . .


Player -1's turn:
X . .
. . .
. . .


Player 1's turn:
X . .
. O .
. . .


Player -1's turn:
X . .
. O .
. X .


Player 1's turn:
X . .
. O .
O X .


Player -1's turn:
X . .
X O .
O X .



Final Board:
X . O
X O .
O X .


DeepSeekNN wins!

DeepSeekNN vs DeepSeekRNN
Game 3 starts:
Player 1's turn:
. . .
. . .
. . .


Player -1's turn:
X . .
. . .
. . .


Player 1's turn:
X . .
. . .
O . .


Player -1's turn:
X . .
. X .
O . .


Player 1's turn:
X . .
. X .
O O .



Final Board:
X . .
. X .
O O X


DeepSeekNN wins!

DeepSeek

# ChatGPT Agents

GPT NN

In [None]:
import numpy as np
import random
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Dropout, Flatten, Input
from sklearn.model_selection import train_test_split
import math

# Constants
BOARD_SIZE = 3
NUM_TRAINING_SAMPLES = 2000

# Create an empty board
def create_board():
    return np.zeros((BOARD_SIZE, BOARD_SIZE), dtype=int)

# Get valid moves
def get_valid_moves(board):
    return [(i, j) for i in range(BOARD_SIZE) for j in range(BOARD_SIZE) if board[i, j] == 0]

# Apply a move to the board
def apply_move(board, move, player):
    if move in get_valid_moves(board):
        board[move[0], move[1]] = player
    else:
        raise ValueError(f"Invalid move: {move}")

# Check for a winner
def check_winner(board):
    for i in range(BOARD_SIZE):
        if abs(sum(board[i, :])) == BOARD_SIZE:  # Row check
            return np.sign(sum(board[i, :]))
        if abs(sum(board[:, i])) == BOARD_SIZE:  # Column check
            return np.sign(sum(board[:, i]))

    # Diagonals
    if abs(sum([board[i, i] for i in range(BOARD_SIZE)])) == BOARD_SIZE:
        return np.sign(sum([board[i, i] for i in range(BOARD_SIZE)]))
    if abs(sum([board[i, BOARD_SIZE - i - 1] for i in range(BOARD_SIZE)])) == BOARD_SIZE:
        return np.sign(sum([board[i, BOARD_SIZE - i - 1] for i in range(BOARD_SIZE)]))

    if 0 not in board:
        return 0  # Draw

    return None  # Game ongoing

# Minimax with alpha-beta pruning
def minimax(board, depth, maximizing_player, alpha=-math.inf, beta=math.inf):
    winner = check_winner(board)
    if winner is not None or depth == 0:
        return winner or 0  # 1 for win, -1 for loss, 0 for draw

    valid_moves = get_valid_moves(board)

    if maximizing_player:
        max_eval = -math.inf
        for move in valid_moves:
            temp_board = board.copy()
            apply_move(temp_board, move, 1)
            eval = minimax(temp_board, depth - 1, False, alpha, beta)
            max_eval = max(max_eval, eval)
            alpha = max(alpha, eval)
            if beta <= alpha:
                break
        return max_eval
    else:
        min_eval = math.inf
        for move in valid_moves:
            temp_board = board.copy()
            apply_move(temp_board, move, -1)
            eval = minimax(temp_board, depth - 1, True, alpha, beta)
            min_eval = min(min_eval, eval)
            beta = min(beta, eval)
            if beta <= alpha:
                break
        return min_eval

# Choose move using minimax
def minimax_move(board, player, depth=5):
    valid_moves = get_valid_moves(board)
    best_move = None
    best_value = -math.inf if player == 1 else math.inf

    for move in valid_moves:
        temp_board = board.copy()
        apply_move(temp_board, move, player)
        move_value = minimax(temp_board, depth - 1, player == -1)

        if (player == 1 and move_value > best_value) or (player == -1 and move_value < best_value):
            best_value = move_value
            best_move = move

    return best_move

# Random player
def random_move(board, player):
    valid_moves = get_valid_moves(board)
    return random.choice(valid_moves) if valid_moves else None

# Heuristic player
def heuristic_move(board, player):
    valid_moves = get_valid_moves(board)
    if not valid_moves:
        return None

    # Try to win immediately
    for move in valid_moves:
        temp_board = board.copy()
        apply_move(temp_board, move, player)
        if check_winner(temp_board) == player:
            return move

    # Try to block opponent's winning move
    opponent = -player
    for move in valid_moves:
        temp_board = board.copy()
        apply_move(temp_board, move, opponent)
        if check_winner(temp_board) == opponent:
            return move

    # Take the center if available
    if (1, 1) in valid_moves:
        return (1, 1)

    # Take a corner if available
    corners = [(0, 0), (0, 2), (2, 0), (2, 2)]
    for corner in corners:
        if corner in valid_moves:
            return corner

    # Take any available move
    return random.choice(valid_moves)

# Generate training data
def generate_training_data(num_samples):
    X_data, y_data = [], []

    for _ in range(num_samples):
        board = create_board()
        moves = []
        player = 1 if random.random() < 0.5 else -1
        winner = None

        while True:
            valid_moves = get_valid_moves(board)

            # Choose player strategy
            player_type = random.choice(["minimax", "random", "heuristic"])

            if player_type == "minimax":
                move = minimax_move(board, player, depth=3)
            elif player_type == "random":
                move = random_move(board, player)
            elif player_type == "heuristic":
                move = heuristic_move(board, player)

            if move is None:
                break

            apply_move(board, move, player)
            moves.append((board.copy(), move, player))

            winner = check_winner(board)
            if winner is not None:
                break
            player *= -1

        for board_state, move, player in moves:
            X_data.append(board_state)
            y_data.append(move[0] * BOARD_SIZE + move[1])

    X_data = np.array(X_data).reshape(-1, BOARD_SIZE, BOARD_SIZE, 1)
    y_data_moves = tf.keras.utils.to_categorical(y_data, num_classes=BOARD_SIZE**2)
    return X_data, y_data_moves

# Build the model
def build_model():
    inputs = Input(shape=(BOARD_SIZE, BOARD_SIZE, 1))
    x = Flatten()(inputs)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.3)(x)
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.3)(x)
    move_output = Dense(BOARD_SIZE**2, activation='softmax', name='move_output')(x)

    model = Model(inputs=inputs, outputs=move_output)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Predict move
def predict_move(model, board, player):
    board_input = board.copy() * player
    board_input = board_input.reshape(1, BOARD_SIZE, BOARD_SIZE, 1)
    predictions = model.predict(board_input, verbose=0)[0]

    valid_moves = get_valid_moves(board)
    move_probs = np.zeros_like(predictions)
    for move in valid_moves:
        idx = move[0] * BOARD_SIZE + move[1]
        move_probs[idx] = predictions[idx]

    best_move_idx = np.argmax(move_probs)
    move = (best_move_idx // BOARD_SIZE, best_move_idx % BOARD_SIZE)

    if move not in valid_moves:
        raise ValueError(f"Model predicted an invalid move: {move}")

    return move

# Main function
if __name__ == "__main__":
    print("Generating training data...")
    X, y_moves = generate_training_data(NUM_TRAINING_SAMPLES)
    print(f"Training data generated: {X.shape[0]} samples")

    # Split data into training and validation sets
    X_train, X_val, y_train, y_val = train_test_split(X, y_moves, test_size=0.2, random_state=42)

    # Build and train the model
    model = build_model()
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    print("Training the model...")
    model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=10,
        batch_size=64,
        callbacks=[early_stopping]
    )

    # Save the model
    model.save("tictactoe_GPT_NN_model.h5")
    print("Model trained and saved successfully!")


Generating training data...
Training data generated: 15181 samples
Training the model...
Epoch 1/10
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - accuracy: 0.2576 - loss: 2.0875 - val_accuracy: 0.5390 - val_loss: 1.5392
Epoch 2/10
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.5001 - loss: 1.5148 - val_accuracy: 0.6490 - val_loss: 1.1164
Epoch 3/10
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.5668 - loss: 1.2439 - val_accuracy: 0.6697 - val_loss: 0.9779
Epoch 4/10
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.6002 - loss: 1.1400 - val_accuracy: 0.6862 - val_loss: 0.8985
Epoch 5/10
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.6115 - loss: 1.0616 - val_accuracy: 0.7017 - val_loss: 0.8536
Epoch 6/10
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.6



Model trained and saved successfully!


GPT CNN

In [None]:
import numpy as np
import random
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D, Flatten, Dense, Dropout, BatchNormalization, Input
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
import math

# Constants
BOARD_SIZE = 3
NUM_TRAINING_SAMPLES = 8000

# Create an empty board
def create_board():
    return np.zeros((BOARD_SIZE, BOARD_SIZE), dtype=int)

# Get valid moves
def get_valid_moves(board):
    return [(i, j) for i in range(BOARD_SIZE) for j in range(BOARD_SIZE) if board[i, j] == 0]

# Apply a move to the board
def apply_move(board, move, player):
    board[move[0], move[1]] = player

# Check for a winner
def check_winner(board):
    for i in range(BOARD_SIZE):
        if abs(sum(board[i, :])) == BOARD_SIZE:  # Row check
            return np.sign(sum(board[i, :]))
        if abs(sum(board[:, i])) == BOARD_SIZE:  # Column check
            return np.sign(sum(board[:, i]))

    # Diagonals
    if abs(sum([board[i, i] for i in range(BOARD_SIZE)])) == BOARD_SIZE:
        return np.sign(sum([board[i, i] for i in range(BOARD_SIZE)]))
    if abs(sum([board[i, BOARD_SIZE - i - 1] for i in range(BOARD_SIZE)])) == BOARD_SIZE:
        return np.sign(sum([board[i, BOARD_SIZE - i - 1] for i in range(BOARD_SIZE)]))

    if 0 not in board:
        return 0  # Draw

    return None  # Game ongoing

# Enhanced heuristic agent with adjacency prioritization
def enhanced_heuristic_agent(board, player):
    opponent = -player
    valid_moves = get_valid_moves(board)

    # Check for winning move
    for move in valid_moves:
        temp_board = board.copy()
        apply_move(temp_board, move, player)
        if check_winner(temp_board) == player:
            return move

    # Check for blocking move
    for move in valid_moves:
        temp_board = board.copy()
        apply_move(temp_board, move, opponent)
        if check_winner(temp_board) == opponent:
            return move

    # Prioritize moves adjacent to existing pieces
    adjacent_moves = []
    for move in valid_moves:
        for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1), (-1, -1), (1, 1), (-1, 1), (1, -1)]:
            adj_x, adj_y = move[0] + dx, move[1] + dy
            if 0 <= adj_x < BOARD_SIZE and 0 <= adj_y < BOARD_SIZE and board[adj_x, adj_y] == player:
                adjacent_moves.append(move)
                break

    if adjacent_moves:
        return random.choice(adjacent_moves)

    # Center
    center = ((1, 1))

    if center in valid_moves:
        return center


    return random.choice(valid_moves)

def minimax(board, depth, maximizing_player, alpha=-math.inf, beta=math.inf):
    winner = check_winner(board)
    if winner is not None or depth == 0:
        return winner or 0  # 1 for win, -1 for loss, 0 for draw

    valid_moves = get_valid_moves(board)

    # Check for immediate winning move and play it
    if maximizing_player:
        for move in valid_moves:
            temp_board = board.copy()
            apply_move(temp_board, move, 1)  # Player 1
            if check_winner(temp_board) == 1:
                return 1  # Winning move found
    else:
        for move in valid_moves:
            temp_board = board.copy()
            apply_move(temp_board, move, -1)  # Player -1
            if check_winner(temp_board) == -1:
                return -1  # Winning move found

    if maximizing_player:
        max_eval = -math.inf
        for move in valid_moves:
            temp_board = board.copy()
            apply_move(temp_board, move, 1)
            eval = minimax(temp_board, depth - 1, False, alpha, beta)
            max_eval = max(max_eval, eval)
            alpha = max(alpha, eval)
            if beta <= alpha:
                break
        return max_eval
    else:
        min_eval = math.inf
        for move in valid_moves:
            temp_board = board.copy()
            apply_move(temp_board, move, -1)
            eval = minimax(temp_board, depth - 1, True, alpha, beta)
            min_eval = min(min_eval, eval)
            beta = min(beta, eval)
            if beta <= alpha:
                break
        return min_eval


# Smart agent combining heuristic and minimax
def smart_agent(board, player):
    # Use heuristic for quick checks
    move = enhanced_heuristic_agent(board, player)
    if move:
        return move

    # Use minimax for strategic decision-making
    valid_moves = get_valid_moves(board)
    best_move = None
    best_score = -math.inf if player == 1 else math.inf

    for move in valid_moves:
        temp_board = board.copy()
        apply_move(temp_board, move, player)
        score = minimax(temp_board, depth=3, maximizing_player=(player == 1))
        if (player == 1 and score > best_score) or (player == -1 and score < best_score):
            best_score = score
            best_move = move

    return best_move

def generate_training_data(num_samples):
    X_data, y_data = [], []

    for _ in range(num_samples):
      board = create_board()
      moves = []
      player = 1

    while True:
        if player == 1:
            move = enhanced_heuristic_agent(board, player)
        else:
            move = None
            best_score = -math.inf if player == 1 else math.inf

            for valid_move in get_valid_moves(board):
                temp_board = board.copy()
                apply_move(temp_board, valid_move, player)
                score = minimax(temp_board, depth=3, maximizing_player=(player == 1))

                if (player == 1 and score > best_score) or (player == -1 and score < best_score):
                    best_score = score
                    move = valid_move

        apply_move(board, move, player)
        moves.append((board.copy(), move, player))

        winner = check_winner(board)
        if winner is not None:
            break

        player *= -1

        for board_state, move, player in moves:
            reward = 1 if winner == player else -1 if winner == -player else 0
            X_data.append(board_state)
            y_data.append((move[0] * BOARD_SIZE + move[1], reward))

    X_data = np.array(X_data).reshape(-1, BOARD_SIZE, BOARD_SIZE, 1)
    y_data_moves = np.array([np.eye(BOARD_SIZE ** 2)[move] for move, _ in y_data])
    y_data_rewards = np.array([reward for _, reward in y_data])

    return X_data, y_data_moves, y_data_rewards

def custom_loss(y_true, y_pred):
    # Cross-entropy loss
    base_loss = tf.keras.losses.categorical_crossentropy(y_true, y_pred)

    # Penalize missing center move
    center_penalty = tf.reduce_mean(tf.where(y_true[:, 4] == 1, 0.0, 1.0))  # Center is index 4 in a 3x3 flattened board

    return base_loss + 0.1 * center_penalty  # Weight the penalty as needed


# Build a regularized model with the updated loss
def build_model():
    model = Sequential()

    # Input layer
    model.add(Input(shape=(BOARD_SIZE, BOARD_SIZE, 1)))

    # First convolutional block
    model.add(Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same', kernel_regularizer=tf.keras.regularizers.l2(0.01)))
    model.add(Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same', kernel_regularizer=tf.keras.regularizers.l2(0.01)))
    model.add(BatchNormalization())

    # Second convolutional block
    model.add(Conv2D(128, kernel_size=(3, 3), activation='relu', padding='same', kernel_regularizer=tf.keras.regularizers.l2(0.01)))
    model.add(Conv2D(128, kernel_size=(3, 3), activation='relu', padding='same', kernel_regularizer=tf.keras.regularizers.l2(0.01)))
    model.add(BatchNormalization())

    # Flatten and dense layers
    model.add(Flatten())
    model.add(Dense(256, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01)))
    model.add(Dropout(0.5))
    model.add(Dense(256, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01)))
    model.add(Dropout(0.5))

    # Output layer
    model.add(Dense(BOARD_SIZE * BOARD_SIZE, activation='softmax'))

    # Compile the model
    model.compile(optimizer=Adam(learning_rate=0.001),
                  loss=custom_loss,
                  metrics=['accuracy'])
    return model


# Adjust training loop
if __name__ == "__main__":
    print("Generating training, validation, and test data...")

    X_train, y_train_moves, y_train_rewards = generate_training_data(8000)
    X_val, y_val_moves, y_val_rewards = generate_training_data(1000)
    X_test, y_test_moves, y_test_rewards = generate_training_data(1000)

    # Build and train the model
    model = build_model()
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-5)

    print("Training the model...")
    model.fit(
        X_train, y_train_moves,
        validation_data=(X_val, y_val_moves),
        sample_weight=1 + y_train_rewards,  # Emphasizes rewards, including blocking
        epochs=5,
        batch_size=32,
        callbacks=[early_stopping, lr_scheduler]
    )

    # Evaluate the model on the test set
    print("Evaluating the model on the test set...")
    test_loss, test_accuracy = model.evaluate(X_test, y_test_moves)
    print(f"Test Accuracy: {test_accuracy:.4f}")
    model.save("GPT CNN Model.h5")


Generating training, validation, and test data...
Training the model...
Epoch 1/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6s/step - accuracy: 0.0000e+00 - loss: 13.4396 - val_accuracy: 0.0000e+00 - val_loss: 11.6011 - learning_rate: 0.0010
Epoch 2/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 278ms/step - accuracy: 0.3333 - loss: 11.0649 - val_accuracy: 0.6667 - val_loss: 11.4609 - learning_rate: 0.0010
Epoch 3/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 116ms/step - accuracy: 0.6667 - loss: 10.9779 - val_accuracy: 0.6667 - val_loss: 11.3322 - learning_rate: 0.0010
Epoch 4/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 175ms/step - accuracy: 0.6667 - loss: 9.7971 - val_accuracy: 0.6667 - val_loss: 11.2112 - learning_rate: 0.0010
Epoch 5/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 157ms/step - accuracy: 0.3333 - loss: 11.1889 - val_accuracy: 1.0000 - val_loss: 11.0957 - learning_rate: 0.0010



Test Accuracy: 0.6667


GPT RNN

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense
from tensorflow.keras.optimizers import Adam

# Define the Tic Tac Toe environment
def initialize_board():
    return np.zeros((3, 3), dtype=int)

def is_winner(board, player):
    for i in range(3):
        if all(board[i, :] == player) or all(board[:, i] == player):
            return True
    if all([board[i, i] == player for i in range(3)]) or all([board[i, 2 - i] == player for i in range(3)]):
        return True
    return False

def is_draw(board):
    return np.all(board != 0)

def available_moves(board):
    return [(i, j) for i in range(3) for j in range(3) if board[i, j] == 0]

def make_move(board, move, player):
    board[move] = player

def generate_similar_games():
    """
    Generate game states similar to the tournament scenarios where Model 3 struggled.
    """
    games = []
    for _ in range(100):
        board = initialize_board()
        # Example scenario: The opponent is about to win unless blocked
        board[0, 2], board[1, 1], board[2, 0] = -1, -1, 0
        games.append((board, -1))  # -1's turn

        board = initialize_board()
        # Example scenario: A winning move is available
        board[0, 0], board[1, 1], board[0, 1] = 1, 1, 0
        games.append((board, 1))  # 1's turn

    return games

# Define the RNN model
def create_rnn_model():
    model = Sequential([
        SimpleRNN(128, activation="relu", input_shape=(9, 1)),
        Dense(64, activation="relu"),
        Dense(9, activation="linear")
    ])
    model.compile(optimizer=Adam(learning_rate=0.001), loss="mse")
    return model

# Encode the board state
def encode_board(board):
    return board.flatten().reshape(1, 9, 1)

# Choose a move based on the model's prediction
def choose_move(model, board, epsilon=0.1):
    if np.random.rand() < epsilon:
        return random.choice(available_moves(board))
    predictions = model.predict(encode_board(board), verbose=0)
    sorted_moves = np.argsort(predictions[0])[::-1]
    for move in sorted_moves:
        x, y = divmod(move, 3)
        if (x, y) in available_moves(board):
            return (x, y)

# Train the model
def train_model(model, games, epochs=10):
    x_train, y_train = [], []

    for board, player in games:
        moves = available_moves(board)
        for move in moves:
            temp_board = board.copy()
            make_move(temp_board, move, player)

            reward = 0
            if is_winner(temp_board, player):
                reward = 1
            elif is_draw(temp_board):
                reward = 0.5

            x_train.append(encode_board(board).flatten())
            target = model.predict(encode_board(board), verbose=0)[0]
            target[move[0] * 3 + move[1]] = reward
            y_train.append(target)

    x_train = np.array(x_train)
    y_train = np.array(y_train)
    model.fit(x_train, y_train, epochs=epochs, verbose=1)

# Self-play
def self_play_training(model, iterations):
    for _ in range(iterations):
        state = np.zeros((3, 3))  # Initialize an empty board
        player = 1
        history = []

        while True:
            # Model chooses a move
            encoded_state = encode_board(state).reshape(1, 9).astype("float32")
            predictions = model.predict(encoded_state, verbose=0).flatten()

            # Pick the move with the highest value that's valid
            valid_moves = [(i, j) for i in range(3) for j in range(3) if state[i, j] == 0]
            move_values = {move: predictions[move[0] * 3 + move[1]] for move in valid_moves}
            move = max(move_values, key=move_values.get)

            # Apply the move
            state[move] = player
            history.append((state.copy(), player, move))

            # Check if the game is over
            winner = check_winner(state)
            if winner is not None:
                # Assign rewards for the moves in reverse order
                reward = 1 if winner == player else -1
                for past_state, past_player, past_move in reversed(history):
                    target = model.predict(encode_board(past_state).reshape(1, 9).astype("float32"), verbose=0).flatten()
                    target[past_move[0] * 3 + past_move[1]] = reward
                    reward *= 0.9  # Discount factor
                    model.fit(
                        encode_board(past_state).reshape(1, 9).astype("float32"),
                        target.reshape(1, -1),
                        verbose=0,
                    )
                break

            # Switch player
            player = -player

# Main function
def main():
    model = create_rnn_model()

    # Targeted training based on scenarios where Model 3 struggled
    similar_games = generate_similar_games()
    train_model(model, similar_games, epochs=20)

    # Self-play to refine strategies
    self_play_training(model, iterations=100)

    # Save the updated model
    model.save("GPT RNN Model.h5")

if __name__ == "__main__":
    main()


Epoch 1/20
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - loss: 0.0133
Epoch 2/20
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.0131
Epoch 3/20
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0139
Epoch 4/20
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0134
Epoch 5/20
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0133
Epoch 6/20
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0144
Epoch 7/20
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0131
Epoch 8/20
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0128
Epoch 9/20
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0134
Epoch 10/20
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0138
Epoch 11/



ChatGPT TOURNAMENT

In [None]:
import numpy as np
from tensorflow.keras.models import load_model
import time
from tensorflow.keras.losses import MeanSquaredError
BOARD_SIZE = 3

model1 = load_model("tictactoe_GPT_NN_model.h5", compile=False)
model1.compile(optimizer="adam", loss=MeanSquaredError())

model2 = load_model("/content/GPT CNN Model.h5", compile=False)
model2.compile(optimizer="adam", loss=MeanSquaredError())

model3 = load_model("/content/GPT RNN Model.h5", compile=False)
model3.compile(optimizer="adam", loss=MeanSquaredError())
custom_objects={"mse": MeanSquaredError()}
model_names = {
    model1: "ChatGPT NN",
    model2: "ChatGPT CNN",
    model3: "ChatGPT RNN"
}
def display_board(board):
    symbols = {0: '.', 1: 'X', -1: 'O'}
    for row in board:
        print(" ".join(symbols[cell] for cell in row))
    print("\n")

def get_valid_moves(board):
    return [(i, j) for i in range(BOARD_SIZE) for j in range(BOARD_SIZE) if board[i, j] == 0]
def apply_move(board, move, player):
    board[move[0], move[1]] = player
def check_winner(board):
    for i in range(BOARD_SIZE):
        if abs(sum(board[i, :])) == BOARD_SIZE:
            return np.sign(sum(board[i, :]))
        if abs(sum(board[:, i])) == BOARD_SIZE:
            return np.sign(sum(board[:, i]))


    if abs(sum([board[i, i] for i in range(BOARD_SIZE)])) == BOARD_SIZE:
        return np.sign(sum([board[i, i] for i in range(BOARD_SIZE)]))
    if abs(sum([board[i, BOARD_SIZE - i - 1] for i in range(BOARD_SIZE)])) == BOARD_SIZE:
        return np.sign(sum([board[i, BOARD_SIZE - i - 1] for i in range(BOARD_SIZE)]))

    if 0 not in board:
        return 0

    return None
def predict_move(model, board, player):
    board_input = board.copy() * player

    if len(model.input_shape) == 4 and model.input_shape[1:] == (3, 3, 1):
        board_input = board_input.reshape(1, BOARD_SIZE, BOARD_SIZE, 1).astype("float32")
    elif len(model.input_shape) == 2 and model.input_shape[1] == 9:
        board_input = board_input.reshape(1, BOARD_SIZE * BOARD_SIZE).astype("float32")
    elif len(model.input_shape) == 3 and model.input_shape[1:] == (9, 1):
        board_input = board_input.reshape(1, BOARD_SIZE * BOARD_SIZE, 1).astype("float32")
    else:
        raise ValueError(f"Unexpected input shape for the model: {model.input_shape}")

    valid_moves = get_valid_moves(board)
    for move in valid_moves:
        temp_board = board.copy()
        apply_move(temp_board, move, player)
        if check_winner(temp_board) == player:
            return move

    predictions = model.predict(board_input, verbose=0)[0].flatten()
    move_probs = np.zeros_like(predictions)
    for move in valid_moves:
        idx = move[0] * BOARD_SIZE + move[1]
        move_probs[idx] = predictions[idx]

    best_move_idx = np.argmax(move_probs)
    return (best_move_idx // BOARD_SIZE, best_move_idx % BOARD_SIZE)
def play_game(model_a, model_b, game_num):
    board = np.zeros((BOARD_SIZE, BOARD_SIZE), dtype=int)
    player = 1

    print(f"Game {game_num} starts:")
    while True:
        print(f"Player {player}'s turn:")
        display_board(board)

        move = predict_move(model_a if player == 1 else model_b, board, player)
        apply_move(board, move, player)
        winner = check_winner(board)
        if winner is not None:
            print("\nFinal Board:")
            display_board(board)
            return winner

        player *= -1
def conduct_tournament(num_games=3):
    models = [model1, model2, model3]
    scores = {model_names[m]: 0 for m in models}
    scores["Draw"] = 0

    game_num = 1
    for i in range(len(models)):
        for j in range(i + 1, len(models)):
            model_a, model_b = models[i], models[j]
            name_a, name_b = model_names[model_a], model_names[model_b]

            for _ in range(num_games):
                print(f"{name_a} vs {name_b}")
                winner = play_game(model_a, model_b, game_num)
                if winner == 1:
                    print(f"{name_a} wins!\n")
                    scores[name_a] += 1
                elif winner == -1:
                    print(f"{name_b} wins!\n")
                    scores[name_b] += 1
                else:
                    print("It's a draw!\n")
                    scores["Draw"] += 1
                game_num += 1
                time.sleep(1)

            for _ in range(num_games):
                print(f"{name_b} vs {name_a}")
                winner = play_game(model_b, model_a, game_num)
                if winner == 1:
                    print(f"{name_b} wins!\n")
                    scores[name_b] += 1
                elif winner == -1:
                    print(f"{name_a} wins!\n")
                    scores[name_a] += 1
                else:
                    print("It's a draw!\n")
                    scores["Draw"] += 1
                game_num += 1
                time.sleep(1)

    print("Tournament Results:")
    for model, score in scores.items():
        print(f"{model}: {score}")

if __name__ == "__main__":
    conduct_tournament(num_games=1)


ChatGPT NN vs ChatGPT CNN
Game 1 starts:
Player 1's turn:
. . .
. . .
. . .


Player -1's turn:
X . .
. . .
. . .


Player 1's turn:
X . .
. O .
. . .


Player -1's turn:
X . X
. O .
. . .


Player 1's turn:
X . X
O O .
. . .



Final Board:
X X X
O O .
. . .


ChatGPT NN wins!

ChatGPT CNN vs ChatGPT NN
Game 2 starts:
Player 1's turn:
. . .
. . .
. . .


Player -1's turn:
. . .
. X .
. . .


Player 1's turn:
O . .
. X .
. . .


Player -1's turn:
O . X
. X .
. . .


Player 1's turn:
O . X
. X .
O . .


Player -1's turn:
O . X
X X .
O . .


Player 1's turn:
O . X
X X O
O . .


Player -1's turn:
O . X
X X O
O . X


Player 1's turn:
O O X
X X O
O . X



Final Board:
O O X
X X O
O X X


It's a draw!

ChatGPT NN vs ChatGPT RNN
Game 3 starts:
Player 1's turn:
. . .
. . .
. . .


Player -1's turn:
X . .
. . .
. . .


Player 1's turn:
X . O
. . .
. . .


Player -1's turn:
X . O
. X .
. . .


Player 1's turn:
X O O
. X .
. . .



Final Board:
X O O
. X .
. . X


ChatGPT NN wins!

ChatGPT RNN vs

# GPT vs DeepSeek and them combined vs a perfect model tournaments

FINAL TOURNAMENT

In [None]:
import numpy as np
import tensorflow as tf
import random
from itertools import permutations
from tensorflow.keras.models import load_model
from tensorflow.keras.losses import MeanSquaredError

def load_models(model_paths):
    return {path: tf.keras.models.load_model(path, compile=False) for path in model_paths}

BOARD_SIZE = 3
def get_valid_moves(board):
    return [(i, j) for i in range(BOARD_SIZE) for j in range(BOARD_SIZE) if board[i, j] == 0]

# Apply move
def apply_move(board, move, player):
    board[move[0], move[1]] = player

# Check for a winner
def check_winner(board):
    for i in range(BOARD_SIZE):
        if abs(sum(board[i, :])) == BOARD_SIZE:
            return np.sign(sum(board[i, :]))
        if abs(sum(board[:, i])) == BOARD_SIZE:
            return np.sign(sum(board[:, i]))
    if abs(sum(board.diagonal())) == BOARD_SIZE:
        return np.sign(sum(board.diagonal()))
    if abs(sum(np.fliplr(board).diagonal())) == BOARD_SIZE:
        return np.sign(sum(np.fliplr(board).diagonal()))
    if 0 not in board:
        return 0
    return None
def predict_move(model, board, player):
    input_shape = model.input_shape
    board_input = board.copy() * player

    if len(input_shape) == 4:
        board_input = board_input.reshape(1, 3, 3, 1)
    elif len(input_shape) == 3:
        board_input = board_input.flatten().reshape(1, 9, 1)
    elif len(input_shape) == 2:
        board_input = board_input.flatten().reshape(1, 9)
    else:
        raise ValueError(f"Unsupported input shape: {input_shape}")

    predictions = model.predict(board_input, verbose=0)[0]
    valid_moves = get_valid_moves(board)
    move_probs = np.zeros(BOARD_SIZE * BOARD_SIZE)

    for move in valid_moves:
        idx = move[0] * BOARD_SIZE + move[1]
        move_probs[idx] = predictions[idx]

    best_move_idx = np.argmax(move_probs)
    return (best_move_idx // BOARD_SIZE, best_move_idx % BOARD_SIZE)
def play_game(model1, model2):
    board = np.zeros((BOARD_SIZE, BOARD_SIZE), dtype=int)
    player = 1
    models = {1: model1, -1: model2}

    while True:
        move = predict_move(models[player], board, player)
        if move not in get_valid_moves(board):
            return -player
        apply_move(board, move, player)

        winner = check_winner(board)
        if winner is not None:
            return winner
        player *= -1
def run_tournament(model_paths):
    models = load_models(model_paths)
    scores = {name: 0 for name in model_paths}

    for model1, model2 in permutations(model_paths, 2):
        result = play_game(models[model1], models[model2])
        if result == 1:
            scores[model1] += 3
        elif result == -1:
            scores[model2] += 3
        else:
            scores[model1] += 1
            scores[model2] += 1

    sorted_scores = sorted(scores.items(), key=lambda x: x[1], reverse=True)
    print("Tournament Results:")
    for idx, (model_name, score) in enumerate(sorted_scores):
        print(f"Rank {idx + 1}: {model_name} with {score} points")

if __name__ == "__main__":
    model_paths = [
        "DeepSeekRNN.h5", "DeepSeek CNN.h5", "DeepSeek NN.h5", "tictactoe_GPT_NN_model.h5", "GPT CNN Model.h5", "GPT RNN Model.h5"
    ]
    run_tournament(model_paths)


Tournament Results:
Rank 1: DeepSeek NN.h5 with 23 points
Rank 2: tictactoe_GPT_NN_model.h5 with 21 points
Rank 3: DeepSeek CNN.h5 with 16 points
Rank 4: GPT RNN Model.h5 with 12 points
Rank 5: GPT CNN Model.h5 with 7 points
Rank 6: DeepSeekRNN.h5 with 4 points


TOURNAMENT VS MY MODEL

In [None]:
import numpy as np
import tensorflow as tf

BOARD_SIZE = 3
NUM_MODELS = 6

def load_models(model_paths):
    return [tf.keras.models.load_model(path, compile=False) for path in model_paths]

def get_valid_moves(board):
    return [(i, j) for i in range(BOARD_SIZE) for j in range(BOARD_SIZE) if board[i, j] == 0]

def apply_move(board, move, player):
    board[move[0], move[1]] = player


def check_winner(board):
    for i in range(BOARD_SIZE):
        if abs(sum(board[i, :])) == BOARD_SIZE:
            return np.sign(sum(board[i, :]))
        if abs(sum(board[:, i])) == BOARD_SIZE:
            return np.sign(sum(board[:, i]))
    if abs(sum(board.diagonal())) == BOARD_SIZE:
        return np.sign(sum(board.diagonal()))
    if abs(sum(np.fliplr(board).diagonal())) == BOARD_SIZE:
        return np.sign(sum(np.fliplr(board).diagonal()))
    if 0 not in board:
        return 0
    return None

def predict_move(model, board, player):
    input_shape = model.input_shape
    board_input = board.copy() * player

    if len(input_shape) == 4:
        board_input = board_input.reshape(1, 3, 3, 1)
    elif len(input_shape) == 3:
        board_input = board_input.flatten().reshape(1, 9, 1)
    elif len(input_shape) == 2:
        board_input = board_input.flatten().reshape(1, 9)
    else:
        raise ValueError(f"Unsupported input shape: {input_shape}")

    predictions = model.predict(board_input, verbose=0)[0]

    valid_moves = get_valid_moves(board)
    move_probs = np.zeros(BOARD_SIZE * BOARD_SIZE)

    for move in valid_moves:
        idx = move[0] * BOARD_SIZE + move[1]
        move_probs[idx] = predictions[idx]

    best_move_idx = np.argmax(move_probs)
    return (best_move_idx // BOARD_SIZE, best_move_idx % BOARD_SIZE)

def play_game(model1, model2):
    board = np.zeros((BOARD_SIZE, BOARD_SIZE), dtype=int)
    player = 1
    models = {1: model1, -1: model2}

    while True:
        move = predict_move(models[player], board, player)
        if move not in get_valid_moves(board):
            return -player
        apply_move(board, move, player)

        winner = check_winner(board)
        if winner is not None:
            return winner
        player *= -1

def evaluate_models(model_paths, reference_model_path):
    models = load_models(model_paths)
    reference_model = tf.keras.models.load_model(reference_model_path, compile=False)

    results = {i: {'wins': 0, 'draws': 0, 'losses': 0} for i in range(NUM_MODELS)}
    ref_results = {'wins': 0, 'draws': 0, 'losses': 0}  # Reference model stats

    for i in range(NUM_MODELS):
        result = play_game(models[i], reference_model)
        if result == 1:
            results[i]['wins'] += 1
            ref_results['losses'] += 1
        elif result == -1:
            results[i]['losses'] += 1
            ref_results['wins'] += 1
        else:
            results[i]['draws'] += 1
            ref_results['draws'] += 1


        result = play_game(reference_model, models[i])
        if result == -1:
            results[i]['wins'] += 1
            ref_results['losses'] += 1
        elif result == 1:
            results[i]['losses'] += 1
            ref_results['wins'] += 1
        else:
            results[i]['draws'] += 1
            ref_results['draws'] += 1


    print("\nEvaluation Results against Reference Model:")
    for model_idx, score in results.items():
        print(f"Model {model_idx}: {score['wins']} Wins, {score['draws']} Draws, {score['losses']} Losses")


    print("\nReference Model Performance:")
    print(f"Wins: {ref_results['wins']}, Draws: {ref_results['draws']}, Losses: {ref_results['losses']}")

if __name__ == "__main__":
    model_paths = [
        "DeepSeekRNN.h5", "DeepSeek CNN.h5", "DeepSeek NN.h5", "tictactoe_GPT_NN_model.h5", "GPT CNN Model.h5", "GPT RNN Model.h5"
    ]
    reference_model_path = "/content/tic_tac_toe_nn_vtorobid_tf.keras"
    evaluate_models(model_paths, reference_model_path)



Evaluation Results against Reference Model:
Model 0: 0 Wins, 0 Draws, 2 Losses
Model 1: 0 Wins, 0 Draws, 2 Losses
Model 2: 0 Wins, 1 Draws, 1 Losses
Model 3: 0 Wins, 2 Draws, 0 Losses
Model 4: 0 Wins, 1 Draws, 1 Losses
Model 5: 0 Wins, 0 Draws, 2 Losses

Reference Model Performance:
Wins: 8, Draws: 4, Losses: 0
