In [None]:
import random

class TicTacToe:
    """Tic-tac-toe game between a scripted Player 1 and an agent Player 2.

    The board is a flat list of 9 cells indexed 0-8 in row-major order.
    A cell holds 0 (empty), 1 (Player 1, drawn as 'X') or 2 (Player 2,
    drawn as 'O').
    """

    # Every triple of board indices that wins the game when held by one player.
    _WIN_CONDITIONS = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),  # rows
        (0, 3, 6), (1, 4, 7), (2, 5, 8),  # columns
        (0, 4, 8), (2, 4, 6),             # diagonals
    )

    def __init__(self, smartMovePlayer1=0, playerSQN=None):
        """
        Initializes a TicTacToe game.

        Parameters:
        smartMovePlayer1 (float): The probability that Player 1 will make a smart move. Should be between 0 and 1.
                                  During a smart move, Player 1 attempts to win or block the opponent.
                                  During a non-smart move, Player 1 uniformly randomly selects a valid action.
        playerSQN (PlayerSQN): The player that controls Player 2, typically an instance of the PlayerSQN class.

        Attributes:
        board (list): A list of 9 elements representing the current game board.
        current_winner (int or None): Tracks the winner of the game. None if no player has won yet.
        smartMovePlayer1 (float): Probability of Player 1 making a smart move.
        playerSQN (PlayerSQN): Player 2, which will eventually be implemented as a Shallow Q-Network.
        """
        self.board = [0] * 9  # Board is represented as a list of 9 elements
        self.current_winner = None
        # Kept as an assert so callers that rely on AssertionError still work.
        assert 0 <= smartMovePlayer1 <= 1, "Probability of Smart Move must lie between 0 and 1"
        self.smartMovePlayer1 = smartMovePlayer1
        self.playerSQN = playerSQN

    def print_board(self):
        """Print the board as a 3x3 grid using ' ', 'X' and 'O'."""
        board_symbols = [' ' if x == 0 else 'X' if x == 1 else 'O' for x in self.board]
        print("\nBoard:")
        for i in range(3):
            print(f" {board_symbols[3 * i]} | {board_symbols[3 * i + 1]} | {board_symbols[3 * i + 2]} ")
            if i < 2:
                print("---+---+---")
        print()

    def is_valid_move(self, position):
        """Return True if `position` is an on-board index of an empty cell.

        Out-of-range positions are rejected explicitly: without the bounds
        check a negative index would wrap around (e.g. -1 would address
        cell 8) and an index >= 9 would raise IndexError.
        """
        if not 0 <= position < len(self.board):
            return False
        return self.board[position] == 0

    def make_move(self, position, player):
        """Place `player`'s mark at `position` if the move is valid.

        Updates `current_winner` when the move completes a line.

        Returns:
        bool: True if the mark was placed, False if the move was invalid.
        """
        if not self.is_valid_move(position):
            return False
        self.board[position] = player
        if self.check_winner(player):
            self.current_winner = player
        return True

    def check_winner(self, player):
        """Return True if `player` holds all three cells of any winning line."""
        return any(
            all(self.board[i] == player for i in condition)
            for condition in self._WIN_CONDITIONS
        )

    def empty_positions(self):
        """Return the indices of all empty cells, in ascending order."""
        return [i for i in range(9) if self.board[i] == 0]

    def is_full(self):
        """Return True if no empty cell remains."""
        return all(x != 0 for x in self.board)

    def player1_move(self):
        """Play one move for Player 1 and print the chosen (1-based) position.

        With probability `smartMovePlayer1` a smart move is attempted; if
        there is no winning or blocking move, or the smart branch is not
        taken, a uniformly random empty cell is chosen. The board must
        contain at least one empty cell when this is called.
        """
        if random.random() < self.smartMovePlayer1:
            # Smart move: Try to win or block opponent
            position = self.get_smart_move()
            if position is None:
                # If no winning or blocking move, pick randomly
                position = random.choice(self.empty_positions())
        else:
            # Random move
            position = random.choice(self.empty_positions())
        self.make_move(position, 1)
        print(f"Player 1 (Smart/Random) chooses position {position + 1}")

    def _find_winning_position(self, player):
        """Return the first empty cell that would win for `player`, or None."""
        for position in self.empty_positions():
            # Try the move on the live board, then always undo it.
            self.board[position] = player
            wins = self.check_winner(player)
            self.board[position] = 0
            if wins:
                return position
        return None

    def get_smart_move(self):
        """Return a winning move for Player 1, else a move blocking Player 2.

        Returns:
        int or None: The chosen cell index, or None if neither player can
        win with their next move.
        """
        # Winning (player 1) takes priority over blocking (player 2).
        for player in (1, 2):
            position = self._find_winning_position(player)
            if position is not None:  # 0 is a valid cell, so compare to None
                return position
        return None

    def playerSQN_move(self):
        """Ask `playerSQN` for moves until it returns an empty cell, then play it.

        `playerSQN.move` receives a copy of the board. A ValueError raised
        by it is reported and the request is retried.
        """
        valid_move = False
        while not valid_move:
            try:
                position = self.playerSQN.move(self.board.copy())
                if position in self.empty_positions():
                    valid_move = True
                    self.make_move(position, 2)
                else:
                    print("Invalid move, position already taken. Try again.")
            except ValueError:
                print("Invalid input, please enter a number between 1 and 9.")

    def play_game(self):
        """Run a full game with Player 1 moving first and print the outcome."""
        # Player 1 is always the random or smart player, Player 2 will be SQN in the future
        self.print_board()
        player_turn = 1  # Player 1 starts
        while not self.is_full() and self.current_winner is None:
            if player_turn == 1:
                self.player1_move()
                player_turn = 2
            else:
                self.playerSQN_move()
                player_turn = 1
            self.print_board()

        if self.current_winner:
            winner = "Player 1 (Smart/Random)" if self.current_winner == 1 else "Player 2 (You)"
            print(f"{winner} wins!")
        else:
            print("It's a draw!")

    def get_reward(self):
        """
        Returns the reward for Player 2 (PlayerSQN):
        1 if Player 2 wins, -1 if Player 1 wins, 0 for a draw.
        """
        if self.current_winner == 2:
            return 1
        elif self.current_winner == 1:
            return -1
        else:
            return 0


In [14]:
import random
from collections import deque

import numpy as np
from tensorflow.keras import Input
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

from TicTacToe import TicTacToe

# Q-value network: takes the 9-cell board as input and outputs one value
# per cell. The input shape is declared with an explicit Input layer
# rather than `input_dim` on the first Dense layer, which Keras warns
# against for Sequential models.
model = Sequential()
model.add(Input(shape=(9,)))
model.add(Dense(64, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(9, activation='linear'))
model.compile(loss='mse', optimizer=Adam(learning_rate=0.001))

# NOTE(review): replay_buffer is not referenced anywhere else in this cell.
replay_buffer = deque(maxlen=10000)
# Parallel lists filled during the initial simulations: board before
# Player 2's move, the cell Player 2 chose, and the reward label.
states, actions, rewards = [], [], []

def play_game(smart_prob):
    """Simulate one game with Player 2 choosing uniformly random moves.

    Player 2 moves first in every round; Player 1 (driven by
    `TicTacToe.player1_move` with smart-move probability `smart_prob`)
    replies if the game is still open. Before each Player 2 move the
    board is appended to the module-level `states` list and the chosen
    cell to `actions`.

    Returns the value of `game.get_reward()` once the game has ended.
    """
    game = TicTacToe(smartMovePlayer1=smart_prob)

    def finished():
        # The game ends on a full board or as soon as someone has won.
        return game.is_full() or game.current_winner is not None

    while not finished():
        snapshot = np.array(game.board)
        chosen = random.choice(game.empty_positions())
        states.append(snapshot)
        actions.append(chosen)
        game.make_move(chosen, 2)
        if finished():
            break
        game.player1_move()

    return game.get_reward()

# Number of random-play games used to build the training set, number of
# fit/evaluate rounds, and number of evaluation games per round.
SIMULATIONS = 2000
EPOCHS = 1
EPISODE = 5000

print(f"Running {SIMULATIONS} initial simulations...")
for _ in range(SIMULATIONS):
    # `states` is shared across all games, so label only the entries that
    # this game appended. Using len(states) here would add a cumulative
    # number of labels and misalign rewards with states.
    states_before = len(states)
    reward = play_game(0.0)
    rewards.extend([reward] * (len(states) - states_before))

# Training targets: for each recorded state, the chosen action's output
# is set to the final reward of that game; all other outputs stay 0.
X = np.array(states)
y = np.zeros((len(X), 9))
y[np.arange(len(X)), np.asarray(actions, dtype=int)] = np.asarray(rewards, dtype=float)

print("\nStarting Training...")
print("Epoch | Win% | Loss% | Draw% | Avg Reward | vs Smart%")
print("-" * 50)

for epoch in range(EPOCHS):
    model.fit(X, y, batch_size=32, epochs=1, verbose=0)

    wins, losses, draws = 0, 0, 0
    # Opponent strength grows linearly with the epoch, capped at 0.8.
    smart_prob = min(0.8, epoch / EPOCHS)

    for _ in range(EPISODE):
        game = TicTacToe(smartMovePlayer1=smart_prob)
        while not game.is_full() and game.current_winner is None:
            state = np.array(game.board)
            q_values = model.predict(state.reshape(1, -1), verbose=0)[0]
            # Occupied cells can never be chosen by argmax.
            q_values[state != 0] = float('-inf')
            # Plain int so the board keeps holding built-in ints as indices.
            action = int(np.argmax(q_values))
            game.make_move(action, 2)
            if not game.is_full() and game.current_winner is None:
                game.player1_move()

        if game.current_winner == 2:
            wins += 1
        elif game.current_winner == 1:
            losses += 1
        else:
            draws += 1

    win_rate = wins / EPISODE * 100
    loss_rate = losses / EPISODE * 100
    draw_rate = draws / EPISODE * 100
    avg_reward = (wins - losses) / EPISODE

    print(f"{epoch:3d}  | {win_rate:4.1f} | {loss_rate:4.1f} | {draw_rate:4.1f} | {avg_reward:6.2f}    | {smart_prob:4.2f}")

model.save('2021A7PS2627G_jYPTER.h5')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Running 2000 initial simulations...
Player 1 (Smart/Random) chooses position 3
Player 1 (Smart/Random) chooses position 4
Player 1 (Smart/Random) chooses position 2
Player 1 (Smart/Random) chooses position 6
Player 1 (Smart/Random) chooses position 7
Player 1 (Smart/Random) chooses position 4
Player 1 (Smart/Random) chooses position 4
Player 1 (Smart/Random) chooses position 8
Player 1 (Smart/Random) chooses position 6
Player 1 (Smart/Random) chooses position 7
Player 1 (Smart/Random) chooses position 4
Player 1 (Smart/Random) chooses position 6
Player 1 (Smart/Random) chooses position 5
Player 1 (Smart/Random) chooses position 9
Player 1 (Smart/Random) chooses position 3
Player 1 (Smart/Random) chooses position 8
Player 1 (Smart/Random) chooses position 6
Player 1 (Smart/Random) chooses position 4
Player 1 (Smart/Random) chooses position 9
Player 1 (Smart/Random) chooses position 2
Player 1 (Smart/Random) chooses position 3
Player 1 (Smart/Random) chooses position 4
Player 1 (Smart/Ra



Player 1 (Smart/Random) chooses position 5
Player 1 (Smart/Random) chooses position 2
Player 1 (Smart/Random) chooses position 9
Player 1 (Smart/Random) chooses position 7
Player 1 (Smart/Random) chooses position 1
Player 1 (Smart/Random) chooses position 6
Player 1 (Smart/Random) chooses position 7
Player 1 (Smart/Random) chooses position 4
 99  | 16.0 | 58.0 | 26.0 |  -0.42    | 0.80
