In [9]:
# Import necessary libraries
import numpy as np
import random
import time
from typing import List, Tuple, Optional
import copy
import math

In [10]:
# Tic-Tac-Toe game environment
class TicTacToe:
    def __init__(self):
        self.board = np.zeros((3, 3), dtype=int)  # 0: empty, 1: player 1, -1: player 2

    def reset(self):
        self.board = np.zeros((3, 3), dtype=int)

    def is_valid_move(self, x: int, y: int) -> bool:
        return self.board[x, y] == 0

    def make_move(self, x: int, y: int, player: int) -> bool:
        if self.is_valid_move(x, y):
            self.board[x, y] = player
            return True
        return False

    def undo_move(self, x: int, y: int, player: int) -> bool:
        if not self.is_valid_move(x, y):
            self.board[x, y] = 0
            return True
        return False

    def check_winner(self) -> Optional[int]:
        # Check rows, columns, and diagonals
        for i in range(3):
            if abs(sum(self.board[i, :])) == 3:
                return np.sign(sum(self.board[i, :]))
            if abs(sum(self.board[:, i])) == 3:
                return np.sign(sum(self.board[:, i]))
        if abs(self.board.trace()) == 3:
            return np.sign(self.board.trace())
        if abs(np.fliplr(self.board).trace()) == 3:
            return np.sign(np.fliplr(self.board).trace())
        return None if 0 in self.board else 0  # 0: draw, None: ongoing game

    def get_available_moves(self) -> List[Tuple[int, int]]:
        return [(i, j) for i in range(3) for j in range(3) if self.board[i, j] == 0]

    def display(self):
        display_board = self.board.astype(str)
        display_board[display_board == "0"] = "."
        display_board[display_board == "1"] = "X"
        display_board[display_board == "-1"] = "O"
        print("\n".join([" ".join(row) for row in display_board]))
        print()


In [11]:
# Minimax method

def myminimax(state: TicTacToe, player: int):
    status = state.check_winner()
    if status != None:
        return status , (-1 ,-1)
    best_move = (-1 , -1)
    max_point = -2
    for all_moves in state.get_available_moves():
        if state.make_move(all_moves[0] , all_moves[1] , player):
            point , temp= myminimax(state , player * -1)
            if max_point < point * player :
                max_point = point * player
                best_move = all_moves
            state.undo_move(all_moves[0] , all_moves[1] , player)
    
    return max_point * player , best_move

def minimax(state: TicTacToe, player: int) -> Tuple[int, int]:
    a , b = myminimax(state , player)
    return b

In [12]:
# Alpha-beta method
def myalpha_beta(state: TicTacToe, player: int , alpha:int , beta: int , maximizer: int):
    status = state.check_winner()
    if status != None:
        return status , (-1 ,-1)
    best_move = (-1 , -1)
    max_point = -2
    for all_moves in state.get_available_moves():
        if state.make_move(all_moves[0] , all_moves[1] , player):
            point , temp = myalpha_beta(state , player * -1 , alpha , beta , maximizer)
            if max_point < point * player:
                max_point = point * player
                best_move = all_moves
                if maximizer == player:
                    alpha = max (alpha , point * maximizer)
                    if (alpha >= beta):
                        state.undo_move(all_moves[0] , all_moves[1] , player)
                        break
                else:
                    beta = min(beta , point * maximizer)
                    if (alpha >= beta):
                        state.undo_move(all_moves[0] , all_moves[1] , player)
                        break
            state.undo_move(all_moves[0] , all_moves[1] , player)
    
    return max_point * player , best_move

def alpha_beta(state: TicTacToe, player: int) -> Tuple[int, int]:
    a , b = myalpha_beta(state , player , -100000000, +10000000, player)
    return b

In [72]:
# evaluatiob based method

def guess(state: TicTacToe, player: int): # it has a bug. it doesn't consider that sometimes it shouldn't count two in a row as score(because it's not possible). i didn't fix it because it would become very PERCISE for an eval function
    #nevermind . fixed it.
    score = player
    for i in range(3):
        c = state.board[i][0]
        for j in range(0 , 3):
            if c == state.board[i][j]:
                # the fix begins
                if j == 0 and c == state.board[i][2] and state.board[i][1]==0:
                    score += 1 if state.board[i][j]==1 else -1 if state.board[i][j] == -1 else 0
                    continue
                if j == 1 and not state.board[i][2]==0: 
                    continue
                if j == 2 and not state.board[i][0]==0:
                    continue
                #fix ends
                score += 1 if state.board[i][j]==1 else -1 if state.board[i][j] == -1 else 0
            c = state.board[i][j]

    for j in range(3):
        c = state.board[0][j]
        for i in range(0 , 3):
            if c == state.board[i][j]:
                # the fix begins
                #print('--------------------')
                #print(i)
                #print (state.board[2][j])
                #print (state.board[1][j])
                #print('--------------------')
                if (i == 0) and (c == state.board[2][j]) and (state.board[1][j]==0):
                    score += 1 if state.board[i][j]==1 else -1 if state.board[i][j] == -1 else 0
                    continue
                if i == 1 and not state.board[i + 1][j]==0: 
                    continue
                if i == 2 and not state.board[i - 2][j]==0:
                    continue
                #fix ends
                score += 1 if state.board[i][j]==1 else -1 if state.board[i][j] == -1 else 0
            c = state.board[i][j]

    c = state.board[1][1]
    if c == state.board[0][0] and state.board[2][2]==0:
        score += 1 if state.board[1][1]==1 else -1 if state.board[1][1] == -1 else 0
    if c == state.board[2][2] and state.board[0][0]==0:
        score += 1 if state.board[1][1]==1 else -1 if state.board[1][1] == -1 else 0
    if c == state.board[2][0] and state.board[0][2]==0:
        score += 1 if state.board[1][1]==1 else -1 if state.board[1][1] == -1 else 0
    if c == state.board[0][2] and state.board[2][0]==0:
        score += 1 if state.board[1][1]==1 else -1 if state.board[1][1] == -1 else 0
    
    #print ("log : ")
    #state.display()
    #print(score)
    return score

def eval_based_alphabeta(state: TicTacToe, player: int , alpha:int , beta: int , maximizer: int , depth: int):
    status = state.check_winner()
    if status != None:
        return status , (-1 ,-1)

    if depth > 3:
        return guess(state , player), (-1 , -1) 

    best_move = (-1 , -1)
    max_point = -200000000
    for all_moves in state.get_available_moves():
        if state.make_move(all_moves[0] , all_moves[1] , player):
            point , temp = eval_based_alphabeta(state , player * -1 , alpha , beta , maximizer , depth + 1)
            if max_point < point * player:
                max_point = point * player
                best_move = all_moves
                if maximizer == player:
                    alpha = max (alpha , point * maximizer)
                    if (alpha >= beta):
                        state.undo_move(all_moves[0] , all_moves[1] , player)
                        break
                else:
                    beta = min(beta , point * maximizer)
                    if (alpha >= beta):
                        state.undo_move(all_moves[0] , all_moves[1] , player)
                        break
            state.undo_move(all_moves[0] , all_moves[1] , player)
    
    return max_point * player , best_move

def evaluation_based(state: TicTacToe, player: int) -> Tuple[int, int]:
    a , b = eval_based_alphabeta(state , player , -100000000, +10000000, player , 0)
    if b[0] == -1:
        print('hell no')
    return b

In [66]:
# Monte Carlo Tree search method
class Node:
    def __init__ (self , state: TicTacToe , parent: Optional["Node"] , move: Tuple[int , int] , player: int):
        self.state = copy.deepcopy(state)
        self.parent = parent
        self.visit = 0
        self.turn = player
        self.reward = 0
        self.final_state = state.check_winner() 
        self.the_creator = move
        self.children = []

    def expand(self):
        all_moves = []
        for move in self.state.get_available_moves():
            if self.state.make_move(move[0], move[1] , self.turn):
                self.children.append (Node(self.state , self , move , self.turn * -1))
                self.state.undo_move(move[0] , move[1] , self.turn)

    def calc_ucb(self):
        if self.visit == 0:
            return float('inf')
        return (self.reward / self.visit) + math.sqrt(250) * math.sqrt(math.log(self.parent.visit) / self.visit)

    def give_me_ur_child(self) -> "Node":
        if len(self.children) == 0:
            self.expand()
        best = self
        max_ucb = -1000000000
        for child in self.children:
            if child.visit == 0:
                return child
            ucb = child.calc_ucb()
            if max_ucb < ucb:
                max_ucb = ucb
                best = child 
        
        if best == self:
            print ('panic')
        return best

def play_a_game(current_node: Node , player: int):
    current_node.visit += 1
    if current_node.final_state != None: #best is draw = win = 8 loss = -10
        a = player * current_node.final_state
        if a == 0:
            a = 8
        elif a < 0:
            a*=10
        else:
            a *= 8
        current_node.reward += a
        return a
    a = play_a_game(current_node.give_me_ur_child(), player )
    # current_node.reward += player * a
    current_node.reward += play_a_game(current_node.give_me_ur_child(), player )
    return a


def monte_carlo_tree_search(state: TicTacToe, player: int) -> Tuple[int, int]:
    root = Node (state , None , (-1 , -1) , player)
    for i in range (80): #best is 100
        play_a_game(root , player)
    
    best = root
    max_val = -1000000000
    for child in root.children:
        #print (child.reward)
        #print (child.visit)
        #print (child.the_creator)
        val = child.reward / child.visit
        if max_val < val:
            max_val = val
            best = child
    
    if best == root:
        print ('what')
    return best.the_creator

In [59]:
# Simulate a match between two methods
def simulate_game(method1, method2):
    game = TicTacToe()
    players = [1, -1]
    methods = {1: method1, -1: method2}
    times = {1:0, -1:0}
    counter = 0
    while game.check_winner() is None:
        counter += 1
        current_player = players[0]
        a = time.time()
        move = methods[current_player](game, current_player)
        times[current_player] += time.time() - a
        game.make_move(*move, current_player)
        players.reverse()
        #print("game:\n")
        game.display()
    print(f"player 1 avg time : {times[1] / ((counter + 1) // 2)}")
    print(f"player 2 avg time : {times[-1] / ((counter) // 2)}")
    return game.check_winner()

In [84]:
# Main function for testing
if __name__ == "__main__":
    # Example match (methods need implementation)
    winner = simulate_game(monte_carlo_tree_search,monte_carlo_tree_search)
    if winner == 1:
        print("Player 1 (Method 1) wins!")
    elif winner == -1:
        print("Player 2 (Method 2) wins!")
    else:
        print("It's a draw!")

. . .
. X .
. . .

O . .
. X .
. . .

O . X
. X .
. . .

O . X
. X .
O . .

O . X
X X .
O . .

O O X
X X .
O . .

O O X
X X X
O . .

player 1 avg time : 0.5726608037948608
player 2 avg time : 0.16744112968444824
Player 1 (Method 1) wins!
