In [2]:
# Import necessary libraries
import numpy as np
import random
import time
from typing import List, Tuple, Optional
from copy import deepcopy
from time import sleep
from datetime import datetime

In [3]:
random.seed(datetime.now().timestamp())

In [4]:
# Tic-Tac-Toe game environment
class TicTacToe:
    def __init__(self):
        self.board = np.zeros((3, 3), dtype=int)  # 0: empty, 1: player 1, -1: player 2

    def reset(self):
        self.board = np.zeros((3, 3), dtype=int)

    def is_valid_move(self, x: int, y: int) -> bool:
        return self.board[x, y] == 0

    def make_move(self, x: int, y: int, player: int) -> bool:
        if self.is_valid_move(x, y):
            self.board[x, y] = player
            return True
        return False

    def check_winner(self) -> Optional[int]:
        # Check rows, columns, and diagonals
        for i in range(3):
            if abs(sum(self.board[i, :])) == 3:
                return np.sign(sum(self.board[i, :]))
            if abs(sum(self.board[:, i])) == 3:
                return np.sign(sum(self.board[:, i]))
        if abs(self.board.trace()) == 3:
            return np.sign(self.board.trace())
        if abs(np.fliplr(self.board).trace()) == 3:
            return np.sign(np.fliplr(self.board).trace())
        return None if 0 in self.board else 0  # 0: draw, None: ongoing game

    def get_available_moves(self) -> List[Tuple[int, int]]:
        return [(i, j) for i in range(3) for j in range(3) if self.board[i, j] == 0]

    def display(self):
        display_board = self.board.astype(str)
        display_board[display_board == "0"] = "."
        display_board[display_board == "1"] = "X"
        display_board[display_board == "-1"] = "O"
        print("\n".join([" ".join(row) for row in display_board]))
        print()
        # sleep(5)


In [5]:
# Minimax method
def _minimax(state: TicTacToe, player: int):
    """
    Implement the Minimax algorithm here.
    """
    cur_board = state.board
    winner = state.check_winner()
    if winner is not None:
        # print(state)
        # print(winner)
        return None, None, winner
    else:
        point = -2 if player == 1 else 2
        final_x, final_y = None, None
        for i, j in state.get_available_moves():
                new_state = TicTacToe()
                new_state.board = deepcopy(cur_board)
                new_state.make_move(i, j, player)                
                _, _, new_point = _minimax(new_state, -player)
                if player == 1 and new_point > point:
                    point = new_point
                    final_x, final_y = i, j
                if player == -1 and new_point < point:
                    point = new_point
                    final_x, final_y = i, j
        return final_x, final_y, point
    
def minimax(state: TicTacToe, player: int) -> Tuple[int, int]:
    i, j, _ = _minimax(state, player)
    return i, j

In [6]:
# Alpha-beta method
def _alpha_beta(state: TicTacToe, player: int, cur_val=None):
    """
    Implement the Alpha-Beta Pruning algorithm here.
    """
    cur_board = state.board
    winner = state.check_winner()
    if winner is not None:
        return None, None, winner
    else:
        point = -2 if player == 1 else 2
        final_x, final_y = None, None
        for i, j in state.get_available_moves():
                new_state = TicTacToe()
                new_state.board = deepcopy(cur_board)
                new_state.make_move(i, j, player)                
                _, _, new_point = _alpha_beta(new_state, -player, point)
                if player == 1 and new_point > point:
                    point = new_point
                    final_x, final_y = i, j
                    if cur_val and point >= cur_val:
                        break
                if player == -1 and new_point < point:
                    point = new_point
                    final_x, final_y = i, j
                    if cur_val and point <= cur_val:
                        break
        return final_x, final_y, point
    
def alpha_beta(state: TicTacToe, player: int) -> Tuple[int, int]:
     i, j, _ = _alpha_beta(state, player)
     return i, j

In [7]:
# evaluatiob based method
def _evaluate_board_mark(board, mark):
        eval = 0
        for i in range(3):
            if abs(sum(board[i, :])) >= 2:
                if np.sign(sum(board[i, :])) == mark:
                    eval += 1
            if abs(sum(board[:, i])) >= 2:
                if np.sign(sum(board[:, i])) == mark:
                    eval += 1
        if abs(board.trace()) >= 2:
            if np.sign(board.trace()) == mark:
                eval += 1
        if abs(np.fliplr(board).trace()) >= 2:
            if np.sign(np.fliplr(board).trace()) == mark:
                eval += 1
        return eval
    
def evaluate_board(board, turn):
    first_player_mul = 1 if turn == 1 else 2
    second_player_mul = 1 if turn == 2 else 2
    return _evaluate_board_mark(board, 1) * first_player_mul - _evaluate_board_mark(board, -1) * second_player_mul

def _evaluation_based(state: TicTacToe, player: int, depth=0, target=3):
    cur_board = state.board
    winner = state.check_winner()
    if winner is not None:
        return None, None, winner
    else:
        point = -2 if player == 1 else 2
        final_x, final_y = None, None
        for i, j in state.get_available_moves():
                new_state = TicTacToe()
                new_state.board = deepcopy(cur_board)
                new_state.make_move(i, j, player)
                if depth < target:
                    _, _, new_point = _evaluation_based(new_state, -player, depth+1, target)
                else:
                    new_point = evaluate_board(new_state.board, player)
                if player == 1 and new_point > point:
                    point = new_point
                    final_x, final_y = i, j
                if player == -1 and new_point < point:
                    point = new_point
                    final_x, final_y = i, j
        return final_x, final_y, point


def evaluation_based(state: TicTacToe, player: int) -> Tuple[int, int]:
    """
    Implement a heuristic evaluation-based decision-making algorithm here.
    """
    i, j, _ = _evaluation_based(state, player)
    return i, j

In [8]:
# Monte Carlo Tree search method
def _monte_carlo_tree_search(state: TicTacToe, player: int):
    nodes = dict()
    neighbors = state.get_available_moves()
    
    def _play_unitl_terminal(state: TicTacToe, player: int):
        
        winner = state.check_winner()
        if winner is not None:
            return winner
       
        available_neighbors = state.get_available_moves()
        random.shuffle(available_neighbors)
        selected_move = available_neighbors[0]

        new_state = TicTacToe()
        new_state.board = deepcopy(state.board)
        new_state.make_move(*selected_move, player)
        return _play_unitl_terminal(new_state, -player)
                    
    for _ in range(30):
        # we play for 30 times to evaluate the nodes
        chosen = random.choice(neighbors)
        new_state = TicTacToe()
        new_state.board = deepcopy(state.board)
        new_state.make_move(*chosen, player)
        result = _play_unitl_terminal(new_state, -player)
        point, played = nodes.get(chosen, (0, 0))
        point += result
        played += 1
        nodes[chosen] = (point, played)

    score = -1 if player == 1 else 1
    final_x, final_y = None, None
    for neighbor in neighbors:
        if neighbor not in nodes:
            continue
        point, played = nodes[neighbor]
        new_score = point / played
        # print(neighbor, new_score, point, played)
        # print(f"neighbor is {neighbors[i]}\nhast point {point} and played {played}\n")
        if player == 1 and new_score > score:
            score = new_score
            final_x, final_y = neighbor
        if player == -1 and new_score < score:
            score = new_score
            final_x, final_y = neighbor
    # print("final score: ", score)
    return final_x, final_y, score

def monte_carlo_tree_search(state: TicTacToe, player: int) -> Tuple[int, int]:
    """
    Implement the Monte Carlo Tree Search algorithm here.
    """
    i, j, _ = _monte_carlo_tree_search(state, player)
    return i, j

In [9]:
# Simulate a match between two methods
def simulate_game(method1, method2):
    game = TicTacToe()
    players = [1, -1]
    methods = {1: method1, -1: method2}

    stats = dict()
    elapsed_time = [0, 0]
    move_cnt = [0, 0]
    while game.check_winner() is None:
        current_player = players[0]
        start = time.time()
        move = methods[current_player](game, current_player)
        
        idx = 0 if current_player == 1 else 1
        elapsed_time[idx] += time.time() - start
        move_cnt[idx] += 1

        game.make_move(*move, current_player)
        players.reverse()
        # game.display()
    game.display()
    winner = game.check_winner()

    return winner, elapsed_time[0]/move_cnt[0], elapsed_time[1]/move_cnt[1] 

In [10]:
# Main function for testing
if __name__ == "__main__":
    # Example match (methods need implementation)

    stats = dict()
    modes = [
        (minimax, alpha_beta),
        (evaluation_based, minimax),
        *([(minimax, monte_carlo_tree_search)] * 30),
        (alpha_beta, evaluation_based),
        *([(monte_carlo_tree_search, alpha_beta)] * 30),
        *([(evaluation_based, monte_carlo_tree_search)] * 30),
    ]
    for p1_method, p2_method2 in modes:
        winner, avg1, avg2 = simulate_game(p1_method, p2_method2)

        name = p1_method.__name__ + "-" + p2_method2.__name__
        new_state = stats.get(name, {
            "p1": p1_method.__name__,
            "p2": p2_method2.__name__,
            "count": 0,
            "p1_win": 0,
            "p2_win": 0,
            "draw": 0,
            "avg1_sum": 0,
            "avg2_sum": 0,
            "avg1": 0,
            "avg2": 0,
        })

        new_state["count"] += 1
        new_state["p1_win"] += 1 if winner == 1 else 0
        new_state["p2_win"] += 1 if winner == -1 else 0
        new_state["draw"] += 1 if winner == 0 else 0
        new_state["avg1_sum"] += avg1
        new_state["avg2_sum"] += avg2
        new_state["avg1"] = new_state["avg1_sum"]/new_state["count"]
        new_state["avg2"] = new_state["avg2_sum"]/new_state["count"]

        stats[name] = new_state

        if winner == 1:
            print("Player 1 (Method 1) wins!")
        elif winner == -1:
            print("Player 2 (Method 2) wins!")
        else:
            print("It's a draw!")

X X O
O O X
X O X

It's a draw!
O X X
X X O
O O X

It's a draw!
X O .
X X X
O . O

Player 1 (Method 1) wins!
X X X
. . .
. O O

Player 1 (Method 1) wins!
X X O
. X .
O X O

Player 1 (Method 1) wins!
X X O
O X .
. X O

Player 1 (Method 1) wins!
X . O
X X X
O . O

Player 1 (Method 1) wins!
X O X
X O .
X . O

Player 1 (Method 1) wins!
X . O
X O X
X . O

Player 1 (Method 1) wins!
X X X
. . .
O . O

Player 1 (Method 1) wins!
X . O
X X X
. O O

Player 1 (Method 1) wins!
X X O
. X .
O O X

Player 1 (Method 1) wins!
X O .
X X O
X . O

Player 1 (Method 1) wins!
X X X
. O .
O . .

Player 1 (Method 1) wins!
X X O
. X .
O X O

Player 1 (Method 1) wins!
X X X
O O .
. . .

Player 1 (Method 1) wins!
X O .
X O .
X . .

Player 1 (Method 1) wins!
X X O
O O X
X O X

It's a draw!
X X O
X O O
X . .

Player 1 (Method 1) wins!
X . O
X . O
X . .

Player 1 (Method 1) wins!
X X X
O . .
. . O

Player 1 (Method 1) wins!
X X X
. O .
O . .

Player 1 (Method 1) wins!
X X X
. . .
O . O

Player 1 (Method 1) wins!
X X 

In [348]:
columns = ["p1", "p2", "count", "p1_win", "p2_win", "draw", "avg1", "avg2"]
widths = [30, 30, 6, 8, 8, 8, 10, 10]

# Format the header
header = "".join(f"{col:<{width}}" for col, width in zip(columns, widths))
print(header)
for v in stats.values():
    values = [v[col] for col in columns]
    print("".join(f"{val:<{width}.3f}" if isinstance(val, float) else f"{val:<{width}}" for val, width in zip(values, widths)))

p1                            p2                            count p1_win  p2_win  draw    avg1      avg2      
minimax                       alpha_beta                    1     0       0       1       1.004     0.022     
evaluation_based              minimax                       1     0       0       1       0.014     0.126     
minimax                       monte_carlo_tree_search       30    24      0       6       1.402     0.002     
alpha_beta                    evaluation_based              1     1       0       0       0.363     0.014     
monte_carlo_tree_search       alpha_beta                    30    0       15      15      0.002     0.045     
evaluation_based              monte_carlo_tree_search       30    26      0       4       0.018     0.002     
