### MCTS (Monte Carlo tree search)
1. Selection
2. Expansion
3. Simulation
4. Backpropagation
# TicTacToe
1. Play
2. CheckWin
3. Get possible moves

In [82]:
import numpy as np
import math
import random

In [86]:
# Markers stored in the board cells and in ``TicTacToe.winner``.
PLAYER_X = 'X'
PLAYER_O = 'O'
EMPTY_SPOT = '_'
DRAW_MARKER = 'DRAW'


class TicTacToe:
    """Square Tic-Tac-Toe game with alternating X/O turns.

    The board is a ``board_size x board_size`` NumPy array of one-character
    markers. A player wins by filling an entire row, column or diagonal.
    ``winner`` stays ``None`` while the game is ongoing and becomes
    ``PLAYER_X``, ``PLAYER_O`` or ``DRAW_MARKER`` once it has concluded.
    """

    def __init__(self, board_size=3):
        """Initialize an empty board with X to move.

        Args:
            board_size (int): Number of rows and columns. Defaults to 3.

        Raises:
            ValueError: If ``board_size`` is smaller than 1.
        """
        if board_size < 1:
            raise ValueError("board_size must be at least 1")
        self.board_size = board_size
        self.current_player = PLAYER_X
        # Size the board from board_size instead of a hard-coded 3x3 grid.
        self.board = np.full((board_size, board_size), EMPTY_SPOT)
        self.winner = None

    def copy(self):
        """Create an independent copy of the current game state.

        ``play`` modifies the board in place, so tree search needs a copy to
        explore a move without altering the original game.

        Returns:
            TicTacToe: A new game with the same board, player and winner.
        """
        new_game = TicTacToe(self.board_size)
        new_game.board = np.copy(self.board)
        new_game.current_player = self.current_player
        new_game.winner = self.winner
        return new_game

    def switch_player(self):
        """Hand the turn to the other player."""
        if self.current_player == PLAYER_X:
            self.current_player = PLAYER_O
        else:
            self.current_player = PLAYER_X

    def _check_line_win(self, line):
        """Tell whether every cell of a line holds the same player marker.

        Args:
            line: 1-D array holding one row, column or diagonal.

        Returns:
            bool: True if the line is non-empty and all cells are equal.
        """
        first_element = line[0]
        if first_element == EMPTY_SPOT:
            return False
        return bool(np.all(line == first_element))

    def check_win_or_draw(self):
        """Check whether there is a winner or the game is a draw.

        Sets ``self.winner`` if the game has concluded.

        Returns:
            str or None: The winner (PLAYER_X, PLAYER_O), DRAW_MARKER, or
            None if the game is still ongoing.
        """
        size = self.board_size
        lines = [self.board[i, :] for i in range(size)]
        lines.extend(self.board[:, i] for i in range(size))
        lines.append(np.diag(self.board))
        # np.fliplr reverses the columns, turning the anti-diagonal into the
        # main diagonal.
        lines.append(np.diag(np.fliplr(self.board)))

        for line in lines:
            if self._check_line_win(line):
                # Return immediately so a win on a full board is never
                # overwritten by the draw check below.
                self.winner = str(line[0])
                return self.winner

        if not np.any(self.board == EMPTY_SPOT):
            self.winner = DRAW_MARKER
            return self.winner

        return None

    def is_game_over(self):
        """Tell whether the game has ended (win or draw)."""
        return self.winner is not None

    def play(self, r, c):
        """Attempt to place the current player's marker at (r, c).

        The turn passes to the other player only if the game continues, so
        after a finishing move ``current_player`` is the player who made it.

        Args:
            r (int): The row index of the move.
            c (int): The column index of the move.

        Returns:
            bool: True if the move was made, False otherwise (the game has
            ended, the coordinates are off the board, or the spot is taken).
        """
        if self.is_game_over():
            return False

        # Validate the coordinates before indexing: negative indices would
        # otherwise silently wrap around to the opposite edge.
        if not (0 <= r < self.board_size and 0 <= c < self.board_size):
            return False

        if self.board[r, c] != EMPTY_SPOT:
            return False

        self.board[r, c] = self.current_player
        self.check_win_or_draw()

        if not self.is_game_over():
            self.switch_player()
        return True

    def find_possible_moves(self):
        """Find all empty spots on the board where a move can be made.

        Returns:
            list of tuples: (row, col) tuples in row-major order. Empty if
            the game is over or no spot is free.
        """
        if self.is_game_over():
            return []

        rows, cols = np.nonzero(self.board == EMPTY_SPOT)
        return [(int(r), int(c)) for r, c in zip(rows, cols)]

In [92]:
class Node:
    """One game state in the Monte Carlo search tree.

    A root node is represented as having ``parent`` set to None. Each node
    tracks how often it was visited and how many of those playouts were won
    by the player whose move led to this node, which is the statistic the
    parent needs when choosing between its children.
    """

    # Weight of the exploration term in get_UBC. The textbook UCT value is
    # sqrt(2); the value used by this file is kept unchanged here.
    EXPLORATION_CONSTANT = math.log(2)

    def __init__(self, parent, gameState, move=None):
        """Create a node for ``gameState``.

        Args:
            parent (Node or None): Parent node, None for the root.
            gameState: Game state at this node. It must provide ``copy()``,
                ``play(r, c)``, ``find_possible_moves()``, ``winner`` and
                ``current_player``.
            move (tuple or None): The (row, col) move that led from the
                parent to this node, if known.
        """
        self.parent = parent  # parent node
        self.move = move  # move that produced this node from its parent
        self.winCount = 0  # playouts through this node won by playerJustMoved
        self.visitCount = 0  # number of playouts going through this node
        self.children = []
        self.gameState = gameState  # the state of the game at this node
        # Moves that have not been expanded into a child yet.
        self.possible_moves = list(self.gameState.find_possible_moves())
        # The player to move in the parent's state is the one who made the
        # move leading here; the root has no such player.
        self.playerJustMoved = (
            parent.gameState.current_player if parent is not None else None
        )
        self.isSimulated = False  # True once simulate() has run
        self.winSimulation = False  # True if playerJustMoved won the playout
        self.simulationWinner = None  # winner marker of the last playout

    def get_UBC(self):
        """Compute the upper confidence bound used to rank this node.

        Returns:
            float: ``inf`` for an unvisited node so it is tried first;
            otherwise the win rate plus an exploration bonus that shrinks as
            the node is visited more often. Without a visited parent only
            the win rate is returned.
        """
        if self.visitCount == 0:
            return math.inf
        win_rate = self.winCount / self.visitCount
        if self.parent is None or self.parent.visitCount == 0:
            return win_rate
        exploration = math.sqrt(
            math.log(self.parent.visitCount) / self.visitCount
        )
        return win_rate + self.EXPLORATION_CONSTANT * exploration

    def select(self):
        """Descend from this node to the node that should be expanded next.

        While the current node has no untried moves left and has children,
        follow the child with the highest ``get_UBC`` value.

        Returns:
            Node: A node with untried moves, or a node without children.
        """
        node = self
        while not node.possible_moves and node.children:
            node = max(node.children, key=lambda child: child.get_UBC())
        return node

    def expand(self):
        """Create a child node for one randomly chosen untried move.

        The move is played on a copy of the game state, so this node's own
        state is left untouched, and it is removed from ``possible_moves``
        so it is not expanded twice.

        Returns:
            Node: The new child, or this node itself when there is no move
            left to expand (e.g. the game is over).
        """
        if not self.possible_moves:
            return self
        action = random.choice(self.possible_moves)
        self.possible_moves.remove(action)
        newGameState = self.gameState.copy()
        newGameState.play(action[0], action[1])
        newNode = Node(parent=self, gameState=newGameState, move=action)
        self.children.append(newNode)
        return newNode

    def simulate(self):
        """Play random moves on a copy of the state until the game ends.

        Records the outcome in ``simulationWinner``, ``isSimulated`` and
        ``winSimulation``; visit and win counts are updated by
        ``backpropagate``.

        Returns:
            The winner marker of the playout, or None if the playout stopped
            without a result because no move was available.
        """
        gameStateCopy = self.gameState.copy()
        while gameStateCopy.winner is None:
            possible_moves = gameStateCopy.find_possible_moves()
            if not possible_moves:
                break
            row, col = random.choice(possible_moves)
            gameStateCopy.play(row, col)

        winner = gameStateCopy.winner
        self.simulationWinner = winner
        self.isSimulated = True
        self.winSimulation = bool(
            winner is not None and winner == self.playerJustMoved
        )
        return winner

    def backpropagate(self):
        """Push the last playout result from this node up to the root.

        Every node on the path gets one more visit; a node also gets one more
        win when the playout winner is the player whose move led to it.

        Raises:
            RuntimeError: If ``simulate`` has not been run on this node.
        """
        if not self.isSimulated:
            raise RuntimeError("simulate() must be called before backpropagate()")
        winner = self.simulationWinner
        node = self
        while node is not None:
            node.visitCount += 1
            if winner is not None and winner == node.playerJustMoved:
                node.winCount += 1
            node = node.parent

In [96]:
class MCST:
    """Monte Carlo search tree rooted at a fresh Tic-Tac-Toe game."""

    def __init__(self, board_size=3):
        """Create the tree with an empty board as its root state.

        Args:
            board_size (int): Number of rows and columns of the board.
                Defaults to 3.
        """
        self.root = Node(parent=None, gameState=TicTacToe(board_size))

    def search(self):
        """Run one iteration: select, expand, simulate, backpropagate.

        Each phase operates on the node returned by the previous one rather
        than always on the root.
        """
        selected = self.root.select()
        expanded = selected.expand()
        expanded.simulate()
        expanded.backpropagate()

In [None]:
# Build a search tree and run 100 search iterations from the root.
tree = MCST()
for _ in range(100):
    tree.search()