In [58]:
import numpy as np
from tqdm import tqdm

In [255]:
import random


class TicTacToe:
    """3x3 tic-tac-toe board with helpers for random playouts.

    Cells hold 'X', 'O' or '-' (empty). Playout results are always expressed
    from X's point of view as ``[win, reward]``:
    ``[1, 1]`` X wins, ``[0, -1]`` O wins, ``[0, 0]`` draw.
    """
    # Inspiration: https://geekflare.com/tic-tac-toe-python-code/

    EMPTY = '-'  # marker for a free cell
    SIZE = 3     # board is SIZE x SIZE

    def __init__(self):
        # Placeholder only; create_board() builds the actual grid.
        self.board = []

    def create_board(self):
        """Reset ``self.board`` to an empty grid.

        Builds a fresh array on every call, so it can be used to restart a
        game on an existing instance.
        """
        self.board = np.full((self.SIZE, self.SIZE), self.EMPTY)

    def get_random_first_player(self):
        """Return 0 or 1, chosen uniformly at random."""
        return random.randint(0, 1)

    def fix_spot(self, board, row, col, player):
        """Write ``player`` into ``board[row][col]`` IN PLACE and return ``board``.

        Callers that must keep the original position intact have to pass a copy.
        """
        board[row][col] = player
        return board

    @staticmethod
    def _has_won(board, player):
        """Return True if ``player`` owns a complete row, column or diagonal of ``board``."""
        grid = np.asarray(board)
        if grid.ndim != 2 or grid.size == 0:
            # An uninitialised board cannot contain a winning line.
            return False
        marks = grid == player
        return bool(
            marks.all(axis=1).any()              # any full row
            or marks.all(axis=0).any()           # any full column
            or np.diag(marks).all()              # main diagonal
            or np.diag(np.fliplr(marks)).all()   # anti-diagonal
        )

    @classmethod
    def _is_filled(cls, board):
        """Return True if ``board`` has no empty cell left."""
        grid = np.asarray(board)
        return grid.size == 0 or not bool(np.any(grid == cls.EMPTY))

    @classmethod
    def _finished_result(cls, board):
        """Return ``[win, reward]`` if ``board`` is already decided, else None."""
        if cls._has_won(board, 'X'):
            return [1, 1]
        if cls._has_won(board, 'O'):
            return [0, -1]
        if cls._is_filled(board):
            return [0, 0]
        return None

    @staticmethod
    def _print_board(board):
        """Print ``board`` row by row, followed by a blank line."""
        for row in board:
            for item in row:
                print(item, end=" ")
            print()
        print()

    def is_player_win(self, player):
        """Return True if ``player`` has three in a line on ``self.board``."""
        return self._has_won(self.board, player)

    def is_board_filled(self):
        """Return True if ``self.board`` has no empty cell left."""
        return self._is_filled(self.board)

    def swap_player_turn(self, player):
        """Return the opponent's mark: 'X' for 'O', otherwise 'O'."""
        return 'X' if player == 'O' else 'O'

    def show_board(self):
        """Print ``self.board``."""
        self._print_board(self.board)

    def create_start_position(self):
        """Place the fixed opening (two X, two O) on an already created board."""
        self.board[1][0] = 'X'
        self.board[1][1] = 'X'
        self.board[0][1] = 'O'
        self.board[1][2] = 'O'

    def valid_moves(self):
        """Return an (n_moves, 2) integer array of the empty [row, col] cells of ``self.board``."""
        return np.argwhere(self.board == self.EMPTY)

    def pick_random_move(self):
        """Return one empty [row, col] cell of ``self.board``, chosen uniformly at random."""
        moves = self.valid_moves()
        return moves[np.random.randint(moves.shape[0])]

    def return_random_rollout(self, board, player, do_print=False):
        """Play uniformly random moves on ``board`` until the game ends.

        Args:
            board: position to continue from. It is modified IN PLACE; every
                move choice and end-of-game check reads this same board, so
                it does not have to be ``self.board``.
            player: mark ('X' or 'O') that moves first.
            do_print: if True, print the board after each move and the result.

        Returns:
            ``[win, reward]`` from X's point of view (see class docstring).
            If ``board`` is already decided, its result is returned without
            playing any move.
        """
        result = self._finished_result(board)
        while result is None:
            open_cells = np.argwhere(np.asarray(board) == self.EMPTY)
            row, col = open_cells[np.random.randint(len(open_cells))]
            board = self.fix_spot(board, row, col, player)
            if self._has_won(board, player):
                if do_print:
                    print(f"Player {player} wins the game!")
                result = [1, 1] if player == 'X' else [0, -1]
            elif self._is_filled(board):
                if do_print:
                    print("Match Draw!")
                result = [0, 0]
            else:
                # Game continues: hand the turn to the opponent.
                player = self.swap_player_turn(player)
                if do_print:
                    self._print_board(board)
        if do_print:
            self._print_board(board)  # show final board
            print(result)
        return result

    def start_random(self, do_print=False):
        """Set up the fixed start position and play it out randomly with X to move.

        Returns:
            ``[win, reward]`` as produced by ``return_random_rollout``.
            ``self.board`` holds the final position afterwards.
        """
        self.create_board()           # empty board
        self.player = 'X'             # X moves first
        self.create_start_position()  # fixed opening
        if do_print:
            self.show_board()

        return self.return_random_rollout(self.board, self.player, do_print=do_print)


# starting the game
# Smoke run: play one silent random game from the fixed start position.
tic_tac_toe = TicTacToe()
# NOTE: despite its name, `winner` holds the [win, reward] list returned by start_random().
winner = tic_tac_toe.start_random()

In [261]:
# Verbose run: print the board after every move, then the final [win, reward].
# The game is random, so the printed output differs between executions.
tic_tac_toe = TicTacToe()
winner = tic_tac_toe.start_random(do_print=True)

- O - 
X X O 
- - - 

- O - 
X X O 
X - - 

- O O 
X X O 
X - - 

- O O 
X X O 
X - X 

Player O wins the game!
O O O 
X X O 
X - X 

[0, -1]


In [253]:
# Estimate outcome frequencies of random play from the fixed start position.
# start_random() returns [win, reward]; the reward (1, 0 or -1) selects the
# counter to increment.
OUTCOME_SLOT = {1: 0, 0: 1, -1: 2}  # reward -> index into scores (X, draw, O)
scores = np.zeros(3)
for i in tqdm(range(10000)):
    tic_tac_toe = TicTacToe()
    win_rew = tic_tac_toe.start_random()
    scores[OUTCOME_SLOT[win_rew[1]]] += 1

print("X, draw, O")
print(scores)
    

100%|██████████| 10000/10000 [00:02<00:00, 3511.50it/s]

X, draw, O
[6259. 2079. 1662.]





In [262]:
class Node(TicTacToe):
    """Search-tree node for MCTS: a board position plus its statistics.

    ``player`` is the mark that was placed last to reach ``board``; children
    are created by letting the opponent (``swap_player_turn(player)``) move.
    ``wins`` and ``reward`` accumulate playout results, ``visits`` counts how
    often the node was on a back-propagated path.
    """

    def __init__(self, board, player, parent=None):
        self.board = board
        self.player = player
        self.children = []
        self.parent = parent
        self.reward = 0
        self.wins = 0
        self.visits = 0
        # Unvisited nodes score +inf so that selection tries each of them once.
        self.UCB = np.inf

    def is_terminal(self):
        """Return True if either player has won or the board is full."""
        return bool(
            self.is_player_win('X')
            or self.is_player_win('O')
            or self.is_board_filled()
        )

    def reward_board(self):
        """Return the result of a finished position from X's point of view.

        Returns:
            1 if X has won, -1 if O has won, 0 for a full board without a
            winner, and None while the game is still open.
        """
        if self.is_player_win('X'):
            return 1
        if self.is_player_win('O'):
            return -1
        if self.is_board_filled():
            return 0
        return None

    def generate_all_possible_children(self):
        """Append one child node per legal move of the player whose turn it is.

        Does nothing for terminal positions or if the children were already
        generated, so repeated calls never duplicate children.
        """
        if self.children or self.is_terminal():
            return
        player_child = self.swap_player_turn(self.player)
        for move in self.valid_moves():
            # fix_spot writes in place, so every child needs its own copy of
            # the board; otherwise all children (and this node) share one array.
            board_child = self.fix_spot(np.array(self.board), move[0], move[1], player_child)
            self.children.append(Node(board_child, player_child, parent=self))

    def update_UCB(self):
        """Recompute the UCB1 score from the current win and visit counts.

        UCB1 = wins / visits + sqrt(2) * sqrt(ln(parent.visits) / visits).
        The score is left unchanged for the root (no parent) and while this
        node or its parent has no visits, which would make the formula undefined.
        """
        if self.parent is None or self.visits == 0 or self.parent.visits == 0:
            return
        exploitation = self.wins / self.visits
        exploration = np.sqrt(2) * np.sqrt(np.log(self.parent.visits) / self.visits)
        self.UCB = exploitation + exploration

    def return_max_UCB(self):
        """Return the child with the highest UCB score, or None if there are no children."""
        if not self.children:
            return None
        return max(self.children, key=lambda child: child.UCB)

class MCTS:
    """Monte Carlo tree search driver operating on Node-like objects.

    A node must provide ``children``, ``board``, ``player``, ``wins``,
    ``reward``, ``visits`` and the methods ``is_terminal``, ``reward_board``,
    ``generate_all_possible_children``, ``return_max_UCB``, ``update_UCB``,
    ``swap_player_turn`` and ``return_random_rollout(board, player)``.
    """

    def __init__(self, root_node):
        self.root_node = root_node
        # Maps an expanded node to the child that had the best UCB at expansion time.
        self.nodes = dict()
        self.nodes[root_node] = root_node

    def take_step(self, node):
        """Run one iteration starting at ``node``: select, expand, simulate, back-propagate."""
        path = self.selection(node)
        leaf_node = path[-1]
        self.expansion(leaf_node)
        win_rew = self.simulation(leaf_node)
        self.backtrack(path, win_rew)

    def selection(self, node):
        """Follow the max-UCB child from ``node`` down to a node without children.

        Returns:
            The list of visited nodes, starting with ``node`` and ending at the leaf.
        """
        path = [node]
        while node.children:
            node = node.return_max_UCB()  # unvisited children start at +inf
            path.append(node)
        return path

    def expansion(self, node):
        """Create the children of ``node``; finished games are left unexpanded."""
        if node.is_terminal():
            return
        node.generate_all_possible_children()
        self.nodes[node] = node.return_max_UCB()

    def simulation(self, node):
        """Estimate the value of ``node`` and return it as ``[win, reward]``.

        A terminal node is scored directly. Otherwise a random playout is run
        on a scratch copy, because the rollout fills its board in place and
        must not alter the position stored in the tree.
        """
        if node.is_terminal():
            reward = node.reward_board()
            return [1 if reward == 1 else 0, reward]
        playout = type(node)(node.board.copy(), node.player)
        # node.player made the last move, so the opponent starts the playout.
        to_move = playout.swap_player_turn(playout.player)
        return playout.return_random_rollout(playout.board, to_move)

    def backtrack(self, path, win_rew):
        """Add the playout result ``[win, reward]`` to every node on ``path``."""
        for node in path:
            node.wins += win_rew[0]
            node.reward += win_rew[1]
            node.visits += 1
        # Refresh scores only after all counts are final: a node's UCB depends
        # on its parent's visit count, which must already include this iteration.
        for node in path:
            node.update_UCB()


        

    


        
    