## Step 1
Create and initialise game class.

In [1]:
from mcts_simple import *

class TicTacToe(Game):
    """Two-player tic-tac-toe on a 3x3 board.

    Players are identified by integers: ``1`` is "X" and ``0`` is "O"
    (see ``current_player``). "X" always moves first.

    Attributes:
        board: 3x3 list of lists holding "X", "O" or " " (empty).
        players: the two marks, rotated after every move so that
            ``players[0]`` is always the mark of the player to move.
    """

    # (row, col) coordinates of the eight lines that win the game.
    _LINES = (
        # horizontal
        ((0, 0), (0, 1), (0, 2)),
        ((1, 0), (1, 1), (1, 2)),
        ((2, 0), (2, 1), (2, 2)),
        # vertical
        ((0, 0), (1, 0), (2, 0)),
        ((0, 1), (1, 1), (2, 1)),
        ((0, 2), (1, 2), (2, 2)),
        # diagonal
        ((0, 0), (1, 1), (2, 2)),
        ((0, 2), (1, 1), (2, 0)),
    )

    def __init__(self):
        """Start with an empty board and "X" to move."""
        self.board = [[" " for _ in range(3)] for _ in range(3)]
        self.players = ["X", "O"]

    def render(self):
        """Print the board, rows separated by a ``-----`` rule."""
        rows = ["|".join(row) + "\n" for row in self.board]
        print("-----\n".join(rows))

    def get_state(self):
        """Return the board flattened row by row: 1 for "X", -1 for "O", 0 for empty."""
        x_mark, o_mark = "X", "O"
        # players is rotated every move, so look the marks up by value, not position.
        first, second = self.players[0], self.players[1]
        return [
            (cell == first) - (cell == second)
            for row in self.board
            for cell in row
        ] if (first, second) == (x_mark, o_mark) else [
            (cell == first) - (cell == second)
            for row in self.board
            for cell in row
        ]

    def number_of_players(self):
        """Return the number of players (always 2)."""
        return len(self.players)

    def current_player(self):
        """Return the id of the player to move: 1 for "X", 0 for "O"."""
        return int(self.players[0] == "X")

    def possible_actions(self):
        """Return the indices (``row * 3 + col``) of all empty cells, in ascending order."""
        return [
            row * 3 + col
            for row in range(len(self.board))
            for col in range(len(self.board[row]))
            if self.board[row][col] == " "
        ]

    def take_action(self, action):
        """Place the current player's mark on cell ``action`` and pass the turn.

        Args:
            action: cell index in ``0..8``, encoded as ``row * 3 + col``.
        """
        self.board[action // 3][action % 3] = self.players[0]
        # Rotate so the other mark is now at players[0].
        self.players.append(self.players.pop(0))

    def _winning_marks(self):
        """Return the set of marks ("X"/"O") that own at least one complete line."""
        marks = set()
        for (r0, c0), (r1, c1), (r2, c2) in self._LINES:
            first = self.board[r0][c0]
            if first != " " and first == self.board[r1][c1] == self.board[r2][c2]:
                marks.add(first)
        return marks

    def _is_full(self):
        """Return True when no empty cell is left on the board."""
        return not any(" " in row for row in self.board)

    def has_outcome(self):
        """Return True once a line is complete or the board is full."""
        return bool(self._winning_marks()) or self._is_full()

    def winner(self):
        """Return the ids of the winning players.

        Returns:
            ``[1]`` or ``[0]`` when "X" or "O" owns a complete line, both
            player ids when the board is full without a line (draw), and
            ``[]`` while the game is still undecided.
        """
        marks = self._winning_marks()
        if marks:
            # A single move can complete two lines at once; report that player
            # once rather than once per line.
            return [int(mark == "X") for mark in sorted(marks)]
        if self._is_full():
            return [int(player == "X") for player in self.players]
        return []

In [2]:
# Fresh game with an empty board; this instance is handed to the search tree in Step 2.
game = TicTacToe()

## Step 2
Train MCTS. You can choose whether to allow transpositions or not.

In [3]:
# Build a UCT tree over `game` with transpositions enabled and training mode on,
# then run 10,000 self-play iterations.
tree = UCT(game, allow_transpositions = True, training = True)
tree.self_play(iterations = 10000)

Training:   0%|          | 0/10000 [00:00<?, ?it/s]

## Step 3
Test MCTS via self-play.

In [4]:
# Turn training mode off, then run self-play with its default arguments
# (the recorded output below shows a single evaluation game being rendered).
tree.training = False
tree.self_play()

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

 | | 
-----
 | | 
-----
 | | 

 | | 
-----
 |X| 
-----
 | | 

O| | 
-----
 |X| 
-----
 | | 

O| | 
-----
X|X| 
-----
 | | 

O| | 
-----
X|X|O
-----
 | | 

O| |X
-----
X|X|O
-----
 | | 

O| |X
-----
X|X|O
-----
O| | 

O|X|X
-----
X|X|O
-----
O| | 

O|X|X
-----
X|X|O
-----
O|O| 

O|X|X
-----
X|X|O
-----
O|O|X



## Step 4 (Optional)
Export the trained tree with `save()` if you want to keep it, and import a previously trained tree with `load()`.

In [5]:
# Write the trained tree to "tictactoe.mcts"; Step 5 loads this same file.
tree.save("tictactoe.mcts")

## Step 5 (Optional)
Try to play against the MCTS and see who will emerge victorious.

In [6]:
import random
from copy import deepcopy

# Interactive game: the human plays "X" (player id 1), the loaded tree plays "O".
human_player = 1 # X

game = TicTacToe()
tree = MCTS(game, allow_transpositions = True, training = False)
tree.load("tictactoe.mcts")

# `node` tracks our position in the loaded tree; it becomes None once the game
# reaches a position the tree has no node for, after which the tree's side
# falls back to random moves.
node = tree.root
while not game.has_outcome():
    game.render()
    actions = game.possible_actions()
    if game.current_player() == human_player:
        print("Possible actions:", actions)
        # Keep asking until the input is an integer naming an empty cell, so a
        # typo or an occupied cell does not abort the whole game.
        while True:
            try:
                action = int(input("> "))
            except ValueError:
                print("Please enter one of the listed numbers.")
                continue
            if action in actions:
                break
            print("Invalid action. Possible actions:", actions)
        if node is not None:
            node = node.children[action] if action in node.children else None
    else:
        if node is not None and len(node.children) > 0:
            action = node.choose_best_action(tree.training)
            node = node.children[action]
        else:
            # Position not covered by the tree: play a random legal move.
            action = random.choice(actions)
            node = None
    game.take_action(action)
game.render()

 | | 
-----
 | | 
-----
 | | 

Possible actions: [0, 1, 2, 3, 4, 5, 6, 7, 8]
> 4
 | | 
-----
 |X| 
-----
 | | 

O| | 
-----
 |X| 
-----
 | | 

Possible actions: [1, 2, 3, 5, 6, 7, 8]
> 2
O| |X
-----
 |X| 
-----
 | | 

O| |X
-----
 |X| 
-----
O| | 

Possible actions: [1, 3, 5, 7, 8]
> 3
O| |X
-----
X|X| 
-----
O| | 

O| |X
-----
X|X|O
-----
O| | 

Possible actions: [1, 7, 8]
> 1
O|X|X
-----
X|X|O
-----
O| | 

O|X|X
-----
X|X|O
-----
O|O| 

Possible actions: [8]
> 8
O|X|X
-----
X|X|O
-----
O|O|X

