In [1]:
from mcts_simple import Game

class TicTacToe(Game):
    """Two-player tic-tac-toe on a 3x3 board, written as a ``Game`` subclass.

    Cells are keyed by a row letter ("a"-"c") followed by a column digit
    ("1"-"3"), e.g. "b2" is the centre cell. An empty cell holds a single
    space; an occupied cell holds the mark of the player who took it.
    """

    # Cell keys of the eight winning lines: three rows, three columns,
    # then the two diagonals.
    _WIN_LINES = (
        ("a1", "a2", "a3"),
        ("b1", "b2", "b3"),
        ("c1", "c2", "c3"),
        ("a1", "b1", "c1"),
        ("a2", "b2", "c2"),
        ("a3", "b3", "c3"),
        ("a1", "b2", "c3"),
        ("a3", "b2", "c1"),
    )

    def __init__(self):
        """Create an empty board with player "X" to move first."""
        self.board = {char + str(num + 1): " " for char in "abc" for num in range(3)}
        self.players = ["X", "O"]
        self.player_turn = 0  # index into self.players
        self.prev_actions = []  # stack of cells taken, most recent last

    def win_conditions(self):
        """Return the current contents of the eight winning lines.

        Returns:
            A tuple of eight 3-tuples of cell values, ordered rows, columns,
            diagonals.
        """
        return tuple(
            tuple(self.board[cell] for cell in line) for line in self._WIN_LINES
        )

    def previous_player(self):
        """Step the turn index back by one player."""
        self.player_turn = (self.player_turn - 1) % 2

    def next_player(self):
        """Advance the turn index to the other player."""
        self.player_turn = (self.player_turn + 1) % 2

    def render(self):
        """Print the board row by row, followed by a blank line."""
        for index, row in enumerate("abc"):
            if index:
                print("-" * 5)
            print("|".join(self.board[row + col] for col in "123"))
        print()

    def get_state(self):
        """Return the nine cell values as a tuple, in board insertion order."""
        return tuple(self.board.values())

    def number_of_players(self):
        """Return how many players take part in the game."""
        return len(self.players)

    def current_player(self):
        """Return the mark ("X" or "O") of the player whose turn it is."""
        return self.players[self.player_turn]

    def possible_actions(self):
        """Return the keys of all cells that are still empty."""
        return [pos for pos in self.board if self.board[pos] == " "]

    def take_action(self, action):
        """Place the current player's mark on ``action`` and pass the turn.

        Args:
            action: Key of the cell to take, e.g. "b2".

        Raises:
            RuntimeError: If ``action`` is not an empty cell of the board.
        """
        if action not in self.possible_actions():
            raise RuntimeError("Action taken is invalid.")
        self.board[action] = self.current_player()
        self.prev_actions.append(action)
        self.next_player()

    def delete_last_action(self):
        """Undo the most recent action and hand the turn back.

        Raises:
            RuntimeError: If no action has been taken yet.
        """
        if len(self.prev_actions) == 0:
            raise RuntimeError("There is no action to be deleted.")
        self.board[self.prev_actions.pop()] = " "
        self.previous_player()

    def has_outcome(self):
        """Return True once a player owns a full line or the board is full."""
        # Build the line contents once instead of once per player.
        lines = self.win_conditions()
        return (
            any((player,) * 3 in lines for player in self.players)
            or " " not in self.board.values()
        )

    def winner(self):
        """Return the winning player's mark, or None for a draw.

        Raises:
            RuntimeError: If the game has no outcome yet, i.e. nobody owns a
                full line and at least one cell is still empty.
        """
        lines = self.win_conditions()
        for player in self.players:
            if (player,) * 3 in lines:
                return player
        if " " not in self.board.values():
            # Board is full and nobody completed a line: a draw.
            return None
        raise RuntimeError("winner() cannot be called when outcome is undefined.")

In [2]:
from mcts_simple import MCTS, UCT

# 1. MCTS: run 300 iterations from an empty board, then export to mcts.json.
print("To train MCTS")
mcts = MCTS(TicTacToe())
mcts.run(iterations=300)
mcts._export("mcts.json")

# 2. MCTS: start from a fresh instance, import mcts.json and self-play.
print("To import trained MCTS")
mcts = MCTS(TicTacToe())
mcts._import("mcts.json")
mcts.self_play(activation="best")

# 3. UCT: run 300000 iterations from an empty board, then export to uct.json.
print("To train UCT")
uct = UCT(TicTacToe())
uct.run(iterations=300000)
uct._export("uct.json")

# 4. UCT: start from a fresh instance, import uct.json and self-play.
print("To import trained UCT")
uct = UCT(TicTacToe())
uct._import("uct.json")
uct.self_play(activation="best")

# 5. MCTS from a mid-game position: three moves are played by hand
#    (X b2, O c1, X a2) before the board is handed to MCTS.
print("To import trained MCTS to play from middle of game")
t = TicTacToe()
t.take_action("b2")
t.take_action("c1")
t.take_action("a2")
mcts = MCTS(t)
mcts._import("mcts.json")
mcts.self_play(activation="best")

# 6. UCT against a human: the recorded run prompts with "Input user action:".
print("To import trained UCT to play with human")
uct = UCT(TicTacToe())
uct._import("uct.json")
uct.play_with_human(activation="best")

To train MCTS


HBox(children=(FloatProgress(value=0.0, description='Simulating', max=300.0, style=ProgressStyle(description_w…


To import trained MCTS
 | | 
-----
 | | 
-----
 | | 

X| | 
-----
 | | 
-----
 | | 

X| | 
-----
 | |O
-----
 | | 

X| | 
-----
 | |O
-----
X| | 

X| | 
-----
 | |O
-----
X| |O

X| |X
-----
 | |O
-----
X| |O

X| |X
-----
 |O|O
-----
X| |O

X| |X
-----
X|O|O
-----
X| |O

To train UCT


HBox(children=(FloatProgress(value=0.0, description='Simulating', max=300000.0, style=ProgressStyle(descriptio…


To import trained UCT
 | | 
-----
 | | 
-----
 | | 

 | | 
-----
 |X| 
-----
 | | 

 | | 
-----
 |X| 
-----
O| | 

 | | 
-----
X|X| 
-----
O| | 

 | | 
-----
X|X|O
-----
O| | 

 | | 
-----
X|X|O
-----
O| |X

O| | 
-----
X|X|O
-----
O| |X

O| | 
-----
X|X|O
-----
O|X|X

O|O| 
-----
X|X|O
-----
O|X|X

O|O|X
-----
X|X|O
-----
O|X|X

To import trained MCTS to play from middle of game
 |X| 
-----
 |X| 
-----
O| | 

 |X| 
-----
 |X|O
-----
O| | 

 |X| 
-----
 |X|O
-----
O|X| 

To import trained UCT to play with human
 | | 
-----
 | | 
-----
 | | 

 | | 
-----
 |X| 
-----
 | | 

Input user action: a3
 | |O
-----
 |X| 
-----
 | | 

 | |O
-----
 |X|X
-----
 | | 

Input user action: b1
 | |O
-----
O|X|X
-----
 | | 

X| |O
-----
O|X|X
-----
 | | 

Input user action: c3
X| |O
-----
O|X|X
-----
 | |O

X| |O
-----
O|X|X
-----
 |X|O

Input user action: a2
X|O|O
-----
O|X|X
-----
 |X|O

X|O|O
-----
O|X|X
-----
X|X|O

