In [63]:
import numpy as np
from anytree import RenderTree
import logging
import random

import log_set
from anytree import search
from tqdm import notebook

In [64]:
%load_ext autoreload
%autoreload 2

from tictac_rl import TicTacToe, MinMaxTree, MCTS, CROSS_PLAYER, CIRCLE_PLAYER

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [65]:
# Seed Python's global `random` module so that the random.choice() calls made
# inside simulate() produce a repeatable sequence of moves.
random.seed(100)

In [66]:
# Game environment shared by every simulate() call in this notebook.
# NOTE(review): presumably a 3x3 board with 3-in-a-row to win and CROSS_PLAYER
# as the starting player -- confirm against the TicTacToe constructor.
env = TicTacToe(3, 3, 3, CROSS_PLAYER)

In [67]:
def render_tree(node, max_level=None):
    """Print the tree rooted at ``node``, one line per rendered row.

    Args:
        node: root of the (sub)tree to print.
        max_level: forwarded to ``RenderTree`` as ``maxlevel``; ``None`` is
            passed through unchanged.
    """
    rendered_rows = RenderTree(node, maxlevel=max_level)
    for row in rendered_rows:
        # Each row unpacks into (prefix, fill, node); the fill part is unused.
        prefix, _fill, item = row
        print(f"{prefix}{item}")

In [68]:
# Search tree handed to simulate(), which calls tree.best_move() and
# tree.add_node() on it. Re-running this cell rebinds `tree` to a new MCTS().
# NOTE(review): statistics presumably accumulate inside the tree across
# simulate() calls -- confirm in the MCTS implementation.
tree = MCTS()

In [69]:
def simulate(tree, env, num_sim: int):
    """Play ``num_sim`` games on ``env`` and collect each game's final reward.

    Before every move the side to move is read from ``env.curTurn``:

    * if it equals ``env._start_player`` the move is picked with
      ``random.choice`` from the current free-space list (initially
      ``env.getEmptySpaces()``, afterwards the list returned by ``env.step``);
    * otherwise the move comes from ``tree.best_move``, which also returns
      the tree node to continue from.

    After each move made by the start player, the resulting state string is
    passed to ``tree.add_node`` and the node it returns becomes the new
    cursor. The same ``tree`` and ``env`` objects are reused for every game.

    Args:
        tree: object exposing ``root``, ``best_move(env, node, is_max)`` and
            ``add_node(node, state_str, env)``.
        env: object exposing ``reset()``, ``getEmptySpaces()``, ``curTurn``,
            ``_start_player`` and ``step(action)``, where ``step`` returns
            ``((state_str, free_space, _), reward, is_end)``.
        num_sim: number of games to play.

    Returns:
        A list with one entry per game: the reward returned by the step that
        ended that game.
    """
    rewards = []
    for _ in notebook.trange(num_sim):
        env.reset()
        free_space = env.getEmptySpaces()
        node = tree.root
        is_max = True  # forwarded unchanged to every tree.best_move() call
        is_end = False

        while not is_end:
            if env.curTurn != env._start_player:
                step, node = tree.best_move(env, node, is_max)
            else:
                step = random.choice(free_space)

            # Remember who is moving: env.step() may advance env.curTurn.
            mover = env.curTurn
            (state_str, free_space, _), reward, is_end = env.step(step)

            if mover == env._start_player:
                node = tree.add_node(node, state_str, env)

        # The loop above only exits once is_end is true, so `reward` is the
        # reward of the terminal step of this game.
        rewards.append(reward)
    return rewards

In [75]:
# Play 10,000 games. `rewards` receives one final reward per game, and `tree`
# is handed to tree.best_move()/tree.add_node() throughout, so it carries over
# into later cells.
rewards = simulate(tree, env, 10_000)

  0%|          | 0/10000 [00:00<?, ?it/s]

In [80]:
# Play 5,000 more games on the same `tree` and `env`. This rebinds `rewards`,
# so the ratios computed further down cover only these 5,000 games.
# NOTE(review): the execution counts jump (69 -> 75 -> 80) and the rendered
# root reads 0/22000, which is more than 10_000 + 5_000 -- the simulate cells
# were likely run extra times. Confirm the numbers with Restart & Run All.
rewards = simulate(tree, env, 5_000)

  0%|          | 0/5000 [00:00<?, ?it/s]

In [81]:
# Print the search tree starting at its root; 4 is passed as max_level, which
# render_tree() forwards to RenderTree's maxlevel argument.
render_tree(tree.root, 4)

empty 0/22000
├── 211111111 11278/18068
│   ├── 201111111 314/4999
│   │   ├── 202111111 98/385
│   │   ├── 201211111 169/576
│   │   ├── 201121111 931/2210
│   │   ├── 201112111 96/379
│   │   ├── 201111211 110/417
│   │   ├── 201111121 177/592
│   │   └── 201111112 118/439
│   ├── 210111111 311/4967
│   │   ├── 220111111 104/441
│   │   ├── 210211111 152/577
│   │   ├── 210121111 148/566
│   │   ├── 210112111 156/589
│   │   ├── 210111211 648/1667
│   │   ├── 210111121 139/541
│   │   └── 210111112 155/586
│   ├── 211011111 321/5072
│   │   ├── 221011111 177/575
│   │   ├── 212011111 441/1219
│   │   ├── 211021111 417/1065
│   │   ├── 211012111 154/517
│   │   ├── 211011211 183/590
│   │   ├── 211011121 160/531
│   │   └── 211011112 177/575
│   ├── 211101111 302/4873
│   │   ├── 221101111 148/691
│   │   ├── 212101111 123/605
│   │   ├── 211201111 133/639
│   │   ├── 211102111 133/640
│   │   ├── 211101211 359/1190
│   │   ├── 211101121 118/588
│   │   └── 211101112 98/520
│   ├── 21

In [82]:
# Fraction of games in `rewards` whose final reward equals env._start_player
# or 0.
# NOTE(review): presumably a reward equal to a player id means that player won
# and 0 means a draw, making this the start player's non-loss rate -- confirm
# against TicTacToe.step.
(rewards.count(env._start_player) + rewards.count(0)) / len(rewards)

0.9592

In [83]:
# Fraction of games in `rewards` whose final reward is 0
# (presumably draws -- confirm against TicTacToe.step).
rewards.count(0) / len(rewards)

0.1184