In [1]:
import numpy as np
from anytree import RenderTree
import logging
import random

import log_set
from tqdm import notebook

In [2]:
%load_ext autoreload
%autoreload 2

from tictac_rl import TicTacToe, MinMaxTree, MCTS, CROSS_PLAYER, CIRCLE_PLAYER, simulate
from tictac_rl import RandomPolicy, TreePolicy

In [20]:
# Seed the stdlib `random` generator; `simulate_game` below picks the
# opponent's moves with `random.choice`.
random.seed(100)

In [39]:
# Game environment shared by the MCTS and random-policy cells below.
# NOTE(review): the three arguments are presumably rows, columns and the
# winning line length - confirm against tictac_rl.TicTacToe.
env = TicTacToe(4, 4, 4)

In [22]:
def render_tree(node, max_level=None):
    """Print the tree rooted at ``node``, one line per rendered node.

    Every line is the branch prefix produced by ``RenderTree`` immediately
    followed by the string form of the node. ``max_level`` is forwarded to
    ``RenderTree`` as its ``maxlevel`` argument.
    """
    rendered_rows = RenderTree(node, maxlevel=max_level)
    for row in rendered_rows:
        # Each row unpacks into (prefix, fill, node); the fill is not printed.
        prefix, _fill, subtree_node = row
        print(f"{prefix}{subtree_node}")

In [28]:
# Load a previously dumped MinMaxTree from disk.
# NOTE(review): the `.pickle` extension suggests `load_from_dump` unpickles
# the file - only load dumps that were produced locally.
tree = MinMaxTree.load_from_dump("./trees/3/cross/3_3_start_cross.pickle")

In [40]:
# One MCTS instance per side; both are constructed from the same `env` object.
# NOTE(review): neither name is referenced again in the cells shown below.
tree_circle = MCTS(env)
tree_cross = MCTS(env)

In [57]:
# Passed as the second argument of `simulate` in the cells below.
cross_policy = RandomPolicy()

In [58]:
# Passed as the third argument of `simulate` in the cells below.
circle_policy = RandomPolicy()

In [59]:
# Play 10,000 games between the two policies. The value returned by
# `simulate` is discarded here; the next cell collects it.
for _ in notebook.trange(10_000):
    simulate(env, cross_policy, circle_policy)

  0%|          | 0/10000 [00:00<?, ?it/s]

In [60]:
# Collect the value returned by `simulate` for each of 5,000 games.
rewards = [
    simulate(env, cross_policy, circle_policy)
    for _ in notebook.trange(5_000)
]

  0%|          | 0/5000 [00:00<?, ?it/s]

In [None]:
def simulate_game(tree, env, num_sim: int, tree_player: int):
    """Play ``num_sim`` games of a tree-guided player against random moves.

    On turns where ``env.curTurn == tree_player`` the move comes from
    ``tree.best_move``; on every other turn it is drawn with
    ``random.choice`` from the free cells reported by the environment.
    After each random move the resulting state is registered in the tree
    with ``tree.add_node`` so the tree cursor follows the game.

    Args:
        tree: search tree exposing ``root``, ``add_start_nodes``,
            ``find_game_state``, ``best_move`` and ``add_node``.
        env: game environment exposing ``reset``, ``getEmptySpaces``,
            ``curTurn`` and ``step``; it is reset at the start of each game.
        num_sim: number of games to play.
        tree_player: value of ``env.curTurn`` on which the tree moves.

    Returns:
        A list with the final reward of every game that ended inside the
        main move loop, in the order the games were played.
    """
    rewards = []
    for _game in notebook.trange(num_sim):
        is_end = False
        env.reset()
        # Never toggled: every best_move query below is made with is_max=True.
        is_max = True

        free_space = env.getEmptySpaces()
        start_node = tree.root

        if env.curTurn != tree_player:
            # The random side opens: make sure the tree has its first-level
            # nodes, play a random opening and move the cursor to that state.
            tree.add_start_nodes(env)
            step = random.choice(free_space)
            (state_str, free_space, _), reward, is_end = env.step(step)
            start_node = tree.find_game_state(start_node, state_str)

        while not is_end:
            if env.curTurn == tree_player:
                # NOTE(review): the two-argument simulate_game later in this
                # notebook calls best_move(node, env, is_max) - confirm the
                # argument order expected by this tree type.
                step, start_node = tree.best_move(env, start_node, is_max)
            else:
                step = random.choice(free_space)

            # Remember who is moving; env.step advances the turn.
            mover = env.curTurn
            (state_str, free_space, _), reward, is_end = env.step(step)

            if mover != tree_player:
                # best_move already advanced the cursor for tree moves; random
                # moves have to be added to the tree explicitly.
                start_node = tree.add_node(start_node, state_str, env)
            if is_end:
                rewards.append(reward)
    return rewards

In [None]:
# Short run of the tree-vs-random loop defined above. The imported `simulate`
# is called as simulate(env, cross_policy, circle_policy) elsewhere in this
# notebook; the (tree, env, num_sim, tree_player) arguments belong to
# `simulate_game`.
rewards = simulate_game(tree, env, 200, CIRCLE_PLAYER)

  0%|          | 0/200 [00:00<?, ?it/s]

In [None]:
# Full run of the tree-vs-random loop. The (tree, env, num_sim, tree_player)
# arguments match `simulate_game`, not the imported three-argument `simulate`.
rewards = simulate_game(tree, env, 5_000, CIRCLE_PLAYER)

  0%|          | 0/5000 [00:00<?, ?it/s]

In [None]:
# Print the tree from its root, passing 4 as `max_level`.
render_tree(tree.root, 4)

empty 0/0
├── 211111111 45/170
│   ├── 201111111 0/21
│   │   ├── 202111111 4/7
│   │   ├── 201211111 3/6
│   │   ├── 201121111 0/5
│   │   ├── 201112111 2/5
│   │   ├── 201111211 2/6
│   │   ├── 201111121 2/6
│   │   └── 201111112 0/4
│   ├── 210111111 0/21
│   │   ├── 220111111 2/7
│   │   ├── 210211111 2/7
│   │   ├── 210121111 0/5
│   │   ├── 210112111 0/7
│   │   ├── 210111211 0/6
│   │   ├── 210111121 2/7
│   │   └── 210111112 3/8
│   ├── 211011111 0/22
│   │   ├── 221011111 1/5
│   │   ├── 212011111 2/5
│   │   ├── 211021111 0/4
│   │   ├── 211012111 3/6
│   │   ├── 211011211 2/5
│   │   ├── 211011121 1/5
│   │   └── 211011112 1/8
│   ├── 211101111 0/21
│   │   ├── 221101111 3/7
│   │   ├── 212101111 2/6
│   │   ├── 211201111 2/6
│   │   ├── 211102111 3/7
│   │   ├── 211101211 1/5
│   │   ├── 211101121 2/8
│   │   └── 211101112 2/6
│   ├── 211110111 0/21
│   │   ├── 221110111 1/5
│   │   ├── 212110111 1/5
│   │   ├── 211210111 0/5
│   │   ├── 211120111 1/5
│   │   ├── 211110211 

In [61]:
# Fraction of games whose reward equals `env._start_player` or 0.
# NOTE(review): the execution count shows this ran right after In [60]; in
# file order `rewards` is reassigned by the cells in between, so a top-to-bottom
# run evaluates a different list. It also reads the private `_start_player`.
(rewards.count(env._start_player) + rewards.count(0)) / len(rewards)

0.7312

In [62]:
# Fraction of games whose reward is 0.
rewards.count(0) / len(rewards)

0.4154

In [None]:
# Rebinds `env` (earlier cells use TicTacToe(4, 4, 4)).
# NOTE(review): the two-argument `simulate_game` below builds its own
# TicTacToe instance and does not read this variable.
env = TicTacToe(3, 3, 3)

In [None]:
# Forward slashes resolve on Windows as well as POSIX; the previous raw
# backslash path only worked on Windows. Same file as the earlier load cell.
tree = MinMaxTree.load_from_dump("./trees/3/cross/3_3_start_cross.pickle")

In [None]:
import itertools

def simulate_game(tree: "MinMaxTree", start_player: int):
    """Assert that tree-driven play ends with reward 0 for every opening move.

    A ``TicTacToe(3, 3, 3, start_player)`` environment is created and, for
    each ``(row, col)`` cell of the board, a game is played in which that
    cell is the forced first move and every following move (for both
    sides, with ``is_max`` alternating) is taken from ``tree.best_move``.

    When a game ends with a non-zero reward, every visited state hash and
    its decoded form are logged to the ``"tictoc"`` logger, the logger's
    handlers are flushed, and the assertion fails.

    NOTE(review): this definition reuses the name of the earlier
    ``simulate_game(tree, env, num_sim, tree_player)`` cell and replaces it
    once this cell has run.

    Args:
        tree: tree exposing ``root``, ``find_game_state`` and
            ``best_move(node, env, is_max)``.
        start_player: forwarded as the fourth ``TicTacToe`` argument.

    Raises:
        AssertionError: if any game ends with a non-zero reward.
    """
    env = TicTacToe(3, 3, 3, start_player)
    logger = logging.getLogger("tictoc")

    for start_x, start_y in itertools.product(range(env.n_rows), range(env.n_cols)):
        env.reset()
        logger.info("start")
        # The forced opening counts as the maximising move; the flag is
        # flipped before every tree move below.
        is_max = True
        (hash_state, *_), reward, is_end = env.step((start_x, start_y))
        start_node = tree.find_game_state(tree.root, hash_state)
        hashes = [hash_state]

        while True:
            is_max = not is_max
            move, start_node = tree.best_move(start_node, env, is_max)
            (hash_state, *_), reward, is_end = env.step(move)
            hashes.append(hash_state)

            if is_end:
                if reward != 0:
                    for hash_str in hashes:
                        logger.info(hash_str)
                        logger.info(env.from_state_str(hash_str))
                    # Flush whatever handlers are attached; indexing
                    # handlers[0] raised IndexError on a logger without its
                    # own handlers and hid the assertion below.
                    for handler in logger.handlers:
                        handler.flush()
                assert reward == 0, (
                    f"game opened at {(start_x, start_y)} ended with reward {reward}"
                )
                break

In [None]:
# Runs the two-argument `simulate_game` from the cell above with
# CROSS_PLAYER as `start_player`.
simulate_game(tree, CROSS_PLAYER)