# Minimax Tree Search with Policy/Value Functions

In [1]:
%load_ext autoreload
%autoreload 2

In [1]:
from domoku.interfaces import Game, AbstractGanglion
from typing import List, Any, Optional
import numpy as np
from domoku.minimax import MinimaxSearch

### Test Stuff

In [2]:
class TestState:
    """
    A node of a hand-built game tree used to exercise the tree search.

    A terminal node carries a ``value``; a non-terminal node carries a list
    of successor nodes that are addressed by their index in that list.
    """

    def __init__(self, terminal: bool = False, successors=None, value=0):
        """
        :param terminal: whether this node is flagged as terminal
        :param successors: child nodes, addressed by index; ``None`` means no children
        :param value: the value stored on this node
        """
        self._terminal = terminal
        # Normalise a missing successor list to an empty one, so that len()
        # and iteration work on leaves instead of raising TypeError on None.
        self._successors = [] if successors is None else successors
        self.value = value

    def is_terminal(self) -> bool:
        """Return the terminal flag given at construction."""
        return self._terminal

    def get_successors(self):
        """Return the list of successor nodes (empty if none were given)."""
        return self._successors

    def move(self, move: int) -> 'TestState':
        """
        Return the successor reached by ``move``.

        :param move: index into the successor list
        :raises AssertionError: if ``move`` is not a valid successor index
        """
        num_moves = len(self._successors)
        # The number of successors is not fixed, so report the actual range.
        assert move in range(num_moves), \
            f"Move must be an index in range({num_moves}), got {move!r}"
        return self._successors[move]

    def __str__(self):
        # Leaves print their value, inner nodes print their nested successors.
        if self._terminal:
            return str(self.value)
        return str(self._successors)

    __repr__ = __str__


class Policy(AbstractGanglion):
    """
    Minimal stand-in policy used to drive the tree search in the tests below.
    """
    def winner(self, state: Any) -> Optional[int]:
        """Report 1 for any terminal state, otherwise None."""
        if state.is_terminal():
            return 1
        return None

    def sample(self, state, n=1) -> List[int]:
        """Return the index of every successor of ``state``; ``n`` is not used."""
        num_moves = len(state.get_successors())
        return [*range(num_moves)]

    def eval(self, state):
        """Return the value stored on ``state``."""
        stored_value = state.value
        return stored_value


class TestGame(Game):
    """
    Game adapter for the test trees; only ``successor`` is implemented.
    """

    def get_state(self: Any) -> Any:
        """Not implemented for this test game."""
        raise NotImplementedError

    def do_move(self, move: Any) -> Any:
        """Not implemented for this test game."""
        raise NotImplementedError

    @staticmethod
    def successor(state: Any, move: Any):
        """Return the successor of ``state`` stored at index ``move``."""
        children = state.get_successors()
        return children[move]


def create_state(depth, min_depth=0, num_successors=2):
    """
    Recursively build a random tree of TestState nodes.

    A node becomes a leaf when ``depth`` reaches 0. For any other depth a
    coin with a 1-in-7 chance of success is flipped, and the node becomes a
    leaf early if the coin succeeds and ``depth`` is below ``min_depth``.
    Leaves get a value drawn uniformly from 1..6; every inner node gets
    ``num_successors`` children built with ``depth - 1``.
    """
    make_leaf = depth == 0
    if not make_leaf:
        # The coin is flipped for every non-zero depth, before min_depth is checked.
        coin = np.random.choice([False] * 6 + [True])
        make_leaf = bool(coin) and min_depth > depth

    if make_leaf:
        leaf_value = np.random.choice([1, 2, 3, 4, 5, 6])
        return TestState(terminal=True, value=leaf_value)

    children = []
    for _ in range(num_successors):
        children.append(create_state(depth - 1, min_depth, num_successors))

    return TestState(False, children)

In [3]:
# Search instance over the test game, driven by the trivial test policy.
minimax = MinimaxSearch(
    TestGame(),
    policy=Policy(),
    max_depth=5,
    max_breadth=3,
)

In [4]:
# Build a random test tree with three successors per inner node.
state = create_state(4, 2, num_successors=3)

# Earlier hand-built example, kept for reference. It refers to a `State`
# class with left/right children that is not defined in this notebook.
#left = State(True, value=4)
#state = State(False, left=left, right=State(False, left=State(True, value=6), right=State(True, value=2)))

policy = Policy()
search = MinimaxSearch(TestGame(), policy, 4, 3)
value, history = search.minimax(state, 10, True)

# Replay the returned moves starting at the root; once a terminal state is
# reached the remaining moves leave it untouched.
other = state
for move in history:
    other = other if other.is_terminal() else other.move(move)

# The state reached by the replay must evaluate to the value the search returned.
assert policy.eval(other) == value

print("All good!")
print(value, history)
# print(state)



All good!
4 [1, 0, 0, 1]


# Policy-Advised Tree Search - At Last!

In [12]:
from domoku.policies.softadvice import MaxInfluencePolicyParams, MaxInfluencePolicy
from domoku.policies.maximal_criticality import MaxCriticalityPolicy
from domoku.ddpg import NxNx4Game
from domoku import data

# Single source of truth for the size handed to both policies and to the
# initial board below.
SIZE = 15

hard_policy = MaxCriticalityPolicy(SIZE, overconfidence=2)

params = MaxInfluencePolicyParams(
    n=SIZE,
    sigma=.7,
    iota=3,
    radial_constr=[.0625, .125, .25, .5],
    radial_obstr=[-.0625, -.125, -.25, -.5],
)
heuristics = MaxInfluencePolicy(params, criticality_model=hard_policy)

# Use SIZE rather than a literal 15 so the board cannot drift from the
# size the policies were configured with.
initial_state = data.create_nxnx4(size=SIZE, stones=[('H', 8)])
search = MinimaxSearch(NxNx4Game(initial_state), heuristics, 8, 4)

# Left as a bare expression so the notebook displays the search result.
search.minimax(initial_state, 6)

(0, [(6, 6), (8, 8), (9, 8), (7, 9), (6, 10), (7, 8)])