# The Coach on a full lifecycle

In [1]:
# Load IPython's autoreload extension and switch it to mode 2, so that edits to
# imported modules are picked up before each cell runs (no kernel restart).
%load_ext autoreload
%autoreload 2

In [6]:
from alphazero.mcts import MCTS
from alphazero.coach import Coach
from alphazero.gomoku_game import GomokuGame as Gomoku, initial_stones
from alphazero.interfaces import TrainParams
from domoku.policies.heuristic_policy import HeuristicPolicy


In [7]:
# Size argument passed to HeuristicPolicy, Gomoku and initial_stones in later cells.
BOARD_SIZE = 15

In [8]:

# Heuristic policy that the game receives as its `detector`.
detector = HeuristicPolicy(BOARD_SIZE, cut_off=0.1)

# Game instance reused by the MCTS and Coach cells further down; its starting
# position comes from initial_stones(BOARD_SIZE, 4).
gomoku = Gomoku(
    BOARD_SIZE,
    detector=detector,
    initial=initial_stones(BOARD_SIZE, 4),
)

2022-07-11 12:05:33.337780: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
  result = asarray(a).shape


----

# The contenders

In [5]:
# Common Performance Parameters
# (both values are passed to the challenger's MCTS)

cpuct = 1.0
num_simulations = 25

In [9]:
from alphazero.gomoku_players import HeuristicPlayer

# The defender: a heuristic policy wrapped in its own MCTS, then in a player.
policy_d = HeuristicPolicy(BOARD_SIZE, cut_off=0.1)
mcts_d = MCTS(
    gomoku,
    policy_d,
    cpuct=1.0,
    num_simulations=100,
    model_threshold=0.4,
)
defender = HeuristicPlayer(
    name="Charlie Champion",
    mcts=mcts_d,
    temperature=0.3,
)

In [10]:
from alphazero.gomoku_model import NeuralNetAdapter

# NOTE(review): the recorded output of this cell is a TypeError --
# NeuralNetAdapter could not be instantiated because its abstract method
# get_advisable_actions has no implementation. Until that is fixed in
# alphazero.gomoku_model, nothing below the first statement runs.
network = NeuralNetAdapter(input_size=17)

# Show which model class backs the adapter, followed by its summary.
print(f"Model class: {type(network.policy)}\n")
network.policy.summary()

# The challenger searches with the shared performance parameters.
challenger = MCTS(
    gomoku,
    network,
    cpuct=cpuct,
    num_simulations=num_simulations,
)

TypeError: Can't instantiate abstract class NeuralNetAdapter with abstract method get_advisable_actions

---

# The Coach

In [11]:
# Training configuration for the Coach.
# The search settings reuse `num_simulations` and `cpuct` from the
# "Common Performance Parameters" cell instead of repeating the literals, so the
# coach and the challenger's MCTS cannot silently drift apart.
params = TrainParams(
    epochs_per_train=10,
    update_threshold=0.6,
    max_queue_length=8192,    # Number of game examples to keep to train the neural networks.
    num_simulations=num_simulations,
    arena_compare=2,         # Number of games to play during arena play to evaluate new network.
    cpuct=cpuct,
    checkpoint_dir='./temperature/',
    load_model=False,
    # NOTE(review): absolute, machine-specific path; presumably only consulted
    # when load_model is True -- confirm before enabling model loading.
    load_folder_file=('/dev/models/8x100x50', 'best.pth.tar'),
    num_iters_for_train_examples_history=4,
    num_iterations=2,
    num_episodes=4,
    temperature_threshold=6
)

# The coach operates on the same game instance as the contenders.
coach = Coach(gomoku, params=params)

In [13]:
# Call execute_episode with the defender's MCTS (`mcts_d`).
# NOTE(review): the recorded output is a KeyboardInterrupt, i.e. this run was
# interrupted before it finished -- presumably it is long-running; confirm.
coach.execute_episode(mcts_d)

KeyboardInterrupt: 