# The Coach through a full lifecycle

In [1]:
# Enable IPython's autoreload extension in mode 2, so that edits to imported
# modules are reloaded before each cell executes (no kernel restart needed).
%load_ext autoreload
%autoreload 2

In [2]:
import tensorflow as tf
from alphazero.mcts import MCTS
from alphazero.coach import Coach
from alphazero.gomoku_game import GomokuGame as Game
from alphazero.interfaces import TrainParams
from alphazero.gomoku_model import NeuralNetAdapter
from domoku.policies.maximal_criticality import MaxCriticalityPolicy
from domoku.policies.softadvice import MaxInfluencePolicy, MaxInfluencePolicyParams
from domoku.policies import softadvice
from domoku.constants import *

In [3]:
# Board size shared by the policies configured in this notebook
# (passed as `board_size` to the policy constructors below).
BOARD_SIZE = 15

In [4]:
# Criticality detector handed to the game, sized from the shared BOARD_SIZE.
detector = MaxCriticalityPolicy(BOARD_SIZE)
# Size the game from BOARD_SIZE as well (instead of repeating the literal 15),
# so the game and its detector cannot drift apart when the board size changes.
# NOTE(review): initial='H8' presumably sets the opening stone — confirm against GomokuGame.
game = Game(BOARD_SIZE, detector=detector, initial='H8')

2022-06-13 19:22:55.299675: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
  result = asarray(a).shape


----

# The contenders

In [5]:
# Search settings shared by both MCTS players built further down.
cpuct = 1.0            # forwarded as `cpuct` to each MCTS instance
num_simulations = 25   # forwarded as `num_simulations` to each MCTS instance

In [6]:
############################################################
# In the blue corner: The heuristic defender!

def given_heuristic_brain():
    """Build the heuristic "brain" that guides the defender.

    A MaxCriticalityPolicy is used as the criticality model of a
    MaxInfluencePolicy (constructed with pov=BLACK); that policy is then
    wrapped in softadvice.NeuralNetAdapter.

    Returns:
        The softadvice.NeuralNetAdapter wrapping the configured policy.
    """
    criticality = MaxCriticalityPolicy(board_size=BOARD_SIZE, overconfidence=5.0)

    # Radial weights: the obstructive list mirrors the constructive one with
    # negated values.
    constructive = [.0625, .125, .25, .5]
    obstructive = [-.0625, -.125, -.25, -.5]

    influence_params = MaxInfluencePolicyParams(
        board_size=BOARD_SIZE,
        sigma=.6,
        iota=6,
        radial_constr=constructive,
        radial_obstr=obstructive,
    )
    influence_policy = MaxInfluencePolicy(
        influence_params,
        criticality_model=criticality,
        pov=BLACK,
    )
    return softadvice.NeuralNetAdapter(influence_policy)

# The defender is an MCTS player guided by the heuristic brain,
# using the shared search settings.
brain = given_heuristic_brain()
defender = MCTS(
    game,
    brain,
    cpuct=cpuct,
    num_simulations=num_simulations,
)

In [7]:
############################################################

# In the red corner: A fresh neural network

# NOTE(review): input_size=17 is a bare literal; it looks like BOARD_SIZE + 2,
# but that relationship is not visible in this notebook — confirm.
network = NeuralNetAdapter(input_size=17)

# Show which model class backs the adapter, followed by its layer summary.
print(f"Model class: {type(network.policy)}\n")
network.policy.summary()

# The challenger is an MCTS player guided by the fresh network,
# using the same search settings as the defender.
challenger = MCTS(
    game,
    network,
    cpuct=cpuct,
    num_simulations=num_simulations,
)

Model class: <class 'alphazero.gomoku_model.GomokuModel'>

Model: "gomoku_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_9 (Conv2D)           multiple                  11648     
                                                                 
 Potential_0 (Conv2D)        multiple                  123936    
                                                                 
 Potential_1 (Conv2D)        multiple                  123936    
                                                                 
 Potential_2 (Conv2D)        multiple                  123936    
                                                                 
 Potential_3 (Conv2D)        multiple                  123936    
                                                                 
 Potential_4 (Conv2D)        multiple                  123936    
                                                             

---

# The Coach

In [13]:
# Training configuration for the Coach.
# num_simulations and cpuct reuse the shared settings defined alongside the
# contenders instead of repeating the literals 25 and 1.0, so the Coach and
# both MCTS players stay in sync when those settings are tuned.
# NOTE(review): the recorded output below reports "1 of 3" iterations although
# num_iterations=2 — the output may be stale or Coach may count differently; confirm.
params = TrainParams(
    epochs_per_train=10,
    update_threshold=0.6,
    max_queue_length=8192,    # Number of game examples to keep to train the neural networks.
    num_simulations=num_simulations,
    arena_compare=2,         # Number of games to play during arena play to evaluate new network.
    cpuct=cpuct,
    checkpoint_dir='./temperature/',
    load_model=False,
    # NOTE(review): absolute path; presumably ignored while load_model=False — confirm.
    load_folder_file=('/dev/models/8x100x50', 'best.pth.tar'),
    num_iters_for_train_examples_history=4,
    num_iterations=2,
    num_episodes=4,
    temperature_threshold=6
)
coach = Coach(game, params=params)

In [14]:
# Run the training lifecycle with the defender and the challenger.
# NOTE(review): in the recorded run this call completes self-play and training
# but fails as soon as arena play starts, with
#   TypeError: GomokuGame.get_game_ended() takes 2 positional arguments but 3 were given
# The cause is not visible in this notebook — presumably a caller passes an
# extra argument to get_game_ended(); confirm in the arena / game code.
coach.train(defender, challenger)

Iteraction 1 of 3


   Self Play: 100%|██████████| 4/4 [00:24<00:00,  6.11s/it]


   Challenger to learn from the results


   Training: 100%|██████████| 10/10 [01:16<00:00,  7.69s/it]


Epochs: 10, Loss: 7.503915309906006
   Challenger meets Defender in the Arena


Arena.play_games (1):   0%|          | 0/1 [00:00<?, ?it/s]


TypeError: GomokuGame.get_game_ended() takes 2 positional arguments but 3 were given