# Deep Reinforcement Learning using AlphaZero methodology

Please see https://applied-data.science/blog/how-to-build-your-own-alphazero-ai-using-python-and-keras/ for further notes on the codebase

## 1. First load the core libraries

In [1]:
# -*- coding: utf-8 -*-
# %matplotlib inline
import importlib
import pickle
import random
from shutil import copyfile

import numpy as np
from keras.utils import plot_model

from agent import Agent
from funcs import playMatches, playMatchesBetweenVersions
from games.connect4.connect4 import Connect4, Connect4State
from games.metasquares.metasquares import MetaSquares, MetaSquaresState
from games.quoridor.quoridor import Quoridor, QuoridorState
import initialise
import loggers as lg
from memory import Memory
from model import Residual_CNN
from settings import run_folder, run_archive_folder

# Print NumPy floats in fixed-point form rather than scientific notation, so
# printed arrays are easier to read.
np.set_printoptions(suppress=True)

%load_ext autoreload
%autoreload 2

Using TensorFlow backend.


## 2. Now run this block to start the learning process

This block loops forever, continually learning from new game data.

The current best model and memories are saved in the run folder so you can kill the process and restart from the last checkpoint.

In [2]:
# Mark the start of a new run in the main log.
lg.logger_main.info('=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*')
lg.logger_main.info('=*=*=*=*=*=.      NEW LOG      =*=*=*=*=*')
lg.logger_main.info('=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*')

# Select the game environment to learn; leave exactly one line uncommented.
# env = Connect4()
# env = MetaSquares()
env = Quoridor()
game_name = env.name
print('Game name: ', game_name, end='\n\n')

# If loading an existing neural network, copy that run's archived config file
# to the working directory so the `import config` below picks it up.
if initialise.INITIAL_RUN_NUMBER is not None:
    copyfile(run_archive_folder + env.name + '/run' + str(initialise.INITIAL_RUN_NUMBER).zfill(4) + '/config.py', './config.py')

# Deliberately imported here rather than with the other imports: it must run
# after the archived config (if any) has been copied into place.
import config

######## LOAD MEMORIES IF NECESSARY ########

if initialise.INITIAL_MEMORY_VERSION is None:
    # No archived memory requested: start with a new Memory object.
    memory = Memory(config.MEMORY_SIZE)
else:
    print('LOADING MEMORY VERSION ' + str(initialise.INITIAL_MEMORY_VERSION) + '...')
    memory_path = (
        run_archive_folder + env.name
        + '/run' + str(initialise.INITIAL_RUN_NUMBER).zfill(4)
        + "/memory/memory" + str(initialise.INITIAL_MEMORY_VERSION).zfill(4) + ".p"
    )
    # NOTE: unpickling can execute arbitrary code; only load memory files that
    # were written by your own runs.
    # The context manager guarantees the file handle is closed after loading.
    with open(memory_path, "rb") as memory_file:
        memory = pickle.load(memory_file)

######## LOAD MODEL IF NECESSARY ########

# Create two untrained neural network objects from the config file: one that
# will be retrained (current) and one that holds the best weights so far.
current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape,   env.action_size, config.HIDDEN_CNN_LAYERS)
best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape,   env.action_size, config.HIDDEN_CNN_LAYERS)

if initialise.INITIAL_MODEL_VERSION is not None:
    # Loading an existing neural network: give both networks the weights of
    # the archived model.
    best_player_version = initialise.INITIAL_MODEL_VERSION
    print('LOADING MODEL VERSION ' + str(initialise.INITIAL_MODEL_VERSION) + '...')
    m_tmp = best_NN.read(env.name, initialise.INITIAL_RUN_NUMBER, best_player_version)
    current_NN.model.set_weights(m_tmp.get_weights())
    best_NN.model.set_weights(m_tmp.get_weights())
else:
    # Otherwise just ensure the weights on the two networks are the same.
    best_player_version = 0
    best_NN.model.set_weights(current_NN.model.get_weights())

# Copy the config file to the run folder so the settings used are kept
# alongside this run's outputs.
copyfile('./config.py', run_folder + 'config.py')
# Write a diagram of the network architecture into the run folder.
plot_model(current_NN.model, to_file=run_folder + 'models/model.png', show_shapes = True)

print('\n')

######## CREATE THE PLAYERS ########

# The two agents are built with identical settings; they differ only in their
# name and in the network object they are given.
current_player = Agent(
    'current_player',
    env.state_size,
    env.action_size,
    config.MCTS_SIMS,
    config.CPUCT,
    current_NN,
)
best_player = Agent(
    'best_player',
    env.state_size,
    env.action_size,
    config.MCTS_SIMS,
    config.CPUCT,
    best_NN,
)
# user_player = User('player1', env.state_size, env.action_size)

# Counter for the training loop; incremented at the start of every iteration.
iteration = 0

# Main training loop. It never terminates on its own; interrupt the kernel to
# stop it.
while True:

    iteration += 1
    # Reload the logger and config modules at the start of every iteration.
    importlib.reload(lg)
    importlib.reload(config)

    print('ITERATION NUMBER ' + str(iteration))

    lg.logger_main.info('BEST PLAYER VERSION: %d', best_player_version)
    print('BEST PLAYER VERSION ' + str(best_player_version))

    ######## SELF PLAY ########
    # The best player plays against itself; playMatches returns the memory
    # object as its second result, which replaces `memory` here.
    print('SELF PLAYING ' + str(config.EPISODES) + ' EPISODES...')
    _, memory, _, _ = playMatches(game_name, best_player, best_player, config.EPISODES, lg.logger_main, turns_until_tau0 = config.TURNS_UNTIL_TAU0, memory = memory)
    print('\n')

    memory.clear_stmemory()

    # Retraining and evaluation only start once the long-term memory holds at
    # least config.MEMORY_SIZE entries.
    if len(memory.ltmemory) >= config.MEMORY_SIZE:

        ######## RETRAINING ########
        print('RETRAINING...')
        current_player.replay(memory.ltmemory)
        print('')

        # Checkpoint the memory to the run folder every fifth iteration.
        if iteration % 5 == 0:
            memory_path = run_folder + "memory/memory" + str(iteration).zfill(4) + ".p"
            # Context manager ensures the pickle is flushed and the file
            # handle is closed even if dumping raises.
            with open(memory_path, "wb") as memory_file:
                pickle.dump(memory, memory_file)

        lg.logger_memory.info('====================')
        lg.logger_memory.info('NEW MEMORIES')
        lg.logger_memory.info('====================')

        # Log a random sample (at most 1000 entries) of the long-term memory,
        # comparing the stored values with both networks' predictions.
        memory_samp = random.sample(memory.ltmemory, min(1000, len(memory.ltmemory)))

        for s in memory_samp:
            current_value, current_probs, _ = current_player.get_preds(s['state'])
            best_value, best_probs, _ = best_player.get_preds(s['state'])

            lg.logger_memory.info('MCTS VALUE FOR %s: %f', s['playerTurn'], s['value'])
            lg.logger_memory.info('CUR PRED VALUE FOR %s: %f', s['playerTurn'], current_value)
            lg.logger_memory.info('BES PRED VALUE FOR %s: %f', s['playerTurn'], best_value)
            lg.logger_memory.info('THE MCTS ACTION VALUES: %s', ['%.2f' % elem for elem in s['AV']]  )
            lg.logger_memory.info('CUR PRED ACTION VALUES: %s', ['%.2f' % elem for elem in  current_probs])
            lg.logger_memory.info('BES PRED ACTION VALUES: %s', ['%.2f' % elem for elem in  best_probs])
            lg.logger_memory.info('ID: %s', s['state'].id)
            lg.logger_memory.info('INPUT TO MODEL: %s', current_player.model.convertToModelInput(s['state']))

            s['state'].render(lg.logger_memory)

        ######## TOURNAMENT ########
        # Evaluate the retrained player against the best player; no memory is
        # passed in for these games.
        print('TOURNAMENT...')
        scores, _, points, sp_scores = playMatches(game_name, best_player, current_player, config.EVAL_EPISODES, lg.logger_tourney, turns_until_tau0 = 0, memory = None)
        print('\nSCORES')
        print(scores)
        print('\nSTARTING PLAYER / NON-STARTING PLAYER SCORES')
        print(sp_scores)
        #print(points)

        print('\n\n')

        # Promote the current network to "best" when its score exceeds the
        # best player's score scaled by config.SCORING_THRESHOLD, and save it
        # under the new version number.
        if scores['current_player'] > scores['best_player'] * config.SCORING_THRESHOLD:
            best_player_version = best_player_version + 1
            best_NN.model.set_weights(current_NN.model.get_weights())
            best_NN.write(env.name, best_player_version)

    else:
        # Not enough long-term memory yet: report how much has been collected.
        print('MEMORY SIZE: ' + str(len(memory.ltmemory)))

Game name:  quoridor

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See tf.nn.softmax_cross_entropy_with_logits_v2.



ITERATION NUMBER 1
BEST PLAYER VERSION 0
SELF PLAYING 25 EPISODES...
EPISODE:  1 

Turn: 1
Player: 1
Allowed: [1, 3, 5, 7, 9, 10, 11, 12, 14, 16, 18, 20, 22, 23, 24, 25, 27, 29, 31, 33, 35, 36, 37, 38, 40, 42, 43, 44, 46, 48, 49, 50, 51, 53, 55]
Action: (3, 2)
[[0 3 0 3 2 3 0 3 0]
 [3 3 3 3 3 3 3 3 3]
 [0 3 0 3 0 3 0 3 0]
 [3 3 4 4 4 3 3 3 3]
 [0 3 0 3 0 3 0 3 0]
 [3 3 3 3 3 3 3 3 3]
 [0 3 0 3 0 3 0 3 0]
 [3 3 3 3 3 3 3 3 3]
 [0 3 0 3 1 3 0 3 0]]
Remaining nums: {1: 4, 2: 5}

Turn: 2
Player: -1
Allowed: [1, 2, 3, 5, 6, 7, 9, 10, 11, 12, 14, 17, 18, 20, 25, 27, 29, 31, 33, 35, 36, 37, 38, 40, 42, 44, 46, 48, 49, 50, 51]
Action: (2, 1)
[[0 3 0 3 2 3 0 3 0]
 [3 3 3 3 3 3 3 3 3]
 [0 5 0 3 0 3 0 3 0]
 [3 5 4 4 4 3 3 3 3]
 [0 5 0 3 0 3 0 3 0]
 [3 3 3 3 3 3 3 3 3]
 [0 3 0 3 0 3 0 3 0

Player: 1
Allowed: [3, 5, 7, 9, 10, 11, 12, 16, 18, 20, 23, 24, 25, 29, 31, 33, 35, 36, 37, 38, 40, 44, 46, 51, 53, 55]
Action: (7, 6)
[[0 3 0 3 2 3 0 3 0]
 [3 3 3 3 3 3 3 3 3]
 [0 5 0 3 0 3 0 3 0]
 [3 5 3 3 3 3 3 3 3]
 [0 5 0 3 0 3 0 3 0]
 [3 3 3 3 3 3 3 3 3]
 [0 3 0 3 0 3 0 3 0]
 [3 3 4 4 4 3 4 4 4]
 [0 3 0 3 1 3 0 3 0]]
Remaining nums: {1: 3, 2: 4}

Turn: 4
Player: -1
Allowed: [2, 3, 5, 6, 7, 9, 10, 11, 12, 16, 17, 18, 20, 23, 24, 25, 29, 31, 33, 35, 36, 37, 38, 44]
Action: (0, 6)
[[0 3 0 3 0 3 2 3 0]
 [3 3 3 3 3 3 3 3 3]
 [0 5 0 3 0 3 0 3 0]
 [3 5 3 3 3 3 3 3 3]
 [0 5 0 3 0 3 0 3 0]
 [3 3 3 3 3 3 3 3 3]
 [0 3 0 3 0 3 0 3 0]
 [3 3 4 4 4 3 4 4 4]
 [0 3 0 3 1 3 0 3 0]]
Remaining nums: {1: 3, 2: 4}

Turn: 5
Player: 1
Allowed: [3, 5, 7, 9, 10, 11, 12, 16, 18, 20, 23, 24, 25, 29, 31, 33, 35, 36, 37, 38, 44, 53, 55]
Action: (3, 6)
[[0 3 0 3 0 3 2 3 0]
 [3 3 3 3 3 3 3 3 3]
 [0 5 0 3 0 3 0 3 0]
 [3 5 3 3 3 3 4 4 4]
 [0 5 0 3 0 3 0 3 0]
 [3 3 3 3 3 3 3 3 3]
 [0 3 0 3 0 3 0 3 0]
 [3 3 4 4 4 3

Player: 1
Allowed: [1, 3, 5, 7, 9, 10, 11, 12, 14, 16, 18, 20, 22, 23, 24, 25, 27, 29, 31, 33, 35, 36, 37, 38, 40, 42, 43, 44, 46, 48, 49, 50, 51, 53, 55]
Action: (7, 2)
[[0 3 0 3 2 3 0 3 0]
 [3 3 3 3 3 3 3 3 3]
 [0 3 0 3 0 3 0 3 0]
 [3 3 3 3 3 3 3 3 3]
 [0 3 0 3 0 3 0 3 0]
 [3 3 3 3 3 3 3 3 3]
 [0 3 0 3 0 3 0 3 0]
 [3 3 4 4 4 3 3 3 3]
 [0 3 0 3 1 3 0 3 0]]
Remaining nums: {1: 4, 2: 5}

Turn: 2
Player: -1
Allowed: [1, 2, 3, 5, 6, 7, 9, 10, 11, 12, 14, 16, 17, 18, 20, 22, 23, 24, 25, 27, 29, 31, 33, 35, 36, 37, 38, 40, 44, 46, 51]
Action: (1, 6)
[[0 3 0 3 2 3 0 3 0]
 [3 3 3 3 3 3 4 4 4]
 [0 3 0 3 0 3 0 3 0]
 [3 3 3 3 3 3 3 3 3]
 [0 3 0 3 0 3 0 3 0]
 [3 3 3 3 3 3 3 3 3]
 [0 3 0 3 0 3 0 3 0]
 [3 3 4 4 4 3 3 3 3]
 [0 3 0 3 1 3 0 3 0]]
Remaining nums: {1: 4, 2: 4}

Turn: 3
Player: 1
Allowed: [1, 3, 5, 9, 10, 14, 16, 18, 20, 22, 23, 24, 25, 27, 29, 31, 33, 35, 36, 37, 38, 40, 44, 46, 51, 53, 55]
Action: (1, 0)
[[0 3 0 3 2 3 0 3 0]
 [4 4 4 3 3 3 4 4 4]
 [0 3 0 3 0 3 0 3 0]
 [3 3 3 3 3 3 3 3 3

Player: 1
Allowed: [4]
Action: (0, 4)
[[0 3 0 5 1 3 0 3 0]
 [4 4 4 5 3 3 4 4 4]
 [0 3 0 5 0 3 0 3 0]
 [3 3 3 3 3 3 4 4 4]
 [0 5 0 3 0 3 0 5 0]
 [3 5 4 4 4 3 3 5 3]
 [0 5 2 3 0 3 0 5 0]
 [3 3 4 4 4 3 4 4 4]
 [0 3 0 3 0 3 0 3 0]]
Remaining nums: {1: 0, 2: 1}

EPISODE:  4 

Turn: 1
Player: 1
Allowed: [1, 3, 5, 7, 9, 10, 11, 12, 14, 16, 18, 20, 22, 23, 24, 25, 27, 29, 31, 33, 35, 36, 37, 38, 40, 42, 43, 44, 46, 48, 49, 50, 51, 53, 55]
Action: (8, 6)
[[0 3 0 3 2 3 0 3 0]
 [3 3 3 3 3 3 3 3 3]
 [0 3 0 3 0 3 0 3 0]
 [3 3 3 3 3 3 3 3 3]
 [0 3 0 3 0 3 0 3 0]
 [3 3 3 3 3 3 3 3 3]
 [0 3 0 3 0 3 0 3 0]
 [3 3 3 3 3 3 3 3 3]
 [0 3 0 3 0 3 1 3 0]]
Remaining nums: {1: 5, 2: 5}

Turn: 2
Player: -1
Allowed: [1, 2, 3, 5, 6, 7, 9, 10, 11, 12, 14, 16, 17, 18, 20, 22, 23, 24, 25, 27, 29, 31, 33, 35, 36, 37, 38, 40, 42, 44, 46, 48, 49, 50, 51]
Action: (2, 4)
[[0 3 0 3 0 3 0 3 0]
 [3 3 3 3 3 3 3 3 3]
 [0 3 0 3 2 3 0 3 0]
 [3 3 3 3 3 3 3 3 3]
 [0 3 0 3 0 3 0 3 0]
 [3 3 3 3 3 3 3 3 3]
 [0 3 0 3 0 3 0 3 0]
 [3 3 

Player: 1
Allowed: [52]
Action: (8, 0)
[[2 3 0 5 0 3 0 5 0]
 [3 3 3 5 4 4 4 5 3]
 [0 3 0 5 0 3 0 5 0]
 [3 3 4 4 4 3 4 4 4]
 [0 5 0 3 0 3 0 3 0]
 [3 5 3 3 4 4 4 3 3]
 [0 5 0 3 0 3 0 5 0]
 [3 3 3 3 4 4 4 5 3]
 [1 3 0 3 0 3 0 5 0]]
Remaining nums: {1: 0, 2: 1}

Turn: 26
Player: -1
Allowed: [1, 2, 13, 18, 29, 42]
Action: (0, 2)
[[0 3 2 5 0 3 0 5 0]
 [3 3 3 5 4 4 4 5 3]
 [0 3 0 5 0 3 0 5 0]
 [3 3 4 4 4 3 4 4 4]
 [0 5 0 3 0 3 0 3 0]
 [3 5 3 3 4 4 4 3 3]
 [0 5 0 3 0 3 0 5 0]
 [3 3 3 3 4 4 4 5 3]
 [1 3 0 3 0 3 0 5 0]]
Remaining nums: {1: 0, 2: 1}

Turn: 27
Player: 1
Allowed: [39]
Action: (6, 0)
[[0 3 2 5 0 3 0 5 0]
 [3 3 3 5 4 4 4 5 3]
 [0 3 0 5 0 3 0 5 0]
 [3 3 4 4 4 3 4 4 4]
 [0 5 0 3 0 3 0 3 0]
 [3 5 3 3 4 4 4 3 3]
 [1 5 0 3 0 3 0 5 0]
 [3 3 3 3 4 4 4 5 3]
 [0 3 0 3 0 3 0 5 0]]
Remaining nums: {1: 0, 2: 1}

Turn: 28
Player: -1
Allowed: [0, 15, 18, 29, 42]
Action: (2, 2)
[[0 3 0 5 0 3 0 5 0]
 [3 3 3 5 4 4 4 5 3]
 [0 3 2 5 0 3 0 5 0]
 [3 3 4 4 4 3 4 4 4]
 [0 5 0 3 0 3 0 3 0]
 [3 5 3 3 4 4 4 3

Player: -1
Allowed: [5, 13, 30, 33, 36, 38, 41]
Action: (5, 6)
[[0 3 0 5 0 3 0 5 0]
 [4 4 4 5 3 3 3 5 3]
 [0 3 1 5 0 3 0 5 0]
 [3 3 3 3 3 3 4 4 4]
 [0 5 2 3 0 5 0 3 0]
 [3 5 3 3 3 5 4 4 4]
 [0 5 0 3 0 5 0 3 0]
 [3 3 4 4 4 3 4 4 4]
 [0 3 0 3 0 3 0 3 0]]
Remaining nums: {1: 0, 2: 1}

Turn: 23
Player: 1
Allowed: [41]
Action: (6, 2)
[[0 3 0 5 0 3 0 5 0]
 [4 4 4 5 3 3 3 5 3]
 [0 3 0 5 0 3 0 5 0]
 [3 3 3 3 3 3 4 4 4]
 [0 5 2 3 0 5 0 3 0]
 [3 5 3 3 3 5 4 4 4]
 [0 5 1 3 0 5 0 3 0]
 [3 3 4 4 4 3 4 4 4]
 [0 3 0 3 0 3 0 3 0]]
Remaining nums: {1: 0, 2: 1}

Turn: 24
Player: -1
Allowed: [5, 15, 30, 43]
Action: (6, 4)
[[0 3 0 5 0 3 0 5 0]
 [4 4 4 5 3 3 3 5 3]
 [0 3 0 5 0 3 0 5 0]
 [3 3 3 3 3 3 4 4 4]
 [0 5 0 3 0 5 0 3 0]
 [3 5 3 3 3 5 4 4 4]
 [0 5 1 3 2 5 0 3 0]
 [3 3 4 4 4 3 4 4 4]
 [0 3 0 3 0 3 0 3 0]]
Remaining nums: {1: 0, 2: 1}

Turn: 25
Player: 1
Allowed: [30]
Action: (4, 4)
[[0 3 0 5 0 3 0 5 0]
 [4 4 4 5 3 3 3 5 3]
 [0 3 0 5 0 3 0 5 0]
 [3 3 3 3 3 3 4 4 4]
 [0 5 0 3 1 5 0 3 0]
 [3 5 3 3 3 5 4 

Player: 1
Allowed: [1, 3, 5, 7, 9, 10, 11, 12, 14, 16, 18, 20, 22, 23, 24, 25, 27, 29, 31, 33, 35, 36, 37, 38, 44, 53, 55]
Action: (3, 6)
[[0 3 0 3 2 3 0 3 0]
 [3 3 3 3 3 3 3 3 3]
 [0 3 0 3 0 3 0 3 0]
 [3 3 3 3 3 3 4 4 4]
 [0 3 0 3 0 3 0 3 0]
 [3 3 3 3 3 3 3 3 3]
 [0 3 0 3 0 3 0 3 0]
 [3 3 4 4 4 3 4 4 4]
 [0 3 0 3 1 3 0 3 0]]
Remaining nums: {1: 3, 2: 4}

Turn: 4
Player: -1
Allowed: [1, 2, 3, 5, 6, 7, 9, 10, 11, 12, 14, 16, 17, 18, 22, 23, 27, 29, 31, 33, 35, 36, 37, 38, 44]
Action: (5, 4)
[[0 3 0 3 2 3 0 3 0]
 [3 3 3 3 3 3 3 3 3]
 [0 3 0 3 0 3 0 3 0]
 [3 3 3 3 3 3 4 4 4]
 [0 3 0 3 0 3 0 3 0]
 [3 3 3 3 4 4 4 3 3]
 [0 3 0 3 0 3 0 3 0]
 [3 3 4 4 4 3 4 4 4]
 [0 3 0 3 1 3 0 3 0]]
Remaining nums: {1: 3, 2: 3}

Turn: 5
Player: 1
Allowed: [1, 3, 5, 7, 9, 10, 11, 12, 14, 16, 18, 22, 23, 27, 29, 33, 35, 44, 53, 55]
Action: (4, 1)
[[0 3 0 3 2 3 0 3 0]
 [3 3 3 3 3 3 3 3 3]
 [0 3 0 3 0 3 0 3 0]
 [3 3 3 3 3 3 4 4 4]
 [0 5 0 3 0 3 0 3 0]
 [3 5 3 3 4 4 4 3 3]
 [0 5 0 3 0 3 0 3 0]
 [3 3 4 4 4 3 4 4 4]

Player: -1
Allowed: [3, 5, 6, 7, 11, 12, 14, 16, 18, 20, 21, 22, 23, 24, 25, 27, 29, 31, 35, 36, 44]
Action: (4, 1)
[[0 3 0 3 0 3 0 3 2]
 [4 4 4 3 3 3 3 3 3]
 [0 3 0 3 0 3 0 3 0]
 [3 3 3 3 3 3 3 3 3]
 [0 5 0 3 0 3 0 3 0]
 [3 5 3 3 3 3 4 4 4]
 [0 5 0 3 0 3 0 3 0]
 [3 3 4 4 4 3 4 4 4]
 [0 3 1 3 0 3 0 3 0]]
Remaining nums: {1: 2, 2: 3}

Turn: 9
Player: 1
Allowed: [3, 5, 7, 11, 12, 16, 18, 20, 23, 24, 25, 29, 31, 36, 44, 52, 54]
Action: (1, 4)
[[0 3 0 3 0 3 0 3 2]
 [4 4 4 3 4 4 4 3 3]
 [0 3 0 3 0 3 0 3 0]
 [3 3 3 3 3 3 3 3 3]
 [0 5 0 3 0 3 0 3 0]
 [3 5 3 3 3 3 4 4 4]
 [0 5 0 3 0 3 0 3 0]
 [3 3 4 4 4 3 4 4 4]
 [0 3 1 3 0 3 0 3 0]]
Remaining nums: {1: 1, 2: 3}

Turn: 10
Player: -1
Allowed: [3, 6, 7, 16, 21, 23, 24, 25, 29, 31, 36, 44]
Action: (5, 2)
[[0 3 0 3 0 3 0 3 2]
 [4 4 4 3 4 4 4 3 3]
 [0 3 0 3 0 3 0 3 0]
 [3 3 3 3 3 3 3 3 3]
 [0 5 0 3 0 3 0 3 0]
 [3 5 4 4 4 3 4 4 4]
 [0 5 0 3 0 3 0 3 0]
 [3 3 4 4 4 3 4 4 4]
 [0 3 1 3 0 3 0 3 0]]
Remaining nums: {1: 1, 2: 2}

Turn: 11
Player: 1
Allowed

Player: -1
Allowed: [1, 2, 3, 5, 7, 9, 10, 11, 12, 13, 14, 16, 17, 18, 22, 23, 27, 28, 29, 31, 35, 36, 37, 40, 42, 44, 48, 49, 50, 51]
Action: (2, 4)
[[0 3 0 3 0 3 0 3 0]
 [3 3 3 3 3 3 3 3 3]
 [0 3 0 3 2 3 0 3 0]
 [3 3 3 3 3 3 4 4 4]
 [0 3 0 3 0 3 0 5 0]
 [3 3 3 3 3 3 3 5 3]
 [0 3 0 3 0 3 0 5 0]
 [3 3 3 3 3 3 3 3 3]
 [0 3 1 3 0 3 0 3 0]]
Remaining nums: {1: 4, 2: 4}

Turn: 9
Player: 1
Allowed: [1, 3, 5, 7, 9, 10, 11, 12, 14, 16, 18, 22, 23, 27, 29, 31, 35, 36, 37, 40, 41, 42, 44, 48, 49, 50, 51, 52, 54]
Action: (8, 0)
[[0 3 0 3 0 3 0 3 0]
 [3 3 3 3 3 3 3 3 3]
 [0 3 0 3 2 3 0 3 0]
 [3 3 3 3 3 3 4 4 4]
 [0 3 0 3 0 3 0 5 0]
 [3 3 3 3 3 3 3 5 3]
 [0 3 0 3 0 3 0 5 0]
 [3 3 3 3 3 3 3 3 3]
 [1 3 0 3 0 3 0 3 0]]
Remaining nums: {1: 4, 2: 4}

Turn: 10
Player: -1
Allowed: [1, 3, 4, 5, 7, 9, 10, 11, 12, 14, 15, 16, 18, 19, 22, 23, 27, 29, 30, 31, 35, 36, 37, 40, 42, 44, 48, 49, 50, 51]
Action: (7, 6)
[[0 3 0 3 0 3 0 3 0]
 [3 3 3 3 3 3 3 3 3]
 [0 3 0 3 2 3 0 3 0]
 [3 3 3 3 3 3 4 4 4]
 [0 3 0 3 0 3

Player: -1
Allowed: [1, 2, 3, 5, 6, 7, 9, 10, 11, 12, 14, 16, 17, 20, 22, 23, 24, 25, 27, 29, 33, 35, 36, 38, 40, 42, 46, 48, 49, 50, 51]
Action: (3, 0)
[[0 3 0 3 2 3 0 3 0]
 [3 3 3 3 3 3 3 3 3]
 [0 3 0 3 0 3 0 3 0]
 [4 4 4 3 3 3 3 3 3]
 [0 3 0 3 0 5 0 3 0]
 [3 3 3 3 3 5 3 3 3]
 [0 3 0 3 0 5 0 3 0]
 [3 3 3 3 3 3 3 3 3]
 [0 3 0 3 1 3 0 3 0]]
Remaining nums: {1: 4, 2: 4}

Turn: 3
Player: 1
Allowed: [1, 3, 5, 7, 9, 10, 11, 12, 16, 20, 24, 25, 27, 29, 33, 35, 36, 38, 40, 42, 43, 46, 48, 49, 50, 51, 53, 55]
Action: (1, 6)
[[0 3 0 3 2 3 0 3 0]
 [3 3 3 3 3 3 4 4 4]
 [0 3 0 3 0 3 0 3 0]
 [4 4 4 3 3 3 3 3 3]
 [0 3 0 3 0 5 0 3 0]
 [3 3 3 3 3 5 3 3 3]
 [0 3 0 3 0 5 0 3 0]
 [3 3 3 3 3 3 3 3 3]
 [0 3 0 3 1 3 0 3 0]]
Remaining nums: {1: 3, 2: 4}

Turn: 4
Player: -1
Allowed: [1, 2, 3, 5, 6, 9, 10, 16, 17, 20, 24, 25, 27, 29, 33, 35, 36, 38, 40, 42, 46, 48, 49, 50, 51]
Action: (7, 6)
[[0 3 0 3 2 3 0 3 0]
 [3 3 3 3 3 3 4 4 4]
 [0 3 0 3 0 3 0 3 0]
 [4 4 4 3 3 3 3 3 3]
 [0 3 0 3 0 5 0 3 0]
 [3 3 3 3 3 5 

Player: -1
Allowed: [2, 7, 9, 10, 12, 17, 20, 24, 25, 33, 35, 36, 37, 38, 46, 51]
Action: (5, 2)
[[0 3 0 3 2 5 0 3 0]
 [3 3 3 3 3 5 3 3 3]
 [0 5 0 5 0 5 0 3 0]
 [3 5 3 5 3 3 3 3 3]
 [0 5 0 5 0 3 0 3 0]
 [3 3 4 4 4 3 3 3 3]
 [0 3 0 3 0 5 0 3 0]
 [3 3 4 4 4 5 3 3 3]
 [0 3 0 3 1 5 0 3 0]]
Remaining nums: {1: 2, 2: 2}

Turn: 7
Player: 1
Allowed: [7, 10, 12, 20, 24, 25, 33, 38, 46, 51, 53]
Action: (3, 6)
[[0 3 0 3 2 5 0 3 0]
 [3 3 3 3 3 5 3 3 3]
 [0 5 0 5 0 5 0 3 0]
 [3 5 3 5 3 3 4 4 4]
 [0 5 0 5 0 3 0 3 0]
 [3 3 4 4 4 3 3 3 3]
 [0 3 0 3 0 5 0 3 0]
 [3 3 4 4 4 5 3 3 3]
 [0 3 0 3 1 5 0 3 0]]
Remaining nums: {1: 1, 2: 2}

Turn: 8
Player: -1
Allowed: [2, 7, 10, 12, 17, 33, 38, 46, 51]
Action: (6, 7)
[[0 3 0 3 2 5 0 3 0]
 [3 3 3 3 3 5 3 3 3]
 [0 5 0 5 0 5 0 3 0]
 [3 5 3 5 3 3 4 4 4]
 [0 5 0 5 0 3 0 3 0]
 [3 3 4 4 4 3 3 3 3]
 [0 3 0 3 0 5 0 5 0]
 [3 3 4 4 4 5 3 5 3]
 [0 3 0 3 1 5 0 5 0]]
Remaining nums: {1: 1, 2: 1}

Turn: 9
Player: 1
Allowed: [7, 10, 12, 38, 53]
Action: (8, 2)
[[0 3 0 3 2 5 0 3

Player: 1
Allowed: [53]
Action: (8, 2)
[[0 3 0 3 0 5 0 5 0]
 [3 3 3 3 3 5 3 5 3]
 [0 3 2 3 0 5 0 5 0]
 [3 3 3 3 4 4 4 3 3]
 [0 5 0 5 0 5 0 3 0]
 [3 5 3 5 3 5 4 4 4]
 [0 5 0 5 0 5 0 3 0]
 [3 3 3 3 3 3 4 4 4]
 [0 3 1 3 0 3 0 3 0]]
Remaining nums: {1: 0, 2: 2}

Turn: 12
Player: -1
Allowed: [1, 2, 3, 9, 10, 13, 17, 28, 49]
Action: (2, 4)
[[0 3 0 3 0 5 0 5 0]
 [3 3 3 3 3 5 3 5 3]
 [0 3 0 3 2 5 0 5 0]
 [3 3 3 3 4 4 4 3 3]
 [0 5 0 5 0 5 0 3 0]
 [3 5 3 5 3 5 4 4 4]
 [0 5 0 5 0 5 0 3 0]
 [3 3 3 3 3 3 4 4 4]
 [0 3 1 3 0 3 0 3 0]]
Remaining nums: {1: 0, 2: 2}

Turn: 13
Player: 1
Allowed: [41]
Action: (6, 2)
[[0 3 0 3 0 5 0 5 0]
 [3 3 3 3 3 5 3 5 3]
 [0 3 0 3 2 5 0 5 0]
 [3 3 3 3 4 4 4 3 3]
 [0 5 0 5 0 5 0 3 0]
 [3 5 3 5 3 5 4 4 4]
 [0 5 1 5 0 5 0 3 0]
 [3 3 3 3 3 3 4 4 4]
 [0 3 0 3 0 3 0 3 0]]
Remaining nums: {1: 0, 2: 2}

Turn: 14
Player: -1
Allowed: [1, 4, 9, 10, 15, 49]
Action: (2, 2)
[[0 3 0 3 0 5 0 5 0]
 [3 3 3 3 3 5 3 5 3]
 [0 3 2 3 0 5 0 5 0]
 [3 3 3 3 4 4 4 3 3]
 [0 5 0 5 0 5 0 3 0]
 [3 5

KeyboardInterrupt: 

## The following panels are not involved in the learning process

### Play matches between versions (use -1 for human player)

In [None]:
# NOTE(review): the training cells import their games from
# games.<name>.<name> (e.g. Quoridor); confirm that a top-level `game` module
# providing `Game` still exists before running this cell.
from game import Game
from funcs import playMatchesBetweenVersions
import loggers as lg

env = Game()
# Positional arguments are passed as-is to funcs.playMatchesBetweenVersions;
# their meaning is defined there (not visible in this notebook). Per the cell
# heading, a player version of -1 selects the human player.
# Results are logged to the tournament logger.
playMatchesBetweenVersions(env, 1, 1, 1, 10, lg.logger_tourney, 0)

### Pass a particular game state through the neural network (setup below for Connect4)

In [None]:
# Flat board literal laid out as 6 rows of 7 values (42 cells, all zero).
# Edit the values to describe the position you want to evaluate.
empty_board = np.array([
    0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,
    0,0,0,0,0,0,0])

# NOTE(review): `GameState` is not imported by any cell shown in this
# notebook (the first cell imports Connect4State etc.); confirm which class
# is intended. The second argument is presumably the player to move.
gs = GameState(empty_board, 1)

# Ask the current player's network for its predictions on this state.
preds = current_player.get_preds(gs)

print(preds)

### See the layers of the current neural network

In [None]:
# Inspect the layers of the model held by current_player. Requires the
# training cell to have been run so that `current_player` exists.
current_player.model.viewLayers()

### Output a diagram of the neural network architecture

In [None]:
from keras.utils import plot_model
# Write a diagram of the current network (including layer shapes) to
# models/model.png inside the run folder. The training cell makes the same call.
plot_model(current_NN.model, to_file=run_folder + 'models/model.png', show_shapes = True)