In [1]:
import os


# Step up one directory when `tfg` is not an entry of the current working
# directory -- presumably so the notebook behaves the same whether it is
# launched from the repository root or from a subfolder (confirm layout).
entries_here = os.listdir()
if 'tfg' not in entries_here:
    os.chdir('..')

In [2]:
from tfg.alphaZero import create_alphazero
from tfg.alphaZeroCallbacks import Checkpoint
from tfg.alphaZeroAdapters import TicTacToeAdapter
from tfg.util import enable_gpu
from tfg.alphaZeroConfig import AlphaZeroConfig
from game.tictactoe import TicTacToe

import time

In [3]:
# Project helper from tfg.util; presumably configures GPU usage for the
# training backend -- confirm in tfg.util before relying on specifics.
enable_gpu()

# Game instance that is later passed to create_alphazero.
game = TicTacToe()

In [4]:
# Network settings; handed to create_alphazero through `nn_config` below.
config = AlphaZeroConfig(
    learning_rate=.01,
    regularizer_constant=.0001,
    momentum=.0,
    residual_layers=7,
    filters=64,
    kernel_size=(3, 3)
)

# Callback passed to the training run, constructed with the directory
# 'models/checkpoints' (presumably where intermediate models are written --
# confirm in tfg.alphaZeroCallbacks).
checkpoint = Checkpoint(directory='models/checkpoints')

# Use a monotonic clock for the duration: time.time() is wall-clock time and
# can jump if the system clock is adjusted while this long-running cell
# executes. NOTE: `start` is a perf_counter reading, not an epoch timestamp.
start = time.perf_counter()

# Runs the training and binds the result to `alphazero`. The self-play and
# search arguments are forwarded as-is; their exact semantics are defined by
# tfg.alphaZero.create_alphazero.
alphazero = create_alphazero(game, TicTacToeAdapter(), max_workers=10,
                             self_play_times=120, max_games_counter=960,
                             buffer_size=1500, batch_size=512, temperature=100,
                             callbacks=[checkpoint], epochs=20,
                             c_puct=1, exploration_noise=(.25, .045),
                             mcts_iter=400, nn_config=config)

print("Finished training after", time.perf_counter() - start, "seconds")

2021-04-07 10:54:12,311	INFO services.py:1172 -- View the Ray dashboard at http://127.0.0.1:8265


Epoch 1/20
16/16 - 7s - loss: 1.5301 - value_head_loss: 0.4495 - policy_head_loss: 2.3005
Epoch 2/20
16/16 - 0s - loss: 1.4980 - value_head_loss: 0.3617 - policy_head_loss: 2.1708
Epoch 3/20
16/16 - 0s - loss: 1.4713 - value_head_loss: 0.3240 - policy_head_loss: 2.1442
Epoch 4/20
16/16 - 0s - loss: 1.4245 - value_head_loss: 0.3083 - policy_head_loss: 2.0984
Epoch 5/20
16/16 - 0s - loss: 1.3720 - value_head_loss: 0.3069 - policy_head_loss: 2.0295
Epoch 6/20
16/16 - 0s - loss: 1.3105 - value_head_loss: 0.2922 - policy_head_loss: 1.9497
Epoch 7/20
16/16 - 0s - loss: 1.2691 - value_head_loss: 0.3057 - policy_head_loss: 1.8757
Epoch 8/20
16/16 - 0s - loss: 1.2057 - value_head_loss: 0.2803 - policy_head_loss: 1.7918
Epoch 9/20
16/16 - 0s - loss: 1.1872 - value_head_loss: 0.2846 - policy_head_loss: 1.7623
Epoch 10/20
16/16 - 0s - loss: 1.1539 - value_head_loss: 0.2753 - policy_head_loss: 1.7105
Epoch 11/20
16/16 - 0s - loss: 1.1067 - value_head_loss: 0.2675 - policy_head_loss: 1.6319
Epoch 12

Epoch 9/20
16/16 - 0s - loss: 0.6055 - value_head_loss: 0.1253 - policy_head_loss: 0.9455
Epoch 10/20
16/16 - 0s - loss: 0.6011 - value_head_loss: 0.1234 - policy_head_loss: 0.9434
Epoch 11/20
16/16 - 0s - loss: 0.5884 - value_head_loss: 0.1170 - policy_head_loss: 0.9291
Epoch 12/20
16/16 - 0s - loss: 0.6046 - value_head_loss: 0.1306 - policy_head_loss: 0.9479
Epoch 13/20
16/16 - 0s - loss: 0.6010 - value_head_loss: 0.1212 - policy_head_loss: 0.9493
Epoch 14/20
16/16 - 0s - loss: 0.5927 - value_head_loss: 0.1122 - policy_head_loss: 0.9426
Epoch 15/20
16/16 - 0s - loss: 0.5871 - value_head_loss: 0.1104 - policy_head_loss: 0.9347
Epoch 16/20
16/16 - 0s - loss: 0.5828 - value_head_loss: 0.1060 - policy_head_loss: 0.9350
Epoch 17/20
16/16 - 0s - loss: 0.5817 - value_head_loss: 0.1090 - policy_head_loss: 0.9311
Epoch 18/20
16/16 - 0s - loss: 0.5982 - value_head_loss: 0.1242 - policy_head_loss: 0.9412
Epoch 19/20
16/16 - 0s - loss: 0.5977 - value_head_loss: 0.1083 - policy_head_loss: 0.9507


In [5]:
# Persist the trained object; the path is relative to the current working
# directory (the .h5 suffix suggests an HDF5 file -- confirm in tfg.alphaZero).
model_path = 'models/TicTacToe.h5'
alphazero.save(model_path)