In [1]:
import os


# Step up to the parent directory when there is no `tfg` entry in the current
# working directory (presumably so the `tfg` package imported by later cells
# resolves when the notebook is started from a subfolder -- confirm).
tfg_is_here = 'tfg' in os.listdir(os.curdir)
if not tfg_is_here:
    os.chdir(os.pardir)

In [2]:
from tfg.alphaZero import create_alphazero
from tfg.alphaZeroCallbacks import Checkpoint
from tfg.alphaZeroAdapters import TicTacToeAdapter
from tfg.util import enable_gpu
from tfg.alphaZeroConfig import AlphaZeroConfig
from game.tictactoe import TicTacToe

import time

In [3]:
# Run the project's GPU setup helper before any game/model objects are built.
enable_gpu()

# Game instance and its matching adapter; both are handed to create_alphazero
# in the training cell.
game, adapter = TicTacToe(), TicTacToeAdapter()

In [4]:
# Network configuration forwarded to create_alphazero through `nn_config`.
config = AlphaZeroConfig(
    learning_rate=.01,
    regularizer_constant=.0001,
    residual_layers=1,
    filters=32,
    kernel_size=(3, 3)
)

# Callback that writes checkpoint files under models/checkpoints/tictactoe.
# NOTE(review): judging by the training log, delay=2 saves on every second
# training round -- confirm against the Checkpoint implementation.
checkpoint = Checkpoint(directory='models/checkpoints/tictactoe', delay=2)

# perf_counter() is monotonic, so the measured duration cannot be skewed by
# system clock adjustments (NTP sync, DST, manual changes) during a long run,
# which time.time() does not guarantee.
start = time.perf_counter()

# Build and train the model. `alphazero` is reused by the save cell below.
alphazero = create_alphazero(game, adapter, max_workers=10, self_play_times=50,
                             max_games_counter=1000, buffer_size=500, callbacks=[checkpoint],
                             batch_size=384, temperature=100, epochs=5, c_puct=1,
                             exploration_noise=(.25, .5), mcts_iter=200, nn_config=config)

print("Finished training after", time.perf_counter() - start, "seconds")

2021-05-15 19:00:53,586	INFO services.py:1172 -- View the Ray dashboard at http://127.0.0.1:8265


Epoch 1/5
12/12 - 3s - loss: 1.4250 - value_head_loss: 0.5972 - policy_head_loss: 2.2295
Epoch 2/5
12/12 - 0s - loss: 1.3304 - value_head_loss: 0.5621 - policy_head_loss: 2.0691
Epoch 3/5
12/12 - 0s - loss: 1.2542 - value_head_loss: 0.5324 - policy_head_loss: 1.9432
Epoch 4/5
12/12 - 0s - loss: 1.1794 - value_head_loss: 0.4969 - policy_head_loss: 1.8270
Epoch 5/5
12/12 - 0s - loss: 1.1320 - value_head_loss: 0.4874 - policy_head_loss: 1.7398
Games played: 50
Epoch 1/5
12/12 - 0s - loss: 1.1476 - value_head_loss: 0.3997 - policy_head_loss: 1.8572
Epoch 2/5
12/12 - 0s - loss: 1.0656 - value_head_loss: 0.3630 - policy_head_loss: 1.7276
Epoch 3/5
12/12 - 0s - loss: 1.0231 - value_head_loss: 0.3437 - policy_head_loss: 1.6597
Epoch 4/5
12/12 - 0s - loss: 0.9780 - value_head_loss: 0.2998 - policy_head_loss: 1.6113
Epoch 5/5
12/12 - 0s - loss: 0.9553 - value_head_loss: 0.2884 - policy_head_loss: 1.5757
Games played: 100
Checkpoint saved at models/checkpoints/tictactoe/checkpoint1.h5
Epoch 1/5
1

12/12 - 0s - loss: 0.4820 - value_head_loss: 0.1226 - policy_head_loss: 0.7821
Epoch 5/5
12/12 - 0s - loss: 0.4677 - value_head_loss: 0.1195 - policy_head_loss: 0.7563
Games played: 850
Epoch 1/5
12/12 - 0s - loss: 0.4526 - value_head_loss: 0.1088 - policy_head_loss: 0.7368
Epoch 2/5
12/12 - 0s - loss: 0.4235 - value_head_loss: 0.0996 - policy_head_loss: 0.6876
Epoch 3/5
12/12 - 0s - loss: 0.4111 - value_head_loss: 0.0947 - policy_head_loss: 0.6679
Epoch 4/5
12/12 - 0s - loss: 0.4067 - value_head_loss: 0.0962 - policy_head_loss: 0.6579
Epoch 5/5
12/12 - 0s - loss: 0.4005 - value_head_loss: 0.0917 - policy_head_loss: 0.6510
Games played: 900
Checkpoint saved at models/checkpoints/tictactoe/checkpoint9.h5
Epoch 1/5
12/12 - 0s - loss: 0.3729 - value_head_loss: 0.0468 - policy_head_loss: 0.6418
Epoch 2/5
12/12 - 0s - loss: 0.3539 - value_head_loss: 0.0374 - policy_head_loss: 0.6138
Epoch 3/5
12/12 - 0s - loss: 0.3457 - value_head_loss: 0.0355 - policy_head_loss: 0.6002
Epoch 4/5
12/12 - 0s

In [5]:
# Persist the trained model returned by create_alphazero to its final location.
final_model_path = 'models/TicTacToe.h5'
alphazero.save(final_model_path)