In [1]:
import os

# The imports in the following cells expect the `tfg` package to be reachable
# from the working directory. If no entry named 'tfg' is listed here, move up
# one directory level (assumes the notebook sits one level below the project
# root — TODO confirm for other layouts).
tfg_is_here = 'tfg' in os.listdir()
if not tfg_is_here:
    os.chdir('..')

In [2]:
from tfg.alphaZero import create_alphazero
from tfg.alphaZeroCallbacks import Checkpoint
from tfg.alphaZeroAdapters import ConnectNAdapter
from tfg.util import enable_gpu
from tfg.alphaZeroConfig import AlphaZeroConfig
from game.connect_n import ConnectN

import time

In [3]:
# Project helper from tfg.util, called before anything else is built.
# NOTE(review): presumably configures the deep-learning backend to use the
# GPU — confirm in tfg.util.
enable_gpu()

# Game instance and the adapter wrapping it; both are handed to
# create_alphazero in the training cell.
game = ConnectN()
adapter = ConnectNAdapter(game)

# Network hyper-parameters, collected in one place so they are easy to tweak.
# NOTE(review): exact semantics of each field live in AlphaZeroConfig.
nn_hyperparams = dict(
    learning_rate=0.001,
    regularizer_constant=0.0,
    residual_layers=2,
    filters=64,
    kernel_size=(3, 3),
)
config = AlphaZeroConfig(**nn_hyperparams)

In [4]:
# Time the whole training run. perf_counter() is a monotonic clock, so the
# reported duration cannot be distorted by system clock adjustments (NTP, DST)
# that may occur during a long run, unlike time.time().
start = time.perf_counter()

# Checkpoint callback handed to the trainer; its files go under
# models/checkpoints/connect4.
# NOTE(review): `delay=5` presumably sets how often a checkpoint is written —
# confirm the unit in tfg.alphaZeroCallbacks.Checkpoint.
checkpoint = Checkpoint(directory='models/checkpoints/connect4', delay=5)

# Build and train the AlphaZero instance for `game`.
# NOTE(review): the meaning of the self-play / buffer / MCTS arguments is
# inferred from their names only — confirm against
# tfg.alphaZero.create_alphazero before tuning them.
alphazero = create_alphazero(
    game,
    adapter,
    max_workers=10,
    callbacks=[checkpoint],
    self_play_times=50,
    max_games_counter=1500,
    buffer_size=2500,
    batch_size=1792,
    temperature=30,
    epochs=10,
    c_puct=1,
    exploration_noise=(.25, .5),
    mcts_iter=400,
    nn_config=config,
)

print("Finished training after", time.perf_counter() - start, "seconds")

2021-05-16 15:23:14,827	INFO services.py:1172 -- View the Ray dashboard at http://127.0.0.1:8265


Epoch 1/10
38/38 - 4s - loss: 1.4644 - value_head_loss: 0.7721 - policy_head_loss: 2.1567
Epoch 2/10
38/38 - 0s - loss: 1.1935 - value_head_loss: 0.4226 - policy_head_loss: 1.9643
Epoch 3/10
38/38 - 0s - loss: 1.1384 - value_head_loss: 0.3631 - policy_head_loss: 1.9137
Epoch 4/10
38/38 - 0s - loss: 1.1070 - value_head_loss: 0.3302 - policy_head_loss: 1.8838
Epoch 5/10
38/38 - 0s - loss: 1.0769 - value_head_loss: 0.2825 - policy_head_loss: 1.8714
Epoch 6/10
38/38 - 0s - loss: 1.0374 - value_head_loss: 0.2429 - policy_head_loss: 1.8318
Epoch 7/10
38/38 - 0s - loss: 1.0357 - value_head_loss: 0.2366 - policy_head_loss: 1.8347
Epoch 8/10
38/38 - 0s - loss: 1.0235 - value_head_loss: 0.2445 - policy_head_loss: 1.8024
Epoch 9/10
38/38 - 0s - loss: 1.0065 - value_head_loss: 0.2297 - policy_head_loss: 1.7833
Epoch 10/10
38/38 - 0s - loss: 0.9953 - value_head_loss: 0.2291 - policy_head_loss: 1.7615
Games played: 50
Epoch 1/10
56/56 - 1s - loss: 1.2241 - value_head_loss: 0.5847 - policy_head_loss:

Epoch 10/10
56/56 - 0s - loss: 0.4619 - value_head_loss: 0.2522 - policy_head_loss: 0.6716
Games played: 450
Epoch 1/10
56/56 - 0s - loss: 0.8515 - value_head_loss: 0.5404 - policy_head_loss: 1.1625
Epoch 2/10
56/56 - 0s - loss: 0.6421 - value_head_loss: 0.3545 - policy_head_loss: 0.9296
Epoch 3/10
56/56 - 0s - loss: 0.5610 - value_head_loss: 0.2941 - policy_head_loss: 0.8279
Epoch 4/10
56/56 - 0s - loss: 0.5220 - value_head_loss: 0.2814 - policy_head_loss: 0.7625
Epoch 5/10
56/56 - 0s - loss: 0.4970 - value_head_loss: 0.2596 - policy_head_loss: 0.7344
Epoch 6/10
56/56 - 0s - loss: 0.4681 - value_head_loss: 0.2433 - policy_head_loss: 0.6928
Epoch 7/10
56/56 - 0s - loss: 0.4469 - value_head_loss: 0.2390 - policy_head_loss: 0.6548
Epoch 8/10
56/56 - 0s - loss: 0.4414 - value_head_loss: 0.2363 - policy_head_loss: 0.6465
Epoch 9/10
56/56 - 0s - loss: 0.4282 - value_head_loss: 0.2304 - policy_head_loss: 0.6261
Epoch 10/10
56/56 - 0s - loss: 0.4232 - value_head_loss: 0.2275 - policy_head_los

Epoch 8/10
56/56 - 0s - loss: 0.5116 - value_head_loss: 0.3325 - policy_head_loss: 0.6906
Epoch 9/10
56/56 - 0s - loss: 0.4974 - value_head_loss: 0.3209 - policy_head_loss: 0.6740
Epoch 10/10
56/56 - 0s - loss: 0.4871 - value_head_loss: 0.3160 - policy_head_loss: 0.6582
Games played: 900
Epoch 1/10
56/56 - 0s - loss: 0.9176 - value_head_loss: 0.6434 - policy_head_loss: 1.1918
Epoch 2/10
56/56 - 0s - loss: 0.6779 - value_head_loss: 0.4613 - policy_head_loss: 0.8945
Epoch 3/10
56/56 - 0s - loss: 0.6198 - value_head_loss: 0.4259 - policy_head_loss: 0.8136
Epoch 4/10
56/56 - 0s - loss: 0.5787 - value_head_loss: 0.4066 - policy_head_loss: 0.7508
Epoch 5/10
56/56 - 0s - loss: 0.5585 - value_head_loss: 0.3939 - policy_head_loss: 0.7232
Epoch 6/10
56/56 - 0s - loss: 0.5427 - value_head_loss: 0.3959 - policy_head_loss: 0.6894
Epoch 7/10
56/56 - 0s - loss: 0.5200 - value_head_loss: 0.3829 - policy_head_loss: 0.6570
Epoch 8/10
56/56 - 0s - loss: 0.5144 - value_head_loss: 0.3738 - policy_head_loss

Epoch 6/10
56/56 - 0s - loss: 0.4232 - value_head_loss: 0.2435 - policy_head_loss: 0.6030
Epoch 7/10
56/56 - 0s - loss: 0.4096 - value_head_loss: 0.2314 - policy_head_loss: 0.5879
Epoch 8/10
56/56 - 0s - loss: 0.3973 - value_head_loss: 0.2263 - policy_head_loss: 0.5683
Epoch 9/10
56/56 - 0s - loss: 0.3906 - value_head_loss: 0.2211 - policy_head_loss: 0.5601
Epoch 10/10
56/56 - 0s - loss: 0.3850 - value_head_loss: 0.2202 - policy_head_loss: 0.5498
Games played: 1350
Epoch 1/10
56/56 - 0s - loss: 0.8616 - value_head_loss: 0.5628 - policy_head_loss: 1.1603
Epoch 2/10
56/56 - 0s - loss: 0.6352 - value_head_loss: 0.4099 - policy_head_loss: 0.8606
Epoch 3/10
56/56 - 0s - loss: 0.5628 - value_head_loss: 0.3601 - policy_head_loss: 0.7656
Epoch 4/10
56/56 - 0s - loss: 0.5242 - value_head_loss: 0.3499 - policy_head_loss: 0.6984
Epoch 5/10
56/56 - 0s - loss: 0.5008 - value_head_loss: 0.3339 - policy_head_loss: 0.6677
Epoch 6/10
56/56 - 0s - loss: 0.4742 - value_head_loss: 0.3085 - policy_head_los

In [5]:
# Write the trained `alphazero` object to disk.
# NOTE(review): the .h5 extension suggests an HDF5 model file — confirm what
# `save` actually serialises in tfg.alphaZero.
model_path = 'models/Connect4.h5'
alphazero.save(model_path)