In [1]:
import os


# If the current working directory has no entry named 'tfg', step up one
# level (presumably so the `tfg` package imported in the next cell resolves
# when the notebook is started from a subdirectory — confirm project layout).
if 'tfg' not in os.listdir(os.curdir):
    os.chdir(os.pardir)

In [2]:
from tfg.alphaZero import create_alphazero
from tfg.alphaZeroCallbacks import Checkpoint
from tfg.alphaZeroAdapters import TicTacToeAdapter
from tfg.util import enable_gpu
from tfg.alphaZeroConfig import AlphaZeroConfig
from game.tictactoe import TicTacToe

import time

In [3]:
# Run the project's GPU helper before the training cell creates the model.
# NOTE(review): what enable_gpu() configures is defined in tfg.util, not shown here.
enable_gpu()

# Game instance handed to create_alphazero in the training cell below.
game = TicTacToe()

In [4]:
# Hyper-parameters forwarded to create_alphazero through `nn_config`.
config = AlphaZeroConfig(
    learning_rate=.01,
    regularizer_constant=.0,
    residual_layers=3,
    filters=32,
    kernel_size=(3, 3)
)

# Measure the duration with perf_counter(): it is monotonic, so the reported
# elapsed time cannot be skewed by system clock adjustments (NTP, DST, manual
# changes) that happen during a long training run, unlike time.time().
start = time.perf_counter()

# Train on TicTacToe; the returned object is what gets saved in the next cell.
# NOTE(review): the meaning of each keyword is defined by create_alphazero in
# tfg.alphaZero and is not visible from this notebook.
alphazero = create_alphazero(game, TicTacToeAdapter(), max_workers=10,
                             self_play_times=120, max_games_counter=600,
                             buffer_size=1500, batch_size=1024, temperature=100,
                             epochs=5, c_puct=1, exploration_noise=(.25, .045),
                             mcts_iter=200, nn_config=config)

print("Finished training after", time.perf_counter() - start, "seconds")

2021-05-11 18:33:16,761	INFO services.py:1172 -- View the Ray dashboard at http://127.0.0.1:8265


Epoch 1/5
32/32 - 5s - loss: 1.3492 - value_head_loss: 0.4768 - policy_head_loss: 2.2216
Epoch 2/5
32/32 - 0s - loss: 1.2505 - value_head_loss: 0.4113 - policy_head_loss: 2.0898
Epoch 3/5
32/32 - 0s - loss: 1.1535 - value_head_loss: 0.3877 - policy_head_loss: 1.9194
Epoch 4/5
32/32 - 0s - loss: 1.0918 - value_head_loss: 0.3635 - policy_head_loss: 1.8201
Epoch 5/5
32/32 - 0s - loss: 1.0375 - value_head_loss: 0.3364 - policy_head_loss: 1.7385
Games played: 120
Epoch 1/5
32/32 - 1s - loss: 0.8922 - value_head_loss: 0.2707 - policy_head_loss: 1.5136
Epoch 2/5
32/32 - 0s - loss: 0.7746 - value_head_loss: 0.2397 - policy_head_loss: 1.3096
Epoch 3/5
32/32 - 0s - loss: 0.7370 - value_head_loss: 0.2291 - policy_head_loss: 1.2448
Epoch 4/5
32/32 - 0s - loss: 0.6946 - value_head_loss: 0.2188 - policy_head_loss: 1.1704
Epoch 5/5
32/32 - 0s - loss: 0.6809 - value_head_loss: 0.2169 - policy_head_loss: 1.1448
Games played: 240
Epoch 1/5
32/32 - 0s - loss: 0.5858 - value_head_loss: 0.1420 - policy_hea

In [5]:
# Persist the trained model. The target directory is created first (no-op if
# it already exists) so that a missing `models/` folder cannot make the save
# fail after the long training run has already finished.
# NOTE(review): whether alphazero.save() creates missing directories itself is
# not visible here; makedirs(exist_ok=True) is harmless either way.
model_path = 'models/TicTacToe_copy.h5'
os.makedirs(os.path.dirname(model_path), exist_ok=True)
alphazero.save(model_path)

In [1]:
import os


# If the current working directory has no entry named 'tfg', step up one
# level (presumably so the `tfg` imports below resolve when the notebook is
# started from a subdirectory — confirm project layout).
if 'tfg' not in os.listdir(os.curdir):
    os.chdir(os.pardir)
    
from tfg.strategies import Minimax, MonteCarloTree
from tfg.alphaZero import parallel_play
from tfg.alphaZeroAdapters import TicTacToeAdapter
from tfg.util import enable_gpu,play
from game.tictactoe import TicTacToe

In [2]:
# Run the project's GPU helper before any evaluation games are played.
# NOTE(review): what enable_gpu() configures is defined in tfg.util, not shown here.
enable_gpu()

# Game instance shared by the evaluation cells below.
game = TicTacToe()
# NOTE(review): `minimax` is not referenced by the evaluation cells visible
# below, which each build their own Minimax(game) — confirm whether this
# instance is still needed.
minimax = Minimax(game)

In [3]:
# Play 100 games between a fresh Minimax opponent and the saved model, with
# the search limited to 2 MCTS iterations. The call is the cell's last
# expression, so its return value is displayed as the cell output.
# NOTE(review): the meaning of 'black' and the field order of the returned
# tuple are defined by parallel_play in tfg.alphaZero — confirm there.
parallel_play(
    game,
    TicTacToeAdapter(),
    Minimax(game),
    'models/TicTacToe_copy.h5',
    'black',
    max_workers=10,
    mcts_iter=2,
    games=100,
)

(12, 88, 0)

In [4]:
# Same 100-game match-up against a fresh Minimax opponent, but with 200 MCTS
# iterations. The call is the cell's last expression, so its return value is
# displayed as the cell output.
# NOTE(review): the meaning of 'black' and the field order of the returned
# tuple are defined by parallel_play in tfg.alphaZero — confirm there.
parallel_play(
    game,
    TicTacToeAdapter(),
    Minimax(game),
    'models/TicTacToe_copy.h5',
    'black',
    max_workers=10,
    mcts_iter=200,
    games=100,
)

(0, 100, 0)