In [1]:
import os


# Make the `tfg` package importable by the cells below: when the current
# working directory has no `tfg` entry, step up one level.
# NOTE(review): presumably the notebook lives one directory below the project
# root — confirm if it is ever moved.
if 'tfg' not in os.listdir(os.curdir):
    os.chdir(os.pardir)

In [2]:
from tfg.alphaZero import create_alphazero, parallel_play
from tfg.alphaZeroAdapters import TicTacToeAdapter
from tfg.util import enable_gpu
from tfg.alphaZeroConfig import AlphaZeroConfig
from tfg.strategies import Minimax
from game.tictactoe import TicTacToe

import time
import pandas as pd

In [3]:
# Runs before any game/strategy object is created.
# NOTE(review): presumably configures GPU usage for training — confirm in tfg.util.
enable_gpu()

# Game under study and the adapter object that is handed to AlphaZero with it.
game, adapter = TicTacToe(), TicTacToeAdapter()

# Minimax strategy built on the same game instance; it is the opponent passed
# to `parallel_play` in the evaluation step of the sweep.
minimax = Minimax(game)

# Path given to `alphazero.save(...)` and then to `parallel_play(...)`.
model_filename = 'models/TicTacToe_params.h5'

In [4]:
# Hyperparameter combinations to evaluate. Each tuple is
#     (lr, reg, filters, c, alpha)
# and is unpacked by the sweep loop into learning_rate, regularizer_constant,
# filters, c_puct and the second element of exploration_noise, respectively.
configs = [
    # alpha = 1.0, no regularization
    (0.01,  0.0,    16, 1.0, 1.0),
    (0.001, 0.0,    16, 1.0, 1.0),
    (0.001, 0.0,    32, 1.0, 1.0),

    # alpha = 1.0, reg = 1e-4
    (0.01,  0.0001, 16, 1.0, 1.0),
    (0.001, 0.0001, 32, 1.0, 1.0),

    # alpha = 0.5, c = 1.0, 32 filters
    (0.01,  0.0,    32, 1.0, 0.5),
    (0.001, 0.0,    32, 1.0, 0.5),
    (0.01,  0.0001, 32, 1.0, 0.5),
    (0.001, 0.0001, 32, 1.0, 0.5),

    # alpha = 0.5, c = 1.2
    (0.001, 0.0,    32, 1.2, 0.5),
    (0.001, 0.0001, 32, 1.2, 0.5),

    # alpha = 0.5, c = 0.8
    (0.001, 0.0,    32, 0.8, 0.5),
    (0.001, 0.0001, 32, 0.8, 0.5),

    # alpha = 0.5, 64 filters
    (0.001, 0.0001, 64, 1.0, 0.5),
]

In [5]:
# Hyperparameter sweep: for every entry of `configs`, build/train an AlphaZero
# agent with `create_alphazero`, save it, and play evaluation games against
# `minimax`. `results` receives one value per config, in `configs` order; it is
# reset here so re-running the cell never accumulates stale entries.
results = []

for lr, reg, filters, c, alpha in configs:

    # Network settings: only lr, reg and filters vary across the sweep.
    config = AlphaZeroConfig(
        learning_rate=lr,
        regularizer_constant=reg,
        residual_layers=1,
        filters=filters,
        kernel_size=(3, 3)
    )

    # perf_counter() is a monotonic, high-resolution clock, so the reported
    # duration cannot be distorted by system clock adjustments (which can
    # affect differences of time.time() values).
    start = time.perf_counter()

    # c feeds c_puct; alpha is the second element of exploration_noise.
    alphazero = create_alphazero(game, adapter, max_workers=10, self_play_times=50,
                                 max_games_counter=100, buffer_size=500,
                                 batch_size=384, temperature=100, epochs=5, c_puct=c,
                                 exploration_noise=(.25, alpha), mcts_iter=200, nn_config=config)
    print("Finished training after", time.perf_counter() - start, "seconds")

    # NOTE: every config is written to the same path, so once the sweep ends
    # only the model of the last config remains on disk.
    alphazero.save(model_filename)

    # parallel_play returns three values; only the middle one is kept. It is
    # later tabulated as the `draws` column.
    # NOTE(review): presumably the triple is (wins, draws, losses) — confirm
    # against tfg.alphaZero.parallel_play.
    _, d, _ = parallel_play(game, adapter, minimax, model_filename,
                            'black', max_workers=10, mcts_iter=100, games=50)
    results.append(d)

2021-05-15 11:49:00,066	INFO services.py:1172 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


Epoch 1/5
12/12 - 3s - loss: 1.4160 - value_head_loss: 0.5784 - policy_head_loss: 2.2537
Epoch 2/5
12/12 - 0s - loss: 1.2948 - value_head_loss: 0.4618 - policy_head_loss: 2.1278
Epoch 3/5
12/12 - 0s - loss: 1.2418 - value_head_loss: 0.4093 - policy_head_loss: 2.0743
Epoch 4/5
12/12 - 0s - loss: 1.2133 - value_head_loss: 0.4018 - policy_head_loss: 2.0249
Epoch 5/5
12/12 - 0s - loss: 1.1623 - value_head_loss: 0.3607 - policy_head_loss: 1.9639
Games played: 50
Epoch 1/5
12/12 - 0s - loss: 1.1803 - value_head_loss: 0.3989 - policy_head_loss: 1.9617
Epoch 2/5
12/12 - 0s - loss: 1.0329 - value_head_loss: 0.2903 - policy_head_loss: 1.7755
Epoch 3/5
12/12 - 0s - loss: 0.9373 - value_head_loss: 0.2647 - policy_head_loss: 1.6098
Epoch 4/5
12/12 - 0s - loss: 0.8760 - value_head_loss: 0.2454 - policy_head_loss: 1.5065
Epoch 5/5
12/12 - 0s - loss: 0.8335 - value_head_loss: 0.2261 - policy_head_loss: 1.4409
Games played: 100
Finished training after 38.41213893890381 seconds
Epoch 1/5
12/12 - 1s - lo

Games played: 50
Epoch 1/5
12/12 - 0s - loss: 1.1981 - value_head_loss: 0.3983 - policy_head_loss: 1.9776
Epoch 2/5
12/12 - 0s - loss: 1.1213 - value_head_loss: 0.3558 - policy_head_loss: 1.8665
Epoch 3/5
12/12 - 0s - loss: 1.0728 - value_head_loss: 0.3227 - policy_head_loss: 1.8027
Epoch 4/5
12/12 - 0s - loss: 1.0362 - value_head_loss: 0.3080 - policy_head_loss: 1.7440
Epoch 5/5
12/12 - 0s - loss: 0.9954 - value_head_loss: 0.2797 - policy_head_loss: 1.6906
Games played: 100
Finished training after 31.071317195892334 seconds


Traceback (most recent call last):
  File "C:\Users\pirra\Anaconda3\envs\AlphaZero\lib\site-packages\ray\log_monitor.py", line 359, in <module>
    log_monitor.run()
  File "C:\Users\pirra\Anaconda3\envs\AlphaZero\lib\site-packages\ray\log_monitor.py", line 280, in run
    self.open_closed_files()
  File "C:\Users\pirra\Anaconda3\envs\AlphaZero\lib\site-packages\ray\log_monitor.py", line 167, in open_closed_files
    self.close_all_files()
  File "C:\Users\pirra\Anaconda3\envs\AlphaZero\lib\site-packages\ray\log_monitor.py", line 102, in close_all_files
    os.kill(file_info.worker_pid, 0)
TypeError: an integer is required (got type str)



Epoch 1/5
12/12 - 1s - loss: 1.4170 - value_head_loss: 0.5894 - policy_head_loss: 2.2446
Epoch 2/5
12/12 - 0s - loss: 1.2732 - value_head_loss: 0.4792 - policy_head_loss: 2.0672
Epoch 3/5
12/12 - 0s - loss: 1.2206 - value_head_loss: 0.4314 - policy_head_loss: 2.0098
Epoch 4/5
12/12 - 0s - loss: 1.1725 - value_head_loss: 0.3938 - policy_head_loss: 1.9511
Epoch 5/5
12/12 - 0s - loss: 1.1317 - value_head_loss: 0.3513 - policy_head_loss: 1.9121
Games played: 50
Epoch 1/5
12/12 - 0s - loss: 1.1920 - value_head_loss: 0.3715 - policy_head_loss: 2.0126
Epoch 2/5
12/12 - 0s - loss: 1.1202 - value_head_loss: 0.3336 - policy_head_loss: 1.9068
Epoch 3/5
12/12 - 0s - loss: 1.0799 - value_head_loss: 0.3060 - policy_head_loss: 1.8538
Epoch 4/5
12/12 - 0s - loss: 1.0473 - value_head_loss: 0.2827 - policy_head_loss: 1.8119
Epoch 5/5
12/12 - 0s - loss: 1.0188 - value_head_loss: 0.2667 - policy_head_loss: 1.7708
Games played: 100
Finished training after 29.719826459884644 seconds
Epoch 1/5
12/12 - 1s - l

In [6]:
# Tabulate the sweep: one row per entry of `configs`, with the matching value
# from `results` attached as the `draws` column.
df = pd.DataFrame(
    configs,
    columns=['lr', 'reg', 'filters', 'c', 'alpha'],
).assign(draws=results)

df

Unnamed: 0,lr,reg,filters,c,alpha,draws
0,0.01,0.0,16,1.0,1.0,24
1,0.001,0.0,16,1.0,1.0,13
2,0.001,0.0,32,1.0,1.0,25
3,0.01,0.0001,16,1.0,1.0,46
4,0.001,0.0001,32,1.0,1.0,17
5,0.01,0.0,32,1.0,0.5,46
6,0.001,0.0,32,1.0,0.5,34
7,0.01,0.0001,32,1.0,0.5,48
8,0.001,0.0001,32,1.0,0.5,47
9,0.001,0.0,32,1.2,0.5,41


In [7]:
# Show the configurations ordered from the highest to the lowest draw count.
df.sort_values(by='draws', ascending=False)

Unnamed: 0,lr,reg,filters,c,alpha,draws
7,0.01,0.0001,32,1.0,0.5,48
8,0.001,0.0001,32,1.0,0.5,47
12,0.001,0.0001,32,0.8,0.5,47
3,0.01,0.0001,16,1.0,1.0,46
5,0.01,0.0,32,1.0,0.5,46
10,0.001,0.0001,32,1.2,0.5,46
11,0.001,0.0,32,0.8,0.5,44
9,0.001,0.0,32,1.2,0.5,41
6,0.001,0.0,32,1.0,0.5,34
2,0.001,0.0,32,1.0,1.0,25
