### Import useful libraries

In [1]:
import random
from datetime import datetime
from tqdm import trange
from board import Board
from player import Player

### Test if the player learned the optimal strategy

In [2]:
# Signs placed on the board: 'o' for the human side, 'x' for the RL agent.
me = 'o'
agent = 'x'

# 3x3 board reused by every cell below; the agent's sign is listed first.
board = Board(nrow=3, ncol=3, sign_play=[agent, me])
# The player is created with the agent's sign.
player = Player(sign=agent)

In [3]:
# Load the agent's arguments from the given path
# (presumably parameters saved by an earlier training run -- confirm).
player.load_args("params/player-201024-1616")
# Put the agent in playing mode; swap this call with the commented-out
# line below to put it in training mode instead.
player.playing_mode()
# player.training_mode()

In [None]:
# number of testing matches
num_test = 1000
# number of matches that finished in a draw
num_draw = 0

for n in trange(num_test):
    # every match starts from a reset board
    board.reset()

    # in about half of the matches the opponent opens with a random move
    if random.random() < 0.5:
        # NOTE(review): the column index is drawn from the row count, as in
        # the original code. That is fine for the square 3x3 board used in
        # this notebook; use the column count instead if Board exposes one
        # and non-square boards are ever tested.
        start_row = random.randrange(board.get_nrow())
        start_col = random.randrange(board.get_nrow())
        board.add(me, row=start_row, col=start_col)

    while not board.is_full():
        # RL agent chooses an action
        action = player.choose_action(board)
        # update board
        board.add(sign=agent, row=action[0], col=action[1])
        # a win ends the match immediately and is not counted as a draw
        if board.is_won():
            break
        # if nobody won yet, inverse the board so the agent moves again
        # (presumably this swaps the two signs -- confirm in Board.inverse)
        board.inverse()
    else:
        # the loop ended without a break: the board filled up with no
        # winner, so the match is a draw
        num_draw += 1

print("Finished testing")
print('   number of draws : ', num_draw, " of ", num_test)

### Play against it

In [None]:
# Reset the shared board so the manual game below starts fresh.
board.reset()

In [None]:
# Start a new game once the previous one is over. A game that ended with
# a win can leave free cells behind, so checking is_full() alone would
# keep adding moves to an already decided board.
if board.is_full() or board.is_won():
    board.reset()

# My move for this turn: None to skip it, or array [row,col]
my_pos = [0,2]
if my_pos:
    board.add(me, row=my_pos[0], col=my_pos[1])

# The agent only answers while the game is still open.
if not (board.is_full() or board.is_won()):
    # player 'x' plays
    board = player.play(board)

# Show the position after this turn.
board.print()

In [4]:
# Print the value the player assigns to each of these eight states,
# one value per line, in the order listed.
for state in (
    'o---x-ox-',
    'o-oxx----',
    '-xo-x---o',
    '----xxo-o',
    'ox--x-o--',
    '---xx-o-o',
    '--o-x--xo',
    'o-o-xx---',
):
    print(player.get_value(state))

0.25119366839676843
0.24656356365521936
0.2613599508728653
0.2887826841650715
0.28583827291815883
0.338340065006745
0.2612276612720202
0.2746840471803412


In [5]:
# for order = 0 expect value = 0.33
# for order = 1 expect value = 0.23

# Print the value the player assigns to each of these eight states,
# one value per line, in the order listed.
for state in (
    'x-o-xx-oo',
    '--xox-oxo',
    'oo-xx-o-x',
    'oxo-xox--',
    'o-xxx-oo-',
    'x---xooxo',
    '-oo-xxx-o',
    'oxoox---x',
):
    print(player.get_value(state))

0.24856401257620495
0.19128543600751763
0.2543444891479068
0.2197149170383297
0.2506367913435575
0.23298288821900764
0.22807177783526933
0.19088018453551397


In [6]:
# for order = 0 expect value = 0.75
# for order = 1 expect value = 0.5

# Print the value the player assigns to each of these eight states,
# one value per line, in the order listed.
for state in (
    'o-xxoo-xx',
    '-xoxo-xox',
    'xx-ooxx-o',
    'xox-oxox-',
    'x-oooxxx-',
    'ox--oxxox',
    '-xxxooo-x',
    'xoxxo--xo',
):
    print(player.get_value(state))

0.5000407905699656
0.5009240992803863
0.5026535010365115
0.5000168573042071
0.5000345392072042
0.5001416168693888
0.500865127631605
0.5000049834113627


In [None]:
# for order = 0 expect value = 0

print(player.get_value('xxooxx-oo'))