### Import useful libraries

In [1]:
import random
from datetime import datetime
from tqdm import trange
from board import Board
from player import Player

### Test if the player learned the optimal strategy

In [2]:
# Signs used on the board: the human opponent is 'o', the RL agent is 'x'.
me, agent = 'o', 'x'

# 3x3 board shared by the cells below; the agent's sign is listed first.
board = Board(
    nrow=3,
    ncol=3,
    sign_play=[agent, me],
)
# The player object is created with the agent's sign.
player = Player(sign=agent)

In [17]:
# Saved parameter file that is loaded into the player before evaluation.
params_path = "params/player-201024-1616"
player.load_args(params_path)

# Evaluate in playing mode (the alternative is player.training_mode()).
player.playing_mode()

In [6]:
# number of testing matches
num_test = 1000
# number of matches that finished in a draw
num_draw = 0

for game_idx in trange(num_test):
    # reset board for new game
    board.reset()

    # In roughly half of the games the opponent sign opens on a random cell;
    # in the other half the agent makes the first move on an empty board.
    if random.random() < 0.5:
        i = random.randrange(board.get_nrow())
        # NOTE(review): the column index is also drawn from get_nrow(), which
        # is only correct for square boards -- confirm whether a column-count
        # accessor should be used here instead.
        j = random.randrange(board.get_nrow())
        board.add(me, row=i, col=j)

    # The same player object chooses every move until somebody wins or the
    # board is full.
    someone_won = False
    while not board.is_full():
        # RL agent chooses an action
        action = player.choose_action(board)
        # update board
        board.add(sign=agent, row=action[0], col=action[1])
        # a win right after this move ends the match (not a draw)
        if board.is_won():
            someone_won = True
            break
        # if nobody won yet, inverse the board
        # (presumably swaps the two signs so the agent moves for the other
        # side next -- confirm against Board.inverse)
        board.inverse()

    # The board filled up without a winner: count the match as a draw.
    if not someone_won:
        num_draw += 1

print("Finished testing")
print('   number of draws : ', num_draw, " of ", num_test)

100%|██████████| 1000/1000 [00:10<00:00, 98.76it/s]

Finished testing
   number of draws :  463  of  1000





### Play against the trained agent

In [21]:
# Reset the board so the manual game in the next cell starts a new game.
board.reset()

In [23]:
# Re-run this cell once per human move.
# Start a new game when the previous one is over. A game is over when the
# board is full OR when somebody has already won; checking only is_full()
# would add the next move to an already-decided board.
if board.is_full() or board.is_won():
    board.reset()

# None or array [row,col]
my_pos = [0,2]
if my_pos:
    board.add(me, row=my_pos[0], col=my_pos[1])

# The agent only replies while the game is still open.
if not (board.is_full() or board.is_won()):
    # player 'x' plays
    board = player.play(board)

board.print()


 - - o 
 - x - 
 o - x 


In [9]:
# Board states (9-character strings) whose learned value is printed,
# one value per line and in this order.
probe_states = [
    'o---x-ox-',
    'o-oxx----',
    '-xo-x---o',
    '----xxo-o',
    'ox--x-o--',
    '---xx-o-o',
    '--o-x--xo',
    'o-o-xx---',
]
for state in probe_states:
    print(player.get_value(state))

0.3376566291318732
0.4058909628931378
0.3730916491088306
0.359128154771456
0.3710585464304482
0.35551614580561286
0.391848808938505
0.3696389329362915


In [7]:
# for order = 0 expect value = 0.33
# for order = 1 expect value = 0.23

# Print the learned value of each listed state, one per line.
probe_states = [
    'x-o-xx-oo',
    '--xox-oxo',
    'oo-xx-o-x',
    'oxo-xox--',
    'o-xxx-oo-',
    'x---xooxo',
    '-oo-xxx-o',
    'oxoox---x',
]
for state in probe_states:
    print(player.get_value(state))

0.3360004847737507
0.3503852371982995
0.3364975933992559
0.31401597203152937
0.3480994549796421
0.3487923251962265
0.3246547887676268
0.37648410493662854


In [8]:
# for order = 0 expect value = 0.75
# for order = 1 expect value = 0.5

# Print the learned value of each listed state, one per line.
probe_states = [
    'o-xxoo-xx',
    '-xoxo-xox',
    'xx-ooxx-o',
    'xox-oxox-',
    'x-oooxxx-',
    'ox--oxxox',
    '-xxxooo-x',
    'xoxxo--xo',
]
for state in probe_states:
    print(player.get_value(state))

0.7436764704587027
0.759566217619852
0.7411713228308788
0.7539858457353724
0.7417074080047255
0.7794560514602216
0.8099872864957541
0.7557333430243729


In [25]:
# for order = 0 expect value = 0

# Look up the learned value of this single state and print it.
probed_value = player.get_value('xxooxx-oo')
print(probed_value)

2.05568843079756e-10
