Copyright **`(c)`** 2023 Giovanni Squillero `<giovanni.squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  

In [255]:
from itertools import combinations, product
from collections import namedtuple, defaultdict
from random import choice
from copy import deepcopy

from tqdm.auto import tqdm
import numpy as np

In [256]:
MAGIC = np.array([2, 7, 6, 9, 5, 1, 4, 3, 8])

In [257]:
def check_board(matrix):
    ravel = matrix.ravel()
    if abs((sum(ravel == 1) - sum(ravel == 2))) > 1: return -1
    if any(sum(c) == 15 for c in combinations(MAGIC[ravel == 1], 3)): return 1
    if any(sum(c) == 15 for c in combinations(MAGIC[ravel == 2], 3)): return 2
    return 0

def state_value(matrix):
    """Evaluate state: +1 first player wins"""
    val = check_board(matrix)
    if val == 1: return 1
    elif val == 2: return -1
    else: return 0

In [258]:
class Random_player:
    def __init__(self): pass

    def move(self, matrix): return tuple(choice(np.argwhere(matrix == 0)))

In [259]:
all_states = [np.array(i).reshape(3, 3) for i in product([0, 1, 2], repeat= 9)]
print(len(all_states))

possible_states = []
for state in all_states:
    not_present = True
    if tuple(np.fliplr(state).ravel()) in possible_states: not_present = False
    elif tuple(np.flipud(state).ravel()) in possible_states: not_present = False
    elif tuple(np.rot90(state).ravel()) in possible_states: not_present = False
    elif tuple(np.rot90(np.rot90(state)).ravel()) in possible_states: not_present = False
    elif tuple(np.rot90(np.rot90(np.rot90(state))).ravel()) in possible_states: not_present = False

    if not_present:
        if sum(sum(state == 0)) != 0:
            possible_states.append(tuple(state.ravel()))

print(len(possible_states))

19683
4855


In [260]:
class Player:
    def __init__(self, states, epsilon):
        self.states = states
        self.epsilon = epsilon
        self.val_dict = defaultdict()
        self.moves_dict = defaultdict()
        for state in states:
            for move in np.argwhere(np.array(list(state)).reshape(3, 3) == 0):
                self.val_dict[(state, tuple(move))] = 0

    def update_val_dict(self, trajectory, reward):
        for matrix, move in trajectory:

            if tuple(matrix.ravel()) in self.states:
                #print('straight')
                hashable_state = (tuple(matrix.ravel()), move)
                self.val_dict[hashable_state] = self.val_dict[hashable_state] + self.epsilon * (reward - self.val_dict[hashable_state])

            elif tuple(np.fliplr(matrix).ravel()) in self.states:
                #print('lr')
                tmp = np.zeros((3, 3))
                tmp[move] = 1
                tmp = np.fliplr(tmp)
                new_move = tuple(np.argwhere(tmp == 1)[0])
                hashable_state = (tuple(np.fliplr(matrix).ravel()), new_move)
                #print(f'to\n{np.array(list(hashable_state[0])).reshape(3, 3)}\n{hashable_state[1]}')
                self.val_dict[hashable_state] = self.val_dict[hashable_state] + self.epsilon * (reward - self.val_dict[hashable_state])

            elif tuple(np.flipud(matrix).ravel()) in self.states:
                #print('ud')
                tmp = np.zeros((3, 3))
                tmp[move] = 1
                tmp = np.flipud(tmp)
                new_move = tuple(np.argwhere(tmp == 1)[0])
                hashable_state = (tuple(np.flipud(matrix).ravel()), new_move)
                #print(f'to\n{np.array(list(hashable_state[0])).reshape(3, 3)}\n{hashable_state[1]}')
                self.val_dict[hashable_state] = self.val_dict[hashable_state] + self.epsilon * (reward - self.val_dict[hashable_state])

            elif tuple(np.rot90(matrix).ravel()) in self.states:
                #print('rot90')
                tmp = np.zeros((3, 3))
                tmp[move] = 1
                tmp = np.rot90(tmp)
                new_move = tuple(np.argwhere(tmp == 1)[0])
                hashable_state = (tuple(np.rot90(matrix).ravel()), new_move)
                #print(f'to\n{np.array(list(hashable_state[0])).reshape(3, 3)}\n{hashable_state[1]}')
                self.val_dict[hashable_state] = self.val_dict[hashable_state] + self.epsilon * (reward - self.val_dict[hashable_state])

            elif tuple(np.rot90(np.rot90(matrix)).ravel()) in self.states:
                #print('rot180')
                tmp = np.zeros((3, 3))
                tmp[move] = 1
                tmp = np.rot90(np.rot90(tmp))
                new_move = tuple(np.argwhere(tmp == 1)[0])
                hashable_state = (tuple(np.rot90(np.rot90(matrix)).ravel()), new_move)
                #print(f'to\n{np.array(list(hashable_state[0])).reshape(3, 3)}\n{hashable_state[1]}')
                self.val_dict[hashable_state] = self.val_dict[hashable_state] + self.epsilon * (reward - self.val_dict[hashable_state])

            elif tuple(np.rot90(np.rot90(np.rot90(matrix))).ravel()) in self.states:
                #print('rot270')
                tmp = np.zeros((3, 3))
                tmp[move] = 1
                tmp = np.rot90(np.rot90(np.rot90(tmp)))
                new_move = tuple(np.argwhere(tmp == 1)[0])
                hashable_state = (tuple(np.rot90(np.rot90(np.rot90(matrix))).ravel()), new_move)
                #print(f'to\n{np.array(list(hashable_state[0])).reshape(3, 3)}\n{hashable_state[1]}')
                self.val_dict[hashable_state] = self.val_dict[hashable_state] + self.epsilon * (reward - self.val_dict[hashable_state])

            else:
                print('error')
                print(matrix)
                print(matrix.ravel())
                print(move)
                print(tuple(matrix.ravel()) in self.states)
                for kk in self.val_dict.keys():
                    if kk[0] == tuple(matrix.ravel()): print(kk)
                print(self.error)

    def compute_move_dict(self):

        self.moves_dict = defaultdict()

        for x in sorted(self.val_dict.items(), key=lambda e: e[1], reverse=True):

            if x[0][0] not in self.moves_dict.keys(): self.moves_dict[x[0][0]] = x[0][1]
    
    def move(self, matrix):

        if tuple(matrix.ravel()) in self.moves_dict.keys(): return self.moves_dict[tuple(matrix.ravel())]

        elif tuple(np.fliplr(matrix).ravel()) in self.moves_dict.keys():
            #print('lr')
            #print(f'from {np.fliplr(matrix)}')
            move = self.moves_dict[tuple(np.fliplr(matrix).ravel())]
            tmp = np.zeros((3, 3))
            tmp[move] = 1
            tmp = np.fliplr(tmp)
            return tuple(np.argwhere(tmp == 1)[0])
        
        elif tuple(np.flipud(matrix).ravel()) in self.moves_dict.keys():
            #print('ud')
            #print(f'from {np.flipud(matrix)}')
            move = self.moves_dict[tuple(np.flipud(matrix).ravel())]
            tmp = np.zeros((3, 3))
            tmp[move] = 1
            tmp = np.flipud(tmp)
            return tuple(np.argwhere(tmp == 1)[0])

        elif tuple(np.rot90(matrix).ravel()) in self.moves_dict.keys():
            #print('rot90')
            #print(f'from {np.rot90(matrix)}')
            move = self.moves_dict[tuple(np.rot90(matrix).ravel())]
            tmp = np.zeros((3, 3))
            tmp[move] = 1
            tmp = np.rot90(np.rot90(np.rot90(tmp)))
            return tuple(np.argwhere(tmp == 1)[0])

        elif tuple(np.rot90(np.rot90(matrix)).ravel()) in self.moves_dict.keys():
            #print('rot180')
            #print(f'from {np.rot90(np.rot90(matrix))}')
            move = self.moves_dict[tuple(np.rot90(np.rot90(matrix)).ravel())]
            tmp = np.zeros((3, 3))
            tmp[move] = 1
            tmp = np.rot90(np.rot90(tmp))
            return tuple(np.argwhere(tmp == 1)[0])

        elif tuple(np.rot90(np.rot90(np.rot90(matrix))).ravel()) in self.moves_dict.keys():
            #print('rot270')
            #print(f'from {np.rot90(np.rot90(np.rot90(matrix)))}')
            move = self.moves_dict[tuple(np.rot90(np.rot90(np.rot90(matrix))).ravel())]
            tmp = np.zeros((3, 3))
            tmp[move] = 1
            tmp = np.rot90(tmp)
            return tuple(np.argwhere(tmp == 1)[0])

        else:
            print('error')
            print(self.error)

In [272]:
def game(player_1, player_2, print_moves= False):
    trajectory = list()
    matrix = np.zeros((3, 3))
    winner = 0
    while sum(sum(matrix == 0)) != 0:
        move = player_1.move(np.copy(matrix))
        if print_moves: print(f'player 1 chose move {move}')
        trajectory.append((np.copy(matrix), move))
        matrix[move] = 1
        if print_moves: print(matrix)
        if check_board(np.copy(matrix)) == 1:
            winner = 1
            break
        if sum(sum(matrix == 0)) == 0:
            break

        move = player_2.move(np.copy(matrix))
        if print_moves: print(f'player 2 chose move {move}')
        matrix[move] = 2
        if print_moves: print(matrix)
        if check_board(np.copy(matrix)) == 2:
            winner = 2
            break

    return trajectory, winner

In [274]:
EPSILON = 0.001

In [273]:
def random_games_init(player, n_tries):

    random_player = Random_player()

    for _ in tqdm(range(n_tries)):
        trajectory, winner = game(random_player, random_player)
        last_board = np.copy(trajectory[-1][0])
        last_board[trajectory[-1][1]] = winner
        final_reward = state_value(last_board)
        player.update_val_dict(trajectory, final_reward)

In [279]:
player = Player(possible_states, EPSILON)
random_games_init(player, 5_000)
player.compute_move_dict()

win_player_first = 0
win_rand_first = 0
win_player_second = 0
win_rand_second = 0
draw_player_first = 0
draw_player_second = 0

random_player = Random_player()

for _ in range(1000):

    trajectory, winner = game(player, random_player)
    if winner == 1: win_player_first += 1
    elif winner == 2: win_rand_second += 1
    else: draw_player_first += 1

    #if winner == 1: update_val_dict(value_dictionary, trajectory, 1, EPSILON)
    #elif winner == 2: update_val_dict(value_dictionary, trajectory, -1, EPSILON)
    #moves_dict = compute_move_dict(value_dictionary)

    trajectory, winner = game(random_player, player)
    if winner == 2: win_player_second += 1
    elif winner == 1: win_rand_first += 1
    else: draw_player_second += 1

    #if winner == 1: update_val_dict(value_dictionary, trajectory, 1, EPSILON)
    #elif winner == 2: update_val_dict(value_dictionary, trajectory, -1, EPSILON)
    #moves_dict = compute_move_dict(value_dictionary)
    #player = Player(moves_dict)
   

print(f'player won {win_player_first} times as first and {win_player_second} as second, total: {win_player_first + win_player_second}')
print(f'random won {win_rand_first} times as first and {win_rand_second} as second, total: {win_rand_first + win_rand_second}')
print(f'draw games: {draw_player_first} times with player first and {draw_player_second} with player second: total: {draw_player_first + draw_player_second}')

player won 956 times as first and 260 as second, total: 1216
random won 603 times as first and 22 as second, total: 625
draw games: 22 times with player first and 137 with player second: total: 159


In [281]:
player = Player(possible_states, EPSILON)
random_games_init(player, 500_000)
player.compute_move_dict()

win_player_first = 0
win_rand_first = 0
win_player_second = 0
win_rand_second = 0
draw_player_first = 0
draw_player_second = 0

random_player = Random_player()

for _ in range(1000):

    trajectory, winner = game(player, random_player)
    if winner == 1: win_player_first += 1
    elif winner == 2: win_rand_second += 1
    else: draw_player_first += 1

    #if winner == 1: update_val_dict(value_dictionary, trajectory, 1, EPSILON)
    #elif winner == 2: update_val_dict(value_dictionary, trajectory, -1, EPSILON)
    #moves_dict = compute_move_dict(value_dictionary)

    trajectory, winner = game(random_player, player)
    if winner == 2: win_player_second += 1
    elif winner == 1: win_rand_first += 1
    else: draw_player_second += 1

    #if winner == 1: update_val_dict(value_dictionary, trajectory, 1, EPSILON)
    #elif winner == 2: update_val_dict(value_dictionary, trajectory, -1, EPSILON)
    #moves_dict = compute_move_dict(value_dictionary)
    #player = Player(moves_dict)
   

print(f'player won {win_player_first} times as first and {win_player_second} as second, total: {win_player_first + win_player_second}')
print(f'random won {win_rand_first} times as first and {win_rand_second} as second, total: {win_rand_first + win_rand_second}')
print(f'draw games: {draw_player_first} times with player first and {draw_player_second} with player second: total: {draw_player_first + draw_player_second}')

100%|██████████| 500000/500000 [38:20<00:00, 217.32it/s]


player won 991 times as first and 257 as second, total: 1248
random won 615 times as first and 0 as second, total: 615
draw games: 9 times with player first and 128 with player second: total: 137
