Copyright **`(c)`** 2023 Giovanni Squillero `<giovanni.squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  

In [179]:
from itertools import combinations
from collections import namedtuple, defaultdict
from random import choice
from copy import deepcopy

from tqdm.auto import tqdm
import numpy as np

In [180]:
State = namedtuple('State', ['x', 'o'])

In [181]:
MAGIC = [2, 7, 6, 9, 5, 1, 4, 3, 8]

In [182]:
def print_board(pos):
    """Nicely prints the board"""
    for r in range(3):
        for c in range(3):
            i = r * 3 + c
            if MAGIC[i] in pos.x:
                print('X', end='')
            elif MAGIC[i] in pos.o:
                print('O', end='')
            else:
                print('.', end='')
        print()
    print()

In [183]:
def win(elements):
    """Checks is elements is winning"""
    return any(sum(c) == 15 for c in combinations(elements, 3))

def state_value(pos: State):
    """Evaluate state: +1 first player wins"""
    if win(pos.x):
        return 1
    elif win(pos.o):
        return -1
    else:
        return 0

In [184]:
class Random_player:
    def __init__(self): pass

    def move(self, ally_state, enemy_state, available): return choice(list(available))

In [185]:
class Player:
    def __init__(self, moves_dict): self.moves_dict = moves_dict
        
    def move(self, ally_state, enemy_state, available):

        ally_state = frozenset(ally_state)
        enemy_state = frozenset(enemy_state)
        
        if enemy_state in self.moves_dict.keys():

            for possible_configuration in self.moves_dict[enemy_state]:
                n_not_in = 0
                move = None
                for k in possible_configuration:
                    if k not in ally_state:
                        n_not_in += 1
                        move = k
                if n_not_in == 1 and move in available: return move

        #print('going random')
        return choice(list(available))

In [186]:
def game(player_1, player_2):
    trajectory = list()
    state = State(set(), set())
    available = set(range(1, 10))
    winner = 0
    while available:
        move = player_1.move(state.x, state.o, available)
        state.x.add(move)
        trajectory.append(deepcopy(state))
        available.remove(move)
        if win(state.x):
            winner = 1
            break
        if not available:
            break

        move = player_2.move(state.o, state.x, available)
        state.o.add(move)
        trajectory.append(deepcopy(state))
        available.remove(move)
        if win(state.o): 
            winner = 2
            break

    return trajectory, winner

In [187]:
def update_val_dict(val_dict, trajectory, reward, epsilon):
    for state in trajectory:
        hashable_state = (frozenset(state.x), frozenset(state.o))
        val_dict[hashable_state] = val_dict[
            hashable_state
        ] + epsilon * (reward - val_dict[hashable_state])

In [188]:
def compute_move_dict(val_dict):

    moves_dict = defaultdict()

    for x in sorted(val_dict.items(), key=lambda e: e[1], reverse=True):
        if x[0][1] not in moves_dict.keys(): moves_dict[x[0][1]] = []
        else: moves_dict[x[0][1]].append(x[0][0])

    return moves_dict

In [189]:
def init_val_dict(epsilon, n_tries):

    value_dictionary = defaultdict(float)
    random_player = Random_player()

    for _ in tqdm(range(n_tries)):
        trajectory, _ = game(random_player, random_player)
        final_reward = state_value(trajectory[-1])
        update_val_dict(value_dictionary, trajectory, final_reward, epsilon)

    return value_dictionary

In [190]:
EPSILON = 0.001

In [191]:
value_dictionary = init_val_dict(EPSILON, 5_000)

moves_dict = compute_move_dict(value_dictionary)

win_player_first = 0
win_rand_first = 0
win_player_second = 0
win_rand_second = 0
draw_player_first = 0
draw_player_second = 0

player = Player(moves_dict)
random_player = Random_player()

for _ in range(1000):

    trajectory, winner = game(player, random_player)
    if winner == 1: win_player_first += 1
    elif winner == 2: win_rand_second += 1
    else: draw_player_first += 1

    #if winner == 1: update_val_dict(value_dictionary, trajectory, 1, EPSILON)
    #elif winner == 2: update_val_dict(value_dictionary, trajectory, -1, EPSILON)
    #moves_dict = compute_move_dict(value_dictionary)

    trajectory, winner = game(random_player, player)
    if winner == 2: win_player_second += 1
    elif winner == 1: win_rand_first += 1
    else: draw_player_second += 1

    #if winner == 1: update_val_dict(value_dictionary, trajectory, 1, EPSILON)
    #elif winner == 2: update_val_dict(value_dictionary, trajectory, -1, EPSILON)
    #moves_dict = compute_move_dict(value_dictionary)
    #player = Player(moves_dict)
   

print(f'player won {win_player_first} times as first and {win_player_second} as second, total: {win_player_first + win_player_second}')
print(f'random won {win_rand_first} times as first and {win_rand_second} as second, total: {win_rand_first + win_rand_second}')
print(f'draw games: {draw_player_first} times with player first and {draw_player_second} with player second: total: {draw_player_first + draw_player_second}')

100%|██████████| 5000/5000 [00:00<00:00, 9213.13it/s]


player won 878 times as first and 218 as second, total: 1096
random won 585 times as first and 63 as second, total: 648
draw games: 59 times with player first and 197 with player second: total: 256


In [192]:
value_dictionary = init_val_dict(EPSILON, 5_000_000)

moves_dict = compute_move_dict(value_dictionary)

win_player_first = 0
win_rand_first = 0
win_player_second = 0
win_rand_second = 0
draw_player_first = 0
draw_player_second = 0

player = Player(moves_dict)
random_player = Random_player()

for _ in range(1000):

    trajectory, winner = game(player, random_player)
    if winner == 1: win_player_first += 1
    elif winner == 2: win_rand_second += 1
    else: draw_player_first += 1

    #if winner == 1: update_val_dict(value_dictionary, trajectory, 1, EPSILON)
    #elif winner == 2: update_val_dict(value_dictionary, trajectory, -1, EPSILON)
    #moves_dict = compute_move_dict(value_dictionary)

    trajectory, winner = game(random_player, player)
    if winner == 2: win_player_second += 1
    elif winner == 1: win_rand_first += 1
    else: draw_player_second += 1

    #if winner == 1: update_val_dict(value_dictionary, trajectory, 1, EPSILON)
    #elif winner == 2: update_val_dict(value_dictionary, trajectory, -1, EPSILON)
    #moves_dict = compute_move_dict(value_dictionary)
    #player = Player(moves_dict)
   

print(f'player won {win_player_first} times as first and {win_player_second} as second, total: {win_player_first + win_player_second}')
print(f'random won {win_rand_first} times as first and {win_rand_second} as second, total: {win_rand_first + win_rand_second}')
print(f'draw games: {draw_player_first} times with player first and {draw_player_second} with player second: total: {draw_player_first + draw_player_second}')

100%|██████████| 5000000/5000000 [08:47<00:00, 9486.20it/s]


player won 965 times as first and 284 as second, total: 1249
random won 332 times as first and 9 as second, total: 341
draw games: 26 times with player first and 384 with player second: total: 410


In [193]:
value_dictionary = init_val_dict(EPSILON, 50_000_000)

moves_dict = compute_move_dict(value_dictionary)

win_player_first = 0
win_rand_first = 0
win_player_second = 0
win_rand_second = 0
draw_player_first = 0
draw_player_second = 0

player = Player(moves_dict)
random_player = Random_player()

for _ in range(1000):

    trajectory, winner = game(player, random_player)
    if winner == 1: win_player_first += 1
    elif winner == 2: win_rand_second += 1
    else: draw_player_first += 1

    #if winner == 1: update_val_dict(value_dictionary, trajectory, 1, EPSILON)
    #elif winner == 2: update_val_dict(value_dictionary, trajectory, -1, EPSILON)
    #moves_dict = compute_move_dict(value_dictionary)

    trajectory, winner = game(random_player, player)
    if winner == 2: win_player_second += 1
    elif winner == 1: win_rand_first += 1
    else: draw_player_second += 1

    #if winner == 1: update_val_dict(value_dictionary, trajectory, 1, EPSILON)
    #elif winner == 2: update_val_dict(value_dictionary, trajectory, -1, EPSILON)
    #moves_dict = compute_move_dict(value_dictionary)
    #player = Player(moves_dict)
   

print(f'player won {win_player_first} times as first and {win_player_second} as second, total: {win_player_first + win_player_second}')
print(f'random won {win_rand_first} times as first and {win_rand_second} as second, total: {win_rand_first + win_rand_second}')
print(f'draw games: {draw_player_first} times with player first and {draw_player_second} with player second: total: {draw_player_first + draw_player_second}')

100%|██████████| 50000000/50000000 [1:31:19<00:00, 9124.67it/s]


player won 990 times as first and 363 as second, total: 1353
random won 317 times as first and 1 as second, total: 318
draw games: 9 times with player first and 320 with player second: total: 329
