Copyright **`(c)`** 2023 Giovanni Squillero `<giovanni.squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  

In [1]:
from itertools import combinations
from collections import namedtuple, defaultdict
from random import choice
from copy import deepcopy

from tqdm.auto import tqdm
import numpy as np

In [2]:
State = namedtuple('State', ['x', 'o'])

In [3]:
MAGIC = [2, 7, 6, 9, 5, 1, 4, 3, 8]

In [4]:
def print_board(pos):
    """Nicely prints the board"""
    for r in range(3):
        for c in range(3):
            i = r * 3 + c
            if MAGIC[i] in pos.x:
                print('X', end='')
            elif MAGIC[i] in pos.o:
                print('O', end='')
            else:
                print('.', end='')
        print()
    print()

In [5]:
def win(elements):
    """Checks is elements is winning"""
    return any(sum(c) == 15 for c in combinations(elements, 3))

def state_value(pos: State):
    """Evaluate state: +1 first player wins"""
    if win(pos.x):
        return 1
    elif win(pos.o):
        return -1
    else:
        return 0

In [6]:
class Random_player:
    def __init__(self): pass

    def move(self, state, available, player): return choice(list(available))

In [7]:
class Player_montecarlo:
    def __init__(self, moves_dict): self.moves_dict = moves_dict
        
    def move(self, state, available, player):

        if player == 0:
            ally_state = frozenset(state.x)
            enemy_state = frozenset(state.o)
        else:
            ally_state = frozenset(state.o)
            enemy_state = frozenset(state.x)
        
        if enemy_state in self.moves_dict.keys():

            for possible_configuration in self.moves_dict[enemy_state]:
                n_not_in = 0
                move = None
                for k in possible_configuration:
                    if k not in ally_state:
                        n_not_in += 1
                        move = k
                if n_not_in == 1 and move in available: return move

        #print('going random')
        return choice(list(available))

In [8]:
def game(player_1, player_2):
    trajectory = list()
    state = State(set(), set())
    available = set(range(1, 10))
    winner = 0
    while available:
        move = player_1.move(state, available, 0)
        state.x.add(move)
        trajectory.append(deepcopy(state))
        available.remove(move)
        if win(state.x):
            winner = 1
            break
        if not available:
            break

        move = player_2.move(state, available, 1)
        state.o.add(move)
        trajectory.append(deepcopy(state))
        available.remove(move)
        if win(state.o): 
            winner = 2
            break

    return trajectory, winner

In [9]:
def update_val_dict(val_dict, trajectory, reward, epsilon):
    for state in trajectory:
        hashable_state = (frozenset(state.x), frozenset(state.o))
        val_dict[hashable_state] = val_dict[
            hashable_state
        ] + epsilon * (reward - val_dict[hashable_state])

In [10]:
def compute_move_dict(val_dict):

    moves_dict = defaultdict()

    for x in sorted(val_dict.items(), key=lambda e: e[1], reverse=True):
        if x[0][1] not in moves_dict.keys(): moves_dict[x[0][1]] = []
        else: moves_dict[x[0][1]].append(x[0][0])

    return moves_dict

In [11]:
def init_val_dict(epsilon, n_tries):

    value_dictionary = defaultdict(float)
    random_player = Random_player()

    for _ in tqdm(range(n_tries)):
        trajectory, _ = game(random_player, random_player)
        final_reward = state_value(trajectory[-1])
        update_val_dict(value_dictionary, trajectory, final_reward, epsilon)

    return value_dictionary

In [12]:
def rec_x(state, available, player, moves_dict_x):

    state_val = state_value(state)
    if state_val != 0 or not available: return state_val

    if player == 0:
        max_eval = -2
        best_x = None

        for x in available:

            new_state = deepcopy(state)
            new_available = available.copy()
            new_available.remove(x)
            new_state.x.add(x)

            x_eval = rec_x(new_state, new_available, 1 - player, moves_dict_x)

            #max_eval = max(max_eval, x_eval)
            if x_eval > max_eval:
                max_eval = x_eval
                best_x = x

        moves_dict_x[(frozenset(state.x), frozenset(state.o))] = best_x
        
        return max_eval
    
    else:
        min_eval = 2

        for o in available:

            new_state = deepcopy(state)
            new_available = available.copy()
            new_available.remove(o)
            new_state.o.add(o)

            o_eval = rec_x(new_state, new_available, 1 - player, moves_dict_x)

            min_eval = min(min_eval, o_eval)

        return min_eval
    
class Player_x:
    def __init__(self):
        self.moves_dict_x = {}
        _ = self.compute_moves_x(State(set(), set()), set(range(1, 10)), self.moves_dict_x)
    
    def compute_moves_x(self, state, available, moves_dict_x):

        best_val = -2
        best_move = None

        for move in available:

            new_state = deepcopy(state)
            new_available = available.copy()
            new_available.remove(move)
            new_state.x.add(move)

            move_eval = rec_x(new_state, new_available, 1, moves_dict_x)

            if move_eval > best_val:
                best_val = move_eval
                best_move = move

        moves_dict_x[(frozenset(state.x), frozenset(state.o))] = best_move
        return best_move
    
    def move(self, state, available, player):
        hashable_state = (frozenset(state.x), frozenset(state.o))
        if hashable_state in self.moves_dict_x.keys(): return self.moves_dict_x[hashable_state]
        else: return self.compute_moves_x(state, available, self.moves_dict_x)

In [13]:
def rec_o(state, available, player, moves_dict_o):

    state_val = state_value(state)
    if state_val != 0 or not available: return state_val

    if player == 0:
        max_eval = -2

        for x in available:

            new_state = deepcopy(state)
            new_available = available.copy()
            new_available.remove(x)
            new_state.x.add(x)

            x_eval = rec_o(new_state, new_available, 1 - player, moves_dict_o)

            max_eval = max(max_eval, x_eval)
        
        return max_eval
    
    else:
        min_eval = 2
        best_o = None

        for o in available:

            new_state = deepcopy(state)
            new_available = available.copy()
            new_available.remove(o)
            new_state.o.add(o)

            o_eval = rec_o(new_state, new_available, 1 - player, moves_dict_o)

            #min_eval = min(min_eval, o_eval)
            if o_eval < min_eval:
                min_eval = o_eval
                best_o = o

        moves_dict_o[(frozenset(state.x), frozenset(state.o))] = best_o

        return min_eval
    
class Player_o:
    def __init__(self):
        self.moves_dict_o = {}
        _ = self.compute_moves_o(State(set(), set()), set(range(1, 10)), self.moves_dict_o)
    
    def compute_moves_o(self, state, available, moves_dict_o):

        best_val = -2
        best_move = None

        for move in available:

            new_state = deepcopy(state)
            new_available = available.copy()
            new_available.remove(move)
            new_state.o.add(move)

            move_eval = rec_o(new_state, new_available, 0, moves_dict_o)

            if move_eval > best_val:
                best_val = move_eval
                best_move = move

        moves_dict_o[(frozenset(state.x), frozenset(state.o))] = best_move
        return best_move
    
    def move(self, state, available, player):
        hashable_state = (frozenset(state.x), frozenset(state.o))
        if hashable_state in self.moves_dict_o.keys(): return self.moves_dict_o[hashable_state]
        else: return self.compute_moves_o(state, available, self.moves_dict_o)

In [14]:
EPSILON = 0.001

In [15]:
value_dictionary = init_val_dict(EPSILON, 5_000)

moves_dict = compute_move_dict(value_dictionary)

win_player_first = 0
win_rand_first = 0
win_player_second = 0
win_rand_second = 0
draw_player_first = 0
draw_player_second = 0

player = Player_montecarlo(moves_dict)
rand_player = Random_player()

for _ in range(1000):

    _, winner = game(player, rand_player)
    if winner == 1: win_player_first += 1
    elif winner == 2: win_rand_second += 1
    else: draw_player_first += 1

    _, winner = game(rand_player, player)
    if winner == 2: win_player_second += 1
    elif winner == 1: win_rand_first += 1
    else: draw_player_second += 1
   

print(f'montecarlo won {win_player_first} times as first and {win_player_second} as second, total: {win_player_first + win_player_second}')
print(f'random won {win_rand_first} times as first and {win_rand_second} as second, total: {win_rand_first + win_rand_second}')
print(f'draw games: {draw_player_first} times with player first and {draw_player_second} with player second: total: {draw_player_first + draw_player_second}')

100%|██████████| 5000/5000 [00:00<00:00, 8946.29it/s]


montecarlo won 841 times as first and 184 as second, total: 1025
random won 668 times as first and 93 as second, total: 761
draw games: 66 times with player first and 148 with player second: total: 214


In [17]:
win_player_first = 0
win_rand_first = 0
win_player_second = 0
win_rand_second = 0
draw_player_first = 0
draw_player_second = 0

rand_player = Random_player()
player_x = Player_x()
player_o = Player_o()

for _ in tqdm(range(1000)):

    _, winner = game(rand_player, player_o)
    if winner == 1: win_player_first += 1
    elif winner == 2: win_rand_second += 1
    else: draw_player_first += 1

    _, winner = game(player_x, rand_player)
    if winner == 2: win_player_second += 1
    elif winner == 1: win_rand_first += 1
    else: draw_player_second += 1
   

print(f'random won {win_player_first} times as first and {win_player_second} as second, total: {win_player_first + win_player_second}')
print(f'minmax won {win_rand_first} times as first and {win_rand_second} as second, total: {win_rand_first + win_rand_second}')
print(f'draw games: {draw_player_first} times with player first and {draw_player_second} with player second: total: {draw_player_first + draw_player_second}')

100%|██████████| 1000/1000 [00:07<00:00, 131.85it/s]

random won 163 times as first and 0 as second, total: 163
minmax won 912 times as first and 569 as second, total: 1481
draw games: 268 times with player first and 88 with player second: total: 356





In [27]:
win_player_first = 0
win_rand_first = 0
win_player_second = 0
win_rand_second = 0
draw_player_first = 0
draw_player_second = 0

player_x = Player_x()
player_o = Player_o()

for ii in range(100):

    print(ii)
    value_dictionary = init_val_dict(EPSILON, 5_000)
    moves_dict = compute_move_dict(value_dictionary)
    player = Player_montecarlo(moves_dict)

    _, winner = game(player, player_o)
    if winner == 1: win_player_first += 1
    elif winner == 2: win_rand_second += 1
    else: draw_player_first += 1

    _, winner = game(player_x, player)
    if winner == 2: win_player_second += 1
    elif winner == 1: win_rand_first += 1
    else: draw_player_second += 1
   

print(f'montecarlo won {win_player_first} times as first and {win_player_second} as second, total: {win_player_first + win_player_second}')
print(f'minmax won {win_rand_first} times as first and {win_rand_second} as second, total: {win_rand_first + win_rand_second}')
print(f'draw games: {draw_player_first} times with player first and {draw_player_second} with player second: total: {draw_player_first + draw_player_second}')

0


100%|██████████| 5000/5000 [00:00<00:00, 9249.72it/s]


1


100%|██████████| 5000/5000 [00:00<00:00, 9248.89it/s]


2


100%|██████████| 5000/5000 [00:00<00:00, 8689.70it/s]


3


100%|██████████| 5000/5000 [00:00<00:00, 9192.88it/s]


4


100%|██████████| 5000/5000 [00:00<00:00, 9319.08it/s]


5


100%|██████████| 5000/5000 [00:00<00:00, 9230.43it/s]


6


100%|██████████| 5000/5000 [00:00<00:00, 9131.30it/s]


7


100%|██████████| 5000/5000 [00:00<00:00, 8596.39it/s]


8


100%|██████████| 5000/5000 [00:00<00:00, 9286.07it/s]


9


100%|██████████| 5000/5000 [00:00<00:00, 9241.57it/s]


10


100%|██████████| 5000/5000 [00:00<00:00, 9232.73it/s]


11


100%|██████████| 5000/5000 [00:00<00:00, 9232.73it/s]


12


100%|██████████| 5000/5000 [00:00<00:00, 8630.21it/s]


13


100%|██████████| 5000/5000 [00:00<00:00, 9162.62it/s]


14


100%|██████████| 5000/5000 [00:00<00:00, 9243.44it/s]


15


100%|██████████| 5000/5000 [00:00<00:00, 9243.07it/s]


16


100%|██████████| 5000/5000 [00:00<00:00, 9093.23it/s]


17


100%|██████████| 5000/5000 [00:00<00:00, 8568.52it/s]


18


100%|██████████| 5000/5000 [00:00<00:00, 9299.58it/s]


19


100%|██████████| 5000/5000 [00:00<00:00, 9242.44it/s]


20


100%|██████████| 5000/5000 [00:00<00:00, 9265.45it/s]


21


100%|██████████| 5000/5000 [00:00<00:00, 9299.32it/s]


22


100%|██████████| 5000/5000 [00:00<00:00, 9300.18it/s]


23


100%|██████████| 5000/5000 [00:00<00:00, 9266.89it/s]


24


100%|██████████| 5000/5000 [00:00<00:00, 9167.85it/s]


25


100%|██████████| 5000/5000 [00:00<00:00, 9266.89it/s]


26


100%|██████████| 5000/5000 [00:00<00:00, 9348.51it/s]


27


100%|██████████| 5000/5000 [00:00<00:00, 9178.75it/s]


28


100%|██████████| 5000/5000 [00:00<00:00, 9249.10it/s]


29


100%|██████████| 5000/5000 [00:00<00:00, 9217.31it/s]


30


100%|██████████| 5000/5000 [00:00<00:00, 9288.46it/s]


31


100%|██████████| 5000/5000 [00:00<00:00, 9285.42it/s]


32


100%|██████████| 5000/5000 [00:00<00:00, 9181.17it/s]


33


100%|██████████| 5000/5000 [00:00<00:00, 9114.34it/s]


34


100%|██████████| 5000/5000 [00:00<00:00, 9196.14it/s]


35


100%|██████████| 5000/5000 [00:00<00:00, 9260.84it/s]


36


100%|██████████| 5000/5000 [00:00<00:00, 8632.74it/s]


37


100%|██████████| 5000/5000 [00:00<00:00, 9178.26it/s]


38


100%|██████████| 5000/5000 [00:00<00:00, 9305.90it/s]


39


100%|██████████| 5000/5000 [00:00<00:00, 9115.20it/s]


40


100%|██████████| 5000/5000 [00:00<00:00, 9310.03it/s]


41


100%|██████████| 5000/5000 [00:00<00:00, 8538.62it/s]


42


100%|██████████| 5000/5000 [00:00<00:00, 9166.76it/s]


43


100%|██████████| 5000/5000 [00:00<00:00, 9245.40it/s]


44


100%|██████████| 5000/5000 [00:00<00:00, 9145.45it/s]


45


100%|██████████| 5000/5000 [00:00<00:00, 9124.37it/s]


46


100%|██████████| 5000/5000 [00:00<00:00, 8511.16it/s]


47


100%|██████████| 5000/5000 [00:00<00:00, 9164.90it/s]


48


100%|██████████| 5000/5000 [00:00<00:00, 9249.87it/s]


49


100%|██████████| 5000/5000 [00:00<00:00, 8870.46it/s]


50


100%|██████████| 5000/5000 [00:00<00:00, 9090.62it/s]


51


100%|██████████| 5000/5000 [00:00<00:00, 8425.15it/s]


52


100%|██████████| 5000/5000 [00:00<00:00, 9055.26it/s]


53


100%|██████████| 5000/5000 [00:00<00:00, 9097.03it/s]


54


100%|██████████| 5000/5000 [00:00<00:00, 9003.37it/s]


55


100%|██████████| 5000/5000 [00:00<00:00, 9133.75it/s]


56


100%|██████████| 5000/5000 [00:00<00:00, 8373.70it/s]


57


100%|██████████| 5000/5000 [00:00<00:00, 9038.58it/s]


58


100%|██████████| 5000/5000 [00:00<00:00, 8893.47it/s]


59


100%|██████████| 5000/5000 [00:00<00:00, 9113.03it/s]


60


100%|██████████| 5000/5000 [00:00<00:00, 9021.78it/s]


61


100%|██████████| 5000/5000 [00:00<00:00, 8446.86it/s]


62


100%|██████████| 5000/5000 [00:00<00:00, 8984.53it/s]


63


100%|██████████| 5000/5000 [00:00<00:00, 9109.88it/s]


64


100%|██████████| 5000/5000 [00:00<00:00, 8793.62it/s]


65


100%|██████████| 5000/5000 [00:00<00:00, 9060.88it/s]


66


100%|██████████| 5000/5000 [00:00<00:00, 8341.71it/s]


67


100%|██████████| 5000/5000 [00:00<00:00, 8945.39it/s]


68


100%|██████████| 5000/5000 [00:00<00:00, 9031.23it/s]


69


100%|██████████| 5000/5000 [00:00<00:00, 8915.45it/s]


70


100%|██████████| 5000/5000 [00:00<00:00, 8991.61it/s]


71


100%|██████████| 5000/5000 [00:00<00:00, 9032.36it/s]


72


100%|██████████| 5000/5000 [00:00<00:00, 8939.23it/s]


73


100%|██████████| 5000/5000 [00:00<00:00, 9128.58it/s]


74


100%|██████████| 5000/5000 [00:00<00:00, 9110.31it/s]


75


100%|██████████| 5000/5000 [00:00<00:00, 9081.62it/s]


76


100%|██████████| 5000/5000 [00:00<00:00, 9093.85it/s]


77


100%|██████████| 5000/5000 [00:00<00:00, 8777.99it/s]


78


100%|██████████| 5000/5000 [00:00<00:00, 8980.97it/s]


79


100%|██████████| 5000/5000 [00:00<00:00, 9114.72it/s]


80


100%|██████████| 5000/5000 [00:00<00:00, 8449.94it/s]


81


100%|██████████| 5000/5000 [00:00<00:00, 9143.03it/s]


82


100%|██████████| 5000/5000 [00:00<00:00, 8958.21it/s]


83


100%|██████████| 5000/5000 [00:00<00:00, 8781.43it/s]


84


100%|██████████| 5000/5000 [00:00<00:00, 8917.57it/s]


85


100%|██████████| 5000/5000 [00:00<00:00, 7630.70it/s]


86


100%|██████████| 5000/5000 [00:00<00:00, 8034.23it/s]


87


100%|██████████| 5000/5000 [00:00<00:00, 8369.54it/s]


88


100%|██████████| 5000/5000 [00:00<00:00, 8356.52it/s]


89


100%|██████████| 5000/5000 [00:00<00:00, 8463.94it/s]


90


100%|██████████| 5000/5000 [00:00<00:00, 7538.91it/s]


91


100%|██████████| 5000/5000 [00:00<00:00, 8451.59it/s]


92


100%|██████████| 5000/5000 [00:00<00:00, 8609.29it/s]


93


100%|██████████| 5000/5000 [00:00<00:00, 8764.14it/s]


94


100%|██████████| 5000/5000 [00:00<00:00, 8962.80it/s]


95


100%|██████████| 5000/5000 [00:00<00:00, 8382.15it/s]


96


100%|██████████| 5000/5000 [00:00<00:00, 8963.39it/s]


97


100%|██████████| 5000/5000 [00:00<00:00, 9037.28it/s]


98


100%|██████████| 5000/5000 [00:00<00:00, 9080.26it/s]


99


100%|██████████| 5000/5000 [00:00<00:00, 9030.21it/s]

montecarlo won 41 times as first and 0 as second, total: 41
minmax won 88 times as first and 29 as second, total: 117
draw games: 30 times with player first and 12 with player second: total: 42





In [52]:
value_dictionary = init_val_dict(EPSILON, 5_000_000)

moves_dict = compute_move_dict(value_dictionary)

win_player_first = 0
win_rand_first = 0
win_player_second = 0
win_rand_second = 0
draw_player_first = 0
draw_player_second = 0

player = Player_montecarlo(moves_dict)
rand_player = Random_player()

for _ in range(1000):

    _, winner = game(player, rand_player)
    if winner == 1: win_player_first += 1
    elif winner == 2: win_rand_second += 1
    else: draw_player_first += 1

    _, winner = game(rand_player, player)
    if winner == 2: win_player_second += 1
    elif winner == 1: win_rand_first += 1
    else: draw_player_second += 1
   

print(f'montecarlo won {win_player_first} times as first and {win_player_second} as second, total: {win_player_first + win_player_second}')
print(f'random won {win_rand_first} times as first and {win_rand_second} as second, total: {win_rand_first + win_rand_second}')
print(f'draw games: {draw_player_first} times with player first and {draw_player_second} with player second: total: {draw_player_first + draw_player_second}')

100%|██████████| 5000000/5000000 [09:32<00:00, 8741.12it/s]


montecarlo won 966 times as first and 259 as second, total: 1225
random won 352 times as first and 9 as second, total: 361
draw games: 25 times with player first and 389 with player second: total: 414


In [24]:
win_player_first = 0
win_rand_first = 0
win_player_second = 0
win_rand_second = 0
draw_player_first = 0
draw_player_second = 0

player_x = Player_x()
player_o = Player_o()

for ii in range(10):

    print(ii)
    value_dictionary = init_val_dict(EPSILON, 5_000_000)
    moves_dict = compute_move_dict(value_dictionary)
    player = Player_montecarlo(moves_dict)

    _, winner = game(player, player_o)
    if winner == 1: win_player_first += 1
    elif winner == 2: win_rand_second += 1
    else: draw_player_first += 1

    _, winner = game(player_x, player)
    if winner == 2: win_player_second += 1
    elif winner == 1: win_rand_first += 1
    else: draw_player_second += 1
   

print(f'montecarlo won {win_player_first} times as first and {win_player_second} as second, total: {win_player_first + win_player_second}')
print(f'minmax won {win_rand_first} times as first and {win_rand_second} as second, total: {win_rand_first + win_rand_second}')
print(f'draw games: {draw_player_first} times with player first and {draw_player_second} with player second: total: {draw_player_first + draw_player_second}')

0


100%|██████████| 5000000/5000000 [08:40<00:00, 9600.92it/s]


1


100%|██████████| 5000000/5000000 [08:43<00:00, 9556.85it/s] 


2


100%|██████████| 5000000/5000000 [09:04<00:00, 9187.90it/s]


3


100%|██████████| 5000000/5000000 [09:09<00:00, 9093.34it/s]


4


100%|██████████| 5000000/5000000 [09:15<00:00, 8995.43it/s]


5


100%|██████████| 5000000/5000000 [09:15<00:00, 8996.08it/s]


6


100%|██████████| 5000000/5000000 [09:16<00:00, 8979.27it/s]


7


100%|██████████| 5000000/5000000 [09:14<00:00, 9025.05it/s]


8


100%|██████████| 5000000/5000000 [08:51<00:00, 9408.79it/s]


9


100%|██████████| 5000000/5000000 [08:43<00:00, 9558.76it/s]

montecarlo won 8 times as first and 0 as second, total: 8
minmax won 8 times as first and 0 as second, total: 8
draw games: 2 times with player first and 2 with player second: total: 4



