Copyright **`(c)`** 2023 Giovanni Squillero `<giovanni.squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  

In [31]:
from itertools import combinations
from collections import namedtuple, defaultdict
from random import choice
from copy import deepcopy

from tqdm.auto import tqdm
import numpy as np

In [32]:
State = namedtuple('State', ['x', 'o'])

In [33]:
MAGIC = [2, 7, 6, 9, 5, 1, 4, 3, 8]

In [34]:
def print_board(pos):
    """Nicely prints the board"""
    for r in range(3):
        for c in range(3):
            i = r * 3 + c
            if MAGIC[i] in pos.x:
                print('X', end='')
            elif MAGIC[i] in pos.o:
                print('O', end='')
            else:
                print('.', end='')
        print()
    print()

In [35]:
def win(elements):
    """Checks is elements is winning"""
    return any(sum(c) == 15 for c in combinations(elements, 3))

def state_value(pos: State):
    """Evaluate state: +1 first player wins"""
    if win(pos.x):
        return 1
    elif win(pos.o):
        return -1
    else:
        return 0

In [36]:
class Random_player:
    def __init__(self): pass

    def move(self, state, available, player): return choice(list(available))

In [37]:
class Player_montecarlo:
    def __init__(self, moves_dict): self.moves_dict = moves_dict
        
    def move(self, state, available, player):

        if player == 0:
            ally_state = frozenset(state.x)
            enemy_state = frozenset(state.o)
        else:
            ally_state = frozenset(state.o)
            enemy_state = frozenset(state.x)
        
        if enemy_state in self.moves_dict.keys():

            for possible_configuration in self.moves_dict[enemy_state]:
                n_not_in = 0
                move = None
                for k in possible_configuration:
                    if k not in ally_state:
                        n_not_in += 1
                        move = k
                if n_not_in == 1 and move in available: return move

        #print('going random')
        return choice(list(available))

In [38]:
def game(player_1, player_2):
    trajectory = list()
    state = State(set(), set())
    available = set(range(1, 10))
    winner = 0
    while available:
        move = player_1.move(state, available, 0)
        state.x.add(move)
        trajectory.append(deepcopy(state))
        available.remove(move)
        if win(state.x):
            winner = 1
            break
        if not available:
            break

        move = player_2.move(state, available, 1)
        state.o.add(move)
        trajectory.append(deepcopy(state))
        available.remove(move)
        if win(state.o): 
            winner = 2
            break

    return trajectory, winner

In [39]:
def update_val_dict(val_dict, trajectory, reward, epsilon):
    for state in trajectory:
        hashable_state = (frozenset(state.x), frozenset(state.o))
        val_dict[hashable_state] = val_dict[
            hashable_state
        ] + epsilon * (reward - val_dict[hashable_state])

In [40]:
def compute_move_dict(val_dict):

    moves_dict = defaultdict()

    for x in sorted(val_dict.items(), key=lambda e: e[1], reverse=True):
        if x[0][1] not in moves_dict.keys(): moves_dict[x[0][1]] = []
        else: moves_dict[x[0][1]].append(x[0][0])

    return moves_dict

In [41]:
def init_val_dict(epsilon, n_tries):

    value_dictionary = defaultdict(float)
    random_player = Random_player()

    for _ in tqdm(range(n_tries)):
        trajectory, _ = game(random_player, random_player)
        final_reward = state_value(trajectory[-1])
        update_val_dict(value_dictionary, trajectory, final_reward, epsilon)

    return value_dictionary

In [42]:
def rec_x(state, available, player, moves_dict_x):

    state_val = state_value(state)
    if state_val != 0 or not available: return state_val

    if player == 0:
        max_eval = -2
        best_x = None

        for x in available:

            new_state = deepcopy(state)
            new_available = available.copy()
            new_available.remove(x)
            new_state.x.add(x)

            x_eval = rec_x(new_state, new_available, 1 - player, moves_dict_x)

            #max_eval = max(max_eval, x_eval)
            if x_eval > max_eval:
                max_eval = x_eval
                best_x = x

        moves_dict_x[(frozenset(state.x), frozenset(state.o))] = best_x
        
        return max_eval
    
    else:
        min_eval = 2

        for o in available:

            new_state = deepcopy(state)
            new_available = available.copy()
            new_available.remove(o)
            new_state.o.add(o)

            o_eval = rec_x(new_state, new_available, 1 - player, moves_dict_x)

            min_eval = min(min_eval, o_eval)

        return min_eval
    
class Player_x:
    def __init__(self):
        self.moves_dict_x = {}
        _ = self.compute_moves_x(State(set(), set()), set(range(1, 10)), self.moves_dict_x)
    
    def compute_moves_x(self, state, available, moves_dict_x):

        best_val = -2
        best_move = None

        for move in available:

            new_state = deepcopy(state)
            new_available = available.copy()
            new_available.remove(move)
            new_state.x.add(move)

            move_eval = rec_x(new_state, new_available, 1, moves_dict_x)

            if move_eval > best_val:
                best_val = move_eval
                best_move = move

        moves_dict_x[(frozenset(state.x), frozenset(state.o))] = best_move
        return best_move
    
    def move(self, state, available, player):
        hashable_state = (frozenset(state.x), frozenset(state.o))
        if hashable_state in self.moves_dict_x.keys(): return self.moves_dict_x[hashable_state]
        else: return self.compute_moves_x(state, available, self.moves_dict_x)

In [43]:
def rec_o(state, available, player, moves_dict_o):

    state_val = state_value(state)
    if state_val != 0 or not available: return state_val

    if player == 0:
        max_eval = -2

        for x in available:

            new_state = deepcopy(state)
            new_available = available.copy()
            new_available.remove(x)
            new_state.x.add(x)

            x_eval = rec_o(new_state, new_available, 1 - player, moves_dict_o)

            max_eval = max(max_eval, x_eval)
        
        return max_eval
    
    else:
        min_eval = 2
        best_o = None

        for o in available:

            new_state = deepcopy(state)
            new_available = available.copy()
            new_available.remove(o)
            new_state.o.add(o)

            o_eval = rec_o(new_state, new_available, 1 - player, moves_dict_o)

            #min_eval = min(min_eval, o_eval)
            if o_eval < min_eval:
                min_eval = o_eval
                best_o = o

        moves_dict_o[(frozenset(state.x), frozenset(state.o))] = best_o

        return min_eval
    
class Player_o:
    def __init__(self):
        self.moves_dict_o = {}
        _ = self.compute_moves_o(State(set(), set()), set(range(1, 10)), self.moves_dict_o)
    
    def compute_moves_o(self, state, available, moves_dict_o):

        best_val = -2
        best_move = None

        for move in available:

            new_state = deepcopy(state)
            new_available = available.copy()
            new_available.remove(move)
            new_state.o.add(move)

            move_eval = rec_o(new_state, new_available, 0, moves_dict_o)

            if move_eval > best_val:
                best_val = move_eval
                best_move = move

        moves_dict_o[(frozenset(state.x), frozenset(state.o))] = best_move
        return best_move
    
    def move(self, state, available, player):
        hashable_state = (frozenset(state.x), frozenset(state.o))
        if hashable_state in self.moves_dict_o.keys(): return self.moves_dict_o[hashable_state]
        else: return self.compute_moves_o(state, available, self.moves_dict_o)

In [44]:
EPSILON = 0.001

In [48]:
value_dictionary = init_val_dict(EPSILON, 5_000)

moves_dict = compute_move_dict(value_dictionary)

win_player_first = 0
win_rand_first = 0
win_player_second = 0
win_rand_second = 0
draw_player_first = 0
draw_player_second = 0

player = Player_montecarlo(moves_dict)
rand_player = Random_player()

for _ in range(1000):

    _, winner = game(player, rand_player)
    if winner == 1: win_player_first += 1
    elif winner == 2: win_rand_second += 1
    else: draw_player_first += 1

    _, winner = game(rand_player, player)
    if winner == 2: win_player_second += 1
    elif winner == 1: win_rand_first += 1
    else: draw_player_second += 1
   

print(f'montecarlo won {win_player_first} times as first and {win_player_second} as second, total: {win_player_first + win_player_second}')
print(f'random won {win_rand_first} times as first and {win_rand_second} as second, total: {win_rand_first + win_rand_second}')
print(f'draw games: {draw_player_first} times with player first and {draw_player_second} with player second: total: {draw_player_first + draw_player_second}')

100%|██████████| 5000/5000 [00:00<00:00, 8764.67it/s]


montecarlo won 855 times as first and 226 as second, total: 1081
random won 558 times as first and 64 as second, total: 622
draw games: 81 times with player first and 216 with player second: total: 297


In [53]:
win_player_first = 0
win_rand_first = 0
win_player_second = 0
win_rand_second = 0
draw_player_first = 0
draw_player_second = 0

rand_player = Random_player()
player_x = Player_x()
player_o = Player_o()

for _ in range(1000):

    _, winner = game(rand_player, player_o)
    if winner == 1: win_player_first += 1
    elif winner == 2: win_rand_second += 1
    else: draw_player_first += 1

    _, winner = game(player_x, rand_player)
    if winner == 2: win_player_second += 1
    elif winner == 1: win_rand_first += 1
    else: draw_player_second += 1
   

print(f'random won {win_player_first} times as first and {win_player_second} as second, total: {win_player_first + win_player_second}')
print(f'minmax won {win_rand_first} times as first and {win_rand_second} as second, total: {win_rand_first + win_rand_second}')
print(f'draw games: {draw_player_first} times with player first and {draw_player_second} with player second: total: {draw_player_first + draw_player_second}')

random won 157 times as first and 0 as second, total: 157
minmax won 899 times as first and 593 as second, total: 1492
draw games: 250 times with player first and 101 with player second: total: 351


In [51]:
value_dictionary = init_val_dict(EPSILON, 5_000)

moves_dict = compute_move_dict(value_dictionary)

win_player_first = 0
win_rand_first = 0
win_player_second = 0
win_rand_second = 0
draw_player_first = 0
draw_player_second = 0

player = Player_montecarlo(moves_dict)
player_x = Player_x()
player_o = Player_o()

for _ in range(1000):

    _, winner = game(player, player_o)
    if winner == 1: win_player_first += 1
    elif winner == 2: win_rand_second += 1
    else: draw_player_first += 1

    _, winner = game(player_x, player)
    if winner == 2: win_player_second += 1
    elif winner == 1: win_rand_first += 1
    else: draw_player_second += 1
   

print(f'montecarlo won {win_player_first} times as first and {win_player_second} as second, total: {win_player_first + win_player_second}')
print(f'minmax won {win_rand_first} times as first and {win_rand_second} as second, total: {win_rand_first + win_rand_second}')
print(f'draw games: {draw_player_first} times with player first and {draw_player_second} with player second: total: {draw_player_first + draw_player_second}')

100%|██████████| 5000/5000 [00:00<00:00, 8688.69it/s]


montecarlo won 0 times as first and 0 as second, total: 0
minmax won 0 times as first and 0 as second, total: 0
draw games: 1000 times with player first and 1000 with player second: total: 2000


In [52]:
value_dictionary = init_val_dict(EPSILON, 5_000_000)

moves_dict = compute_move_dict(value_dictionary)

win_player_first = 0
win_rand_first = 0
win_player_second = 0
win_rand_second = 0
draw_player_first = 0
draw_player_second = 0

player = Player_montecarlo(moves_dict)
rand_player = Random_player()

for _ in range(1000):

    _, winner = game(player, rand_player)
    if winner == 1: win_player_first += 1
    elif winner == 2: win_rand_second += 1
    else: draw_player_first += 1

    _, winner = game(rand_player, player)
    if winner == 2: win_player_second += 1
    elif winner == 1: win_rand_first += 1
    else: draw_player_second += 1
   

print(f'montecarlo won {win_player_first} times as first and {win_player_second} as second, total: {win_player_first + win_player_second}')
print(f'random won {win_rand_first} times as first and {win_rand_second} as second, total: {win_rand_first + win_rand_second}')
print(f'draw games: {draw_player_first} times with player first and {draw_player_second} with player second: total: {draw_player_first + draw_player_second}')

100%|██████████| 5000000/5000000 [09:32<00:00, 8741.12it/s]


montecarlo won 966 times as first and 259 as second, total: 1225
random won 352 times as first and 9 as second, total: 361
draw games: 25 times with player first and 389 with player second: total: 414


In [54]:
value_dictionary = init_val_dict(EPSILON, 5_000_000)

moves_dict = compute_move_dict(value_dictionary)

win_player_first = 0
win_rand_first = 0
win_player_second = 0
win_rand_second = 0
draw_player_first = 0
draw_player_second = 0

player = Player_montecarlo(moves_dict)
player_x = Player_x()
player_o = Player_o()

for _ in range(1000):

    _, winner = game(player, player_o)
    if winner == 1: win_player_first += 1
    elif winner == 2: win_rand_second += 1
    else: draw_player_first += 1

    _, winner = game(player_x, player)
    if winner == 2: win_player_second += 1
    elif winner == 1: win_rand_first += 1
    else: draw_player_second += 1
   

print(f'montecarlo won {win_player_first} times as first and {win_player_second} as second, total: {win_player_first + win_player_second}')
print(f'minmax won {win_rand_first} times as first and {win_rand_second} as second, total: {win_rand_first + win_rand_second}')
print(f'draw games: {draw_player_first} times with player first and {draw_player_second} with player second: total: {draw_player_first + draw_player_second}')

100%|██████████| 5000000/5000000 [09:55<00:00, 8389.71it/s]


montecarlo won 1000 times as first and 0 as second, total: 1000
minmax won 1000 times as first and 0 as second, total: 1000
draw games: 0 times with player first and 0 with player second: total: 0
