Copyright **`(c)`** 2023 Giovanni Squillero `<giovanni.squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  

In [11]:
from itertools import combinations
from collections import namedtuple, defaultdict
from random import choice
from copy import deepcopy

from tqdm.auto import tqdm
import numpy as np

In [12]:
State = namedtuple('State', ['x', 'o'])

In [13]:
MAGIC = [2, 7, 6, 9, 5, 1, 4, 3, 8]

In [14]:
def print_board(pos):
    """Nicely prints the board"""
    for r in range(3):
        for c in range(3):
            i = r * 3 + c
            if MAGIC[i] in pos.x:
                print('X', end='')
            elif MAGIC[i] in pos.o:
                print('O', end='')
            else:
                print('.', end='')
        print()
    print()

In [15]:
def win(elements):
    """Checks is elements is winning"""
    return any(sum(c) == 15 for c in combinations(elements, 3))

def state_value(pos: State):
    """Evaluate state: +1 first player wins"""
    if win(pos.x):
        return 1
    elif win(pos.o):
        return -1
    else:
        return 0

In [16]:
class Random_player:
    def __init__(self): pass

    def move(self, state, available): return choice(list(available))

In [17]:
def rec_x(state, available, player, moves_dict_x):

    state_val = state_value(state)
    if state_val != 0 or not available: return state_val

    if player == 0:
        max_eval = -2
        best_x = None

        for x in available:

            new_state = deepcopy(state)
            new_available = available.copy()
            new_available.remove(x)
            new_state.x.add(x)

            x_eval = rec_x(new_state, new_available, 1 - player, moves_dict_x)

            #max_eval = max(max_eval, x_eval)
            if x_eval > max_eval:
                max_eval = x_eval
                best_x = x

        moves_dict_x[(frozenset(state.x), frozenset(state.o))] = best_x
        
        return max_eval
    
    else:
        min_eval = 2

        for o in available:

            new_state = deepcopy(state)
            new_available = available.copy()
            new_available.remove(o)
            new_state.o.add(o)

            o_eval = rec_x(new_state, new_available, 1 - player, moves_dict_x)

            min_eval = min(min_eval, o_eval)

        return min_eval
    
class Player_x:
    def __init__(self):
        self.moves_dict_x = {}
        _ = self.compute_moves_x(State(set(), set()), set(range(1, 10)), self.moves_dict_x)
    
    def compute_moves_x(self, state, available, moves_dict_x):

        best_val = -2
        best_move = None

        for move in available:

            new_state = deepcopy(state)
            new_available = available.copy()
            new_available.remove(move)
            new_state.x.add(move)

            move_eval = rec_x(new_state, new_available, 1, moves_dict_x)

            if move_eval > best_val:
                best_val = move_eval
                best_move = move

        moves_dict_x[(frozenset(state.x), frozenset(state.o))] = best_move
        return best_move
    
    def move(self, state, available):
        hashable_state = (frozenset(state.x), frozenset(state.o))
        if hashable_state in self.moves_dict_x.keys(): return self.moves_dict_x[hashable_state]
        else: return self.compute_moves_x(state, available, self.moves_dict_x)

In [18]:
def rec_o(state, available, player, moves_dict_o):

    state_val = state_value(state)
    if state_val != 0 or not available: return state_val

    if player == 0:
        max_eval = -2

        for x in available:

            new_state = deepcopy(state)
            new_available = available.copy()
            new_available.remove(x)
            new_state.x.add(x)

            x_eval = rec_o(new_state, new_available, 1 - player, moves_dict_o)

            max_eval = max(max_eval, x_eval)
        
        return max_eval
    
    else:
        min_eval = 2
        best_o = None

        for o in available:

            new_state = deepcopy(state)
            new_available = available.copy()
            new_available.remove(o)
            new_state.o.add(o)

            o_eval = rec_o(new_state, new_available, 1 - player, moves_dict_o)

            #min_eval = min(min_eval, o_eval)
            if o_eval < min_eval:
                min_eval = o_eval
                best_o = o

        moves_dict_o[(frozenset(state.x), frozenset(state.o))] = best_o

        return min_eval
    
class Player_o:
    def __init__(self):
        self.moves_dict_o = {}
        _ = self.compute_moves_o(State(set(), set()), set(range(1, 10)), self.moves_dict_o)
    
    def compute_moves_o(self, state, available, moves_dict_o):

        best_val = -2
        best_move = None

        for move in available:

            new_state = deepcopy(state)
            new_available = available.copy()
            new_available.remove(move)
            new_state.o.add(move)

            move_eval = rec_o(new_state, new_available, 0, moves_dict_o)

            if move_eval > best_val:
                best_val = move_eval
                best_move = move

        moves_dict_o[(frozenset(state.x), frozenset(state.o))] = best_move
        return best_move
    
    def move(self, state, available):
        hashable_state = (frozenset(state.x), frozenset(state.o))
        if hashable_state in self.moves_dict_o.keys(): return self.moves_dict_o[hashable_state]
        else: return self.compute_moves_o(state, available, self.moves_dict_o)

In [19]:
def game(player_1, player_2):

    state = State(set(), set())
    available = set(range(1, 10))
    winner = 0

    while available:

        move = player_1.move(state, available)
        state.x.add(move)
        available.remove(move)

        if win(state.x):
            winner = 1
            break
        if not available:
            break

        move = player_2.move(state, available)
        state.o.add(move)
        available.remove(move)
        
        if win(state.o): 
            winner = 2
            break

    return winner

In [20]:
win_player_first = 0
win_rand_first = 0
win_player_second = 0
win_rand_second = 0
draw_player_first = 0
draw_player_second = 0

player_x = Player_x()
player_o = Player_o()
random_player = Random_player()

for _ in tqdm(range(1000)):

    winner = game(player_x, random_player)
    if winner == 1: win_player_first += 1
    elif winner == 2: win_rand_second += 1
    else: draw_player_first += 1

    winner = game(random_player, player_o)
    if winner == 2: win_player_second += 1
    elif winner == 1: win_rand_first += 1
    else: draw_player_second += 1
   

print(f'player won {win_player_first} times as first and {win_player_second} as second, total: {win_player_first + win_player_second}')
print(f'random won {win_rand_first} times as first and {win_rand_second} as second, total: {win_rand_first + win_rand_second}')
print(f'draw games: {draw_player_first} times with player first and {draw_player_second} with player second: total: {draw_player_first + draw_player_second}')

100%|██████████| 1000/1000 [00:08<00:00, 116.18it/s]

player won 916 times as first and 591 as second, total: 1507
random won 168 times as first and 0 as second, total: 168
draw games: 84 times with player first and 241 with player second: total: 325



