Copyright **`(c)`** 2023 Giovanni Squillero `<giovanni.squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  

In [1]:
from itertools import combinations
from collections import namedtuple, defaultdict
from random import choice
from copy import deepcopy

from tqdm.auto import tqdm
import numpy as np

In [2]:
State = namedtuple('State', ['x', 'o'])

In [3]:
MAGIC = [2, 7, 6, 9, 5, 1, 4, 3, 8]

In [4]:
def print_board(pos):
    """Nicely prints the board"""
    for r in range(3):
        for c in range(3):
            i = r * 3 + c
            if MAGIC[i] in pos.x:
                print('X', end='')
            elif MAGIC[i] in pos.o:
                print('O', end='')
            else:
                print('.', end='')
        print()
    print()

In [5]:
def win(elements):
    """Checks is elements is winning"""
    return any(sum(c) == 15 for c in combinations(elements, 3))

def state_value(pos: State):
    """Evaluate state: +1 first player wins"""
    if win(pos.x):
        return 1
    elif win(pos.o):
        return -1
    else:
        return 0
    
    

In [6]:
def random_game(starting_player):
    trajectory = list()
    state = State(set(), set())
    available = set(range(1, 9+1))
    players = [state.x, state.o] if starting_player == 'x' else [state.o, state.x]
    while available:
        for player in players:
            move = choice(list(available))
            player.add(move)
            trajectory.append(deepcopy(state))
            available.remove(move)
            if win(player) or not available:
                return trajectory

In [7]:
value_dictionary = defaultdict(float)
hit_state = defaultdict(int)
epsilon = 0.001
starting_player = 'x'
for steps in tqdm(range(500_000)):
    if starting_player == 'x':
        starting_player = 'o'
    else:
        starting_player = 'x'
    trajectory = random_game(starting_player)
    final_reward = state_value(trajectory[-1])
    for state in trajectory:
        hashable_state = (frozenset(state.x), frozenset(state.o))
        hit_state[hashable_state] += 1
        value_dictionary[hashable_state] = value_dictionary[
            hashable_state
        ] + epsilon * (final_reward - value_dictionary[hashable_state])

  0%|          | 0/500000 [00:00<?, ?it/s]

In [8]:
def test_players(value_dictionary, starting_player, epsilon=0.001):
    state = State(set(), set())
    available = set(range(1, 9+1))
    players = [state.x, state.o] if starting_player == 'x' else [state.o, state.x]

    while available:
        for player in players:
            if player == state.x:
                # Random player's move
                move = choice(list(available))
            else:
                # Player using value_dictionary's move
                hashable_state = (frozenset(state.x), frozenset(state.o))
                possible_moves = [move for move in available if (move,) not in hashable_state]
                move = max(possible_moves, key=lambda move: value_dictionary[hashable_state + ((move,),)])

            player.add(move)
            available.remove(move)

            # Check for win or end of the game
            if win(player) or not available:
                return state

    return state

In [9]:
def play_multiple_games(num_games, value_dictionary, epsilon=0.001):
    wins_x = 0
    wins_o = 0
    draws = 0
    starting_player = 'x'
    for _ in range(num_games):
        if starting_player == 'x':
            starting_player = 'o'
        else:
            starting_player = 'x'
        final_state = test_players(value_dictionary, epsilon)

        if win(final_state.x):
            wins_x += 1
        elif win(final_state.o):
            wins_o += 1
        else:
            draws += 1

    print(f"Results after {num_games} games:")
    print(f"Player X wins: {wins_x}")
    print(f"Player O wins: {wins_o}")
    print(f"Draws: {draws}")


# Play 100 games and print statistics
play_multiple_games(100, value_dictionary)


Results after 100 games:
Player X wins: 19
Player O wins: 62
Draws: 19
