**Authors**:  
[`Marcello Vitaggio`](https://github.com/Kalller/computational-intelligence) `<s318904@studenti.polito.it>`  
[`Giovanni Squillero`](https://github.com/squillero/computational-intelligence) `<giovanni.squillero@polito.it>`  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/Kalller/computational-intelligence/blob/main/LICENSE.md) for details.  

In [2]:
from itertools import combinations
from collections import namedtuple, defaultdict
from random import choice
from copy import deepcopy

from tqdm.auto import tqdm
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


# LAB10 (LAB4)

Use reinforcement learning to devise a tic-tac-toe player.

### Deadlines:

* Submission: [Dies Natalis Solis Invicti](https://en.wikipedia.org/wiki/Sol_Invictus)
* Reviews: [Befana](https://en.wikipedia.org/wiki/Befana)

In [3]:
# A board position: `x` and `o` hold the cell values claimed by each player.
State = namedtuple('State', ['x', 'o'])
# Cell values in row-major board order (print_board reads index r * 3 + c).
# The layout is a 3x3 magic square: every row, column and diagonal sums to 15,
# which is the property win() tests for.
MAGIC = [2, 7, 6, 9, 5, 1, 4, 3, 8]

In [79]:
def print_board(pos):
    """Print the 3x3 board as emoji rows, followed by a blank line.

    `pos` must expose `x` and `o` containers of claimed cell values; a cell
    claimed in `pos.x` is drawn as X even if it also appears in `pos.o`.
    """
    def symbol(cell):
        if cell in pos.x:
            return '❌'
        if cell in pos.o:
            return '⭕'
        return '⬜'

    # MAGIC is laid out row by row, so each slice of three is one board row.
    for start in range(0, 9, 3):
        print(''.join(symbol(cell) for cell in MAGIC[start:start + 3]))
    print()

def win(elements):
    """Return True if any three values in `elements` add up to 15."""
    for triple in combinations(elements, 3):
        if sum(triple) == 15:
            return True
    return False

def state_value(pos: State, p):
    """Score position `pos` from the point of view of player `p`.

    Returns 1 when `p` has won, -1 when the other side has won and 0
    otherwise. X's cells are checked before O's. Any `p` other than
    'X' or 'O' scores 0.
    """
    if p not in ('X', 'O'):
        return 0

    # Reward for an X win as seen by `p`; an O win is worth the opposite.
    x_win_reward = 1 if p == 'X' else -1
    if win(pos.x):
        return x_win_reward
    if win(pos.o):
        return -x_win_reward
    return 0

In [5]:
def random_game():
    """Play one game with uniformly random moves and return its trajectory.

    X moves first and the sides alternate. The returned list holds a deep
    copy of the position after every move; the game stops as soon as the
    side that just moved has won or no free cell is left.
    """
    board = State(set(), set())
    free_cells = set(range(1, 9 + 1))
    history = []
    mover = 'x'
    while free_cells:
        cell = choice(list(free_cells))
        marks = getattr(board, mover)
        marks.add(cell)
        # Snapshot the position: `board` keeps being mutated afterwards.
        history.append(deepcopy(board))
        free_cells.remove(cell)
        if win(marks):
            break
        mover = 'o' if mover == 'x' else 'x'
    return history

In [155]:
def play_multiple_games(agent1, agent2, num_games):
    """Play `num_games` games between two agents and print their win rates.

    `agent1` plays X and always moves first; `agent2` plays O. Each agent is
    called as `agent(state, player)` and must return the updated state.

    The printed percentages are computed over decisive games only: draws are
    left out of the denominator. When no game was decisive (all draws, or
    `num_games` is 0) both percentages are reported as 0.00% instead of
    raising ZeroDivisionError.
    """
    wins_agent1 = 0
    wins_agent2 = 0

    for _ in tqdm(range(num_games)):
        state = State(set(), set())
        player = 'X'
        while True:
            agent = agent1 if player == 'X' else agent2
            state = agent(state, player)

            if win(state.x):
                wins_agent1 += 1
                break
            if win(state.o):
                wins_agent2 += 1
                break
            if len(state.x) + len(state.o) == 9:
                # Board is full with no winner: a draw, counted for neither agent.
                break

            player = 'O' if player == 'X' else 'X'

    total_games = wins_agent1 + wins_agent2
    if total_games:
        win_percentage_agent1 = (wins_agent1 / total_games) * 100
        win_percentage_agent2 = (wins_agent2 / total_games) * 100
    else:
        # No decisive game at all; avoid dividing by zero.
        win_percentage_agent1 = win_percentage_agent2 = 0.0

    print(f"Agent 1 win percentage: {win_percentage_agent1:.2f}%")
    print(f"Agent 2 win percentage: {win_percentage_agent2:.2f}%")


In [7]:
def play_game(agent1, agent2):
    """Play a single game, printing the board after every move.

    `agent1` plays X and moves first, `agent2` plays O. The empty board is
    printed before the first move and the outcome is announced at the end.
    """
    board = State(set(), set())
    print_board(board)
    turn = 'X'
    while True:
        mover = agent1 if turn == 'X' else agent2
        board = mover(board, turn)
        print_board(board)

        if win(board.x):
            print("X wins!")
            return
        if win(board.o):
            print("O wins!")
            return
        if len(board.x) + len(board.o) == 9:
            print("It's a draw!")
            return

        turn = 'X' if turn == 'O' else 'O'

In [8]:
def random_agent(state, player):
    """Claim a uniformly random free cell for `player` and return `state`.

    The move is added in place to `state.x` when `player` is 'X' and to
    `state.o` otherwise; the same `state` object is returned.
    """
    # A cell is free when neither side has claimed it.
    free_cells = [cell for cell in MAGIC
                  if not (cell in state.x or cell in state.o)]
    picked = choice(free_cells)

    own_cells = state.x if player == 'X' else state.o
    own_cells.add(picked)
    return state

In [150]:
# One value table per side, keyed by (frozenset of X cells, frozenset of O cells).
value_dictionaries = {'X': defaultdict(float), 'O': defaultdict(float)}
epsilon = 0.001  # step size of the incremental update below
eps = 500_000  # number of random games sampled for each side

for player in ('X', 'O'):
    table = value_dictionaries[player]
    for _ in tqdm(range(eps)):
        trajectory = random_game()
        # Every position of the game is pulled towards the final outcome,
        # scored from `player`'s point of view.
        final_reward = state_value(trajectory[-1], player)
        for state in trajectory:
            key = (frozenset(state.x), frozenset(state.o))
            table[key] += epsilon * (final_reward - table[key])


100%|██████████| 500000/500000 [00:49<00:00, 10170.94it/s]
100%|██████████| 500000/500000 [00:49<00:00, 10163.99it/s]


In [158]:
def rl_agent(state, player):
    """Play the free cell leading to the position with the highest learned value.

    Values are read from `value_dictionaries[player]`, keyed by
    `(frozenset(x_cells), frozenset(o_cells))`; positions missing from the
    table count as 0. Ties go to the first candidate in MAGIC order.

    The chosen move is added in place to `state.x` when `player` is 'X' and
    to `state.o` otherwise, and the same `state` object is returned. If the
    board has no free cell, `state` is returned unchanged.
    """
    available_moves = [i for i in MAGIC if i not in state.x and i not in state.o]
    if not available_moves:
        # Nothing to play; adding a placeholder move would corrupt the board.
        return state

    player_dict = value_dictionaries[player]

    def value_after(move):
        # Evaluate on copies so the real board is only touched by the final move.
        x_cells, o_cells = set(state.x), set(state.o)
        (x_cells if player == 'X' else o_cells).add(move)
        # .get() keeps unseen positions from being inserted into the defaultdict.
        return player_dict.get((frozenset(x_cells), frozenset(o_cells)), 0)

    # max() returns the first maximal item, matching a strict ">" scan.
    best_move = max(available_moves, key=value_after)

    if player == 'X':
        state.x.add(best_move)
    else:
        state.o.add(best_move)

    return state


In [160]:
# The first agent plays X and moves first, the second plays O.
# The reported percentages are over decisive games only (draws are excluded).
play_multiple_games(random_agent,rl_agent,10000)
play_multiple_games(rl_agent,random_agent,10000)

100%|██████████| 10000/10000 [00:00<00:00, 36316.24it/s]


Agent 1 win percentage: 4.02%
Agent 2 win percentage: 95.98%


100%|██████████| 10000/10000 [00:00<00:00, 39177.21it/s]

Agent 1 win percentage: 100.00%
Agent 2 win percentage: 0.00%





In [154]:
# Single demo game: random_agent plays X (first), rl_agent plays O.
play_game(random_agent,rl_agent)

⬜⬜⬜
⬜⬜⬜
⬜⬜⬜

⬜⬜❌
⬜⬜⬜
⬜⬜⬜

⬜⬜❌
⬜⭕⬜
⬜⬜⬜

⬜⬜❌
⬜⭕❌
⬜⬜⬜

⬜⬜❌
⬜⭕❌
⬜⬜⭕

❌⬜❌
⬜⭕❌
⬜⬜⭕

❌⭕❌
⬜⭕❌
⬜⬜⭕

❌⭕❌
⬜⭕❌
❌⬜⭕

❌⭕❌
⬜⭕❌
❌⭕⭕

O wins!
