Copyright **`(c)`** 2023 Giovanni Squillero `<giovanni.squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  

# LAB10

Use reinforcement learning to devise a tic-tac-toe player.

### Deadlines:

* Submission: Sunday, December 17 ([CET](https://www.timeanddate.com/time/zones/cet))
* Reviews: Dies Natalis Solis Invicti ([CET](https://en.wikipedia.org/wiki/Sol_Invictus))

Notes:

* Reviews will be assigned on Monday, December 4
* You need to commit in order to be selected as a reviewer (i.e., better to commit an empty work than not to commit)

AGENT BASIC 1 - Tecnica base di reinforcement learning, sviluppata senza aver studiato la teoria sull'argomento, senza particolari shortcut sulla rappresentazione del problema e senza aver visto la lezione di aiuto del professore. Tutto "by myself", con un tasso di successo medio intorno all'85%.

In [312]:
import logging
import random
from copy import deepcopy, copy
from tqdm import tqdm
from matplotlib import pyplot as plt
from dataclasses import dataclass
import numpy as np


In [313]:
# GLOBAL PARAMETERS #
# Number of matches played by the training loop.
NUM_OF_MATCHES = 1000
# NOTE(review): the four reward/stall constants below are not referenced by any
# code visible in this notebook; the learning rule uses hard-coded factors
# instead. Confirm whether they are still needed.
WINNING_REWARD_IF_NOT_STARTING = 3
WINNING_REWARD_IF_STARTING = 1
STALL_IF_STARTING = -1
STALL_IF_NOT_STARTING = 1
# Multiplier applied by learning_strategy to each recorded move after a lost
# match (scaled by how many recorded moves were still to come).
LOSING_PENALTY = -3

In [314]:
# AGENT DEFINITION # 
@dataclass
class Individual:
    """State of the learning agent.

    Attributes:
        set_of_moves: score of every board cell, keyed by ``(row, col)``;
            the cell with the highest score is preferred when the agent
            picks a move. Scores may become floats after learning updates.
        game_decision: ``(row, col)`` cells the agent picked from
            ``set_of_moves`` during the current match, in playing order.
        fitness: numeric field initialised by the caller.
            NOTE(review): not read by any code visible in this notebook.
    """

    set_of_moves: dict[tuple[int, int], float]
    game_decision: list[tuple[int, int]]
    fitness: float

In [315]:
# MATCH DEFINITION # 
class TicTacToe:
    """Board and running line totals of a single tic-tac-toe match.

    ``_game`` is the 3x3 grid of printable marks (" ", "X" or "O").
    ``_status`` keeps, for every row, column and diagonal, the sum of the
    ``move`` values registered on it through :meth:`act`.
    """

    def __init__(self) -> None:
        self._game = [[" "] * 3 for _ in range(3)]
        self._status = {"row": [0] * 3, "column": [0] * 3, "diag": [0] * 2}

    def __bool__(self) -> bool:
        """Return True once the match is over: a line is won or no cell is blank."""
        if check_winning(self._status):
            return True
        return all(cell != " " for line in self._game for cell in line)

    @property
    def stampa(self):
        """Print the board as a grid, one separator line after each row."""
        separator = "-" * 5
        for line in self._game:
            print("|".join(line))
            print(separator)

    def act(self, ply) -> None:
        """Add the value of ``ply = (row, col, move)`` to every line through the cell."""
        row, col, move = ply
        totals = self._status
        totals["row"][row] += move
        totals["column"][col] += move
        # Main diagonal: cells where row == col.
        if row == col:
            totals["diag"][0] += move
        # Anti-diagonal: cells where row + col == 2 (the centre is on both).
        if row + col == 2:
            totals["diag"][1] += move

    def design(self, ply) -> None:
        """Draw the mark of ``ply = (row, col, move)``: "X" if move > 0, "O" if move < 0."""
        row, col, move = ply
        if move > 0:
            self._game[row][col] = "X"
        elif move < 0:
            self._game[row][col] = "O"
    

## Shortcut / Situation interpretation

In [316]:
def check_winning(status) -> bool:
    """Return True when any line total in ``status`` has absolute value 3.

    ``status`` maps a line kind to a list of running totals. A total of +3
    or -3 means three moves of the same sign were registered on that line.
    Always returns a real ``bool`` (False, not None, when nobody has won).
    """
    return any(abs(total) == 3 for totals in status.values() for total in totals)

In [317]:
def defense_analysis(match: TicTacToe) -> tuple:
    """Find the first line whose running total is -2.

    Lines are scanned in the iteration order of ``match._status``.
    Returns ``(kind, index)`` for the first such line, or None if no line
    has a total of -2.
    """
    for kind, totals in match._status.items():
        if -2 in totals:
            return (kind, totals.index(-2))
    return None

In [318]:
def offense_analysis(match: TicTacToe) -> tuple:
    """Find the first line whose running total is 2.

    Lines are scanned in the iteration order of ``match._status``.
    Returns ``(kind, index)`` for the first such line, or None if no line
    has a total of 2.
    """
    for kind, totals in match._status.items():
        if 2 in totals:
            return (kind, totals.index(2))
    return None

## Player Strategy

### Opponent strategy

In [319]:
def pure_random(state: TicTacToe, player):
    """Pick a uniformly random free cell for ``player``.

    Args:
        state: board whose ``_game`` grid marks free cells with " ".
        player: value placed in the third slot of the returned ply.

    Returns:
        ``(row, col, player)`` for a free cell.

    Raises:
        ValueError: if the board has no free cell. Choosing directly among
            the free cells avoids looping forever in that situation.
    """
    free_cells = [
        (row, col)
        for row, line in enumerate(state._game)
        for col, mark in enumerate(line)
        if mark == " "
    ]
    if not free_cells:
        raise ValueError("pure_random called on a board with no free cell")
    row, col = random.choice(free_cells)
    return (row, col, player)

### Agent Strategy - Basic 1

In [320]:
def _first_free_cell(state, line):
    """Return the ``(row, col)`` of the first blank cell on ``line``, or None.

    ``line`` is a ``(kind, index)`` pair as returned by the analysis helpers:
    kind "row"/"column" with the row/column index, or kind "diag" with index
    0 (cells where row == col) or 1 (cells where row + col == 2).
    """
    kind, index = line
    if kind == "row":
        cells = [(index, col) for col in range(3)]
    elif kind == "column":
        cells = [(row, index) for row in range(3)]
    elif index == 0:
        cells = [(rc, rc) for rc in range(3)]
    else:
        cells = [(rc, 2 - rc) for rc in range(3)]
    for row, col in cells:
        if state._game[row][col] == " ":
            return (row, col)
    return None


def improved_agent_move(state: TicTacToe, player: Individual):
    """Choose the agent's next ply as ``(row, col, 1)``.

    Strategy, in priority order:
      1. If ``offense_analysis`` reports a line, play its first free cell
         (closes the game).
      2. Otherwise, if ``defense_analysis`` reports a line, play its first
         free cell (blocks the opponent).
      3. Otherwise play the free cell with the highest score in
         ``player.set_of_moves`` and record it in ``player.game_decision``.

    Only moves chosen in step 3 are recorded, so only those are later
    rewarded or penalised by the learning step.

    Raises:
        IndexError: if no free cell is left on the board.
    """
    # Steps 1 and 2 share the same "complete this line" logic; the defensive
    # analysis is only run when there is no winning move.
    for analysis in (offense_analysis, defense_analysis):
        line = analysis(state)
        if line is not None:
            cell = _first_free_cell(state, line)
            # A reported line is expected to always have a blank cell; if the
            # state is inconsistent, fall through instead of returning an
            # occupied cell.
            if cell is not None:
                return (*cell, 1)

    # Step 3: walk the cells from highest to lowest score. The ascending sort
    # is iterated in reverse so ties are resolved exactly as popping from the
    # end of the sorted list would.
    ranked = sorted(player.set_of_moves.items(), key=lambda m: m[1])
    for (row, col), _score in reversed(ranked):
        if state._game[row][col] == " ":
            player.game_decision.append((row, col))
            return (row, col, 1)
    raise IndexError("improved_agent_move called on a board with no free cell")

In [321]:
def learning_strategy(starter, winner, player: Individual):
    """Update the score of every recorded move after a match, then reset the log.

    For the move at position ``i`` among ``n`` recorded moves:
      * agent won (``winner > 0``): add ``n - i`` if ``starter > 0``,
        otherwise ``1.5 * (n - i)``;
      * draw (``winner == 0``): subtract ``i * n`` if ``starter > 0``,
        otherwise add ``i * n``;
      * agent lost: add ``LOSING_PENALTY * (n - i)``.

    ``player.game_decision`` is emptied at the end.
    """
    decisions = player.game_decision
    scores = player.set_of_moves
    total = len(decisions)
    agent_started = starter > 0

    for position, cell in enumerate(decisions):
        remaining = total - position
        if winner > 0:
            delta = remaining if agent_started else 1.5 * remaining
        elif winner == 0:
            weight = position * total
            delta = -weight if agent_started else weight
        else:
            delta = LOSING_PENALTY * remaining
        scores[cell] += delta

    decisions.clear()



## TicTacToe Match

In [322]:
# The learning agent: every cell of the 3x3 board starts with a score of 0,
# the decision log is empty and fitness is 0.0.
agent = Individual(
    set_of_moves=dict.fromkeys(((r, c) for r in range(3) for c in range(3)), 0),
    game_decision=[],
    fitness=0.0,
)

In [323]:
def Game(players) -> tuple:
    """Play one full match between the random opponent and the agent.

    Args:
        players: pair ``(opponent_strategy, agent)``. ``players[0]`` is called
            as ``players[0](match, player)`` for the opponent (player -1);
            ``players[1]`` is passed to ``improved_agent_move`` for the agent
            (player +1).

    Returns:
        ``(starter, winner)``: ``starter`` is the side that made the first
        move (+1 agent, -1 opponent); ``winner`` is the side that completed
        a line, or 0 for a draw.
    """
    match = TicTacToe()
    # The agent (+1) opens the match 25% of the time.
    starter = random.choice([-1, -1, -1, 1])
    player = starter
    while True:
        # The side named by `starter` really plays first: the turn is passed
        # only after a move, never before the opening one.
        if player > 0:
            ply = improved_agent_move(match, players[1])
        else:
            ply = players[0](match, player)
        match.design(ply)
        match.act(ply)
        if match:
            break
        player = -player

    # `player` is the last mover; it is the winner only if a line was completed.
    winner = player if check_winning(match._status) else 0
    return starter, winner

## TRAINING - Agent1 vs Random - 1000 matches

In [None]:
# Training: play NUM_OF_MATCHES matches against the random opponent and apply
# the learning rule to the agent after each one.
logging.getLogger().setLevel(logging.INFO)

strategy = (pure_random, agent)
history = []

for n in range(NUM_OF_MATCHES):
    starter, winner = Game(strategy)
    # winner is 0 for a draw, > 0 when the agent won, < 0 when the opponent won.
    learning_strategy(starter, winner, strategy[1])
    history.append(winner)
print(agent.set_of_moves)

"""history = np.array(history)
generations = range(1, NUM_OF_MATCHES + 1)

plt.plot(generations, history, marker='o', linestyle='-', color='r')
plt.title('Results Over Rounds')
plt.xlabel('Rounds')
plt.ylabel('Win %')
plt.grid(True)
plt.show()"""

## Evaluation Games

In [None]:
# SINGLE FINAL GAME #
# Play one demonstration match move by move, printing the board after every
# ply. The agent (+1, "X") moves first because `player` is flipped before the
# opening move.
strategy = (pure_random, agent)
match = TicTacToe()
player = -1

while not match:
    player = -player
    if player > 0:
        ply = improved_agent_move(match, agent)
    else:
        ply = strategy[0](match, player)
    match.design(ply)
    match.act(ply)
    match.stampa

# No learning step follows this match, so drop the moves the agent logged;
# otherwise they would be scored together with the next trained match.
agent.game_decision.clear()

In [None]:
# Evaluation: play EVAL_ROUNDS rounds of GAMES_PER_ROUND matches against the
# random opponent without learning, then plot the running average win rate.
logging.getLogger().setLevel(logging.INFO)

EVAL_ROUNDS = 1000
GAMES_PER_ROUND = 100

strategy = (pure_random, agent)
history = []
# `round_index` rather than `round`, so the builtin round() is not shadowed
# for the rest of the notebook.
for round_index in range(EVAL_ROUNDS):
    wins = 0
    for _ in range(GAMES_PER_ROUND):
        starter, winner = Game(strategy)
        # The agent logs its choices on every match; without a learning step
        # nothing clears the log, so reset it here to keep it from growing.
        agent.game_decision.clear()
        if winner == 1:
            wins += 1

    win_rate = 100 * wins / GAMES_PER_ROUND
    history.append(win_rate)
    # ":g" prints whole percentages without a trailing ".0".
    print(f"{win_rate:g} %")

history = np.array(history)
# Running mean of the per-round win rate after 1, 2, ..., EVAL_ROUNDS rounds.
avg_at_every_round = np.cumsum(history) / (np.arange(len(history)) + 1)
generations = range(1, len(history) + 1)

plt.plot(generations, avg_at_every_round, linestyle='-', color='r')
plt.title('Results Over Rounds')
plt.xlabel('Rounds')
plt.ylabel('Win %')
plt.grid(True)
plt.show()