Copyright **`(c)`** 2023 Giovanni Squillero `<giovanni.squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  

# LAB10

Use reinforcement learning to devise a tic-tac-toe player.

### Deadlines:

* Submission: [Dies Natalis Solis Invicti](https://en.wikipedia.org/wiki/Sol_Invictus)
* Reviews: [Befana](https://en.wikipedia.org/wiki/Befana)

Notes:

* Reviews will be assigned on Monday, December 4
* You need to commit in order to be selected as a reviewer (i.e., it is better to commit an empty work than not to commit at all)

In [1]:
from itertools import combinations
from collections import namedtuple, defaultdict
import random as random
from copy import deepcopy

from tqdm.auto import tqdm
import numpy as np
import json

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
class Tic_Tac_Toe:
    """Square tic-tac-toe board for two players identified as 0 and 1.

    Each cell holds ``-1`` while empty, ``0`` once taken by player 0
    (printed as ``X``) or ``1`` once taken by player 1 (printed as ``O``).
    """

    def __init__(self, size=3, player_turns=1) -> None:
        """Create an empty ``size`` x ``size`` board.

        Args:
            size: Side length of the board.
            player_turns: Value stored in ``self.player_turns`` (presumably
                the id of the player who moves first -- confirm with the
                callers). Any falsy value (``0`` or ``None``) is replaced by
                a random choice between 0 and 1.
        """
        # The empty marker is -1, so the board needs a signed dtype; the
        # previous `uint8 * -1` only worked through implicit type promotion
        # and raises OverflowError on NumPy 2.
        self._board = np.full((size, size), -1, dtype=np.int16)
        self._size = size

        # NOTE: because of the `or`, an explicit 0 also triggers the random
        # choice. Kept as-is so existing callers behave the same.
        self.player_turns = player_turns or random.randint(0, 1)

    def print_board(self):
        """Nicely prints the board"""
        symbols = {0: 'X', 1: 'O'}
        # Iterate over the actual rows so boards of any size print correctly.
        for row in self._board:
            print(''.join(symbols.get(int(cell), '_') for cell in row))
        print()

    def get_board(self):
        """Return the underlying NumPy array (not a copy)."""
        return self._board

    def get_string_board(self) -> str:
        """Return the board serialized as a JSON list of rows."""
        return json.dumps(self._board.tolist())

    def make_move(self, player, action):
        """Write ``player`` into the cell ``(action[0], action[1])``.

        The cell is overwritten unconditionally; use ``is_available`` first
        to check that it is empty.

        Raises:
            ValueError: If ``player`` is neither 0 nor 1.
        """
        if player not in (0, 1):
            raise ValueError("Giocatore non valido")
        self._board[action[0]][action[1]] = player

    def is_available(self, move):
        """Return True if ``move`` is inside the board and the cell is empty."""
        row, col = move[0], move[1]
        if not (0 <= row < self._size and 0 <= col < self._size):
            return False
        return bool(self._board[row][col] == -1)

    def check_winner(self) -> int:
        '''Check the winner. Returns the player ID of the winner if any, otherwise returns -1'''
        board = self._board
        # Same priority as before: rows, then columns, then the principal
        # diagonal, then the secondary diagonal.
        lines = [
            *board,
            *board.T,
            board.diagonal(),
            np.fliplr(board).diagonal(),
        ]
        for line in lines:
            owner = line[0]
            # A line wins when it is non-empty and every cell has one owner.
            if owner != -1 and (line == owner).all():
                # Plain int so the result matches the annotation.
                return int(owner)
        return -1

    def is_end(self):
        """Return True when the board has no empty cell left.

        Only fullness is checked; use ``check_winner`` to detect a win.
        """
        # Cells are integers, so the empty marker is the number -1 (the
        # previous comparison with the string '-1' never matched).
        return not bool((self._board == -1).any())


In [None]:
# Player 'O' is max, in this case AI
def max(self, game: "Tic_Tac_Toe", my_player):
    """Maximizing step of a minimax search for ``my_player``.

    NOTE: this function shadows the built-in ``max`` in the module where it
    is defined.

    Args:
        self: Object providing ``current_state`` (a 3x3 grid indexable as
            ``[i][j]`` whose empty cells hold ``'-1'``) and ``min()``, which
            returns a ``(value, row, col)`` triple for the opponent's reply.
        game: Board queried for the terminal test through ``check_winner()``
            and ``is_end()``.
        my_player: Id of the maximizing player; the opponent is
            ``1 - my_player``.

    Returns:
        ``(value, row, col)``. On a finished game ``value`` is 1 (win),
        -1 (loss) or 0 (draw) and the coordinates are ``0, 0``. Otherwise
        ``value`` is the best score returned by ``self.min()`` over all
        empty cells and ``row, col`` is the first cell reaching it
        (``None, None`` with value -2 if no cell is empty).
    """
    # Marker used for an empty cell, both when testing and when restoring.
    empty = '-1'

    result = game.check_winner()
    if result == my_player:
        return (1, 0, 0)  # win case
    if result == 1 - my_player:
        return (-1, 0, 0)  # lose case
    # A draw needs both "no winner" and a full board. The method must be
    # called: a bare `game.is_end` is always truthy, which made every
    # position without a winner count as a draw and skipped the search.
    if result == -1 and game.is_end():
        return (0, 0, 0)  # draw case

    # Possible values are -1 (loss), 0 (tie), 1 (win); start below all.
    maxv = -2
    move = [None, None]

    # NOTE(review): the terminal test reads `game`, while the search below
    # edits `self.current_state` and calls `self.min()` without arguments.
    # Presumably both are meant to describe the same board -- confirm.
    for i in range(0, 3):
        for j in range(0, 3):
            if self.current_state[i][j] == empty:
                # Try the move on the empty cell and let Min answer:
                # that is one branch of the game tree.
                self.current_state[i][j] = 'O'
                (m, min_i, min_j) = self.min()
                # Keep the first move that strictly improves the score.
                if m > maxv:
                    maxv = m
                    move[0] = i
                    move[1] = j
                # Undo the move with the same marker the emptiness test
                # looks for, so the cell is seen as free again.
                self.current_state[i][j] = empty
    return (maxv, move[0], move[1])

In [None]:
# Player 'O' is max, in this case AI
def max(self):
    """Maximizing step of minimax for player 'O'.

    Reads ``self.is_end()`` for the terminal test, and explores every cell
    of ``self.current_state`` that holds ``'.'`` by temporarily marking it
    ``'O'`` and asking ``self.min()`` for the resulting score.

    Returns:
        ``(value, row, col)``. When ``self.is_end()`` reports ``'X'``,
        ``'O'`` or ``'.'`` the value is -1, 1 or 0 respectively and the
        coordinates are ``0, 0``. Otherwise the value is the highest score
        returned by ``self.min()`` and ``row, col`` is the first cell that
        reached it (``None, None`` with value -2 if no cell was empty).
    """
    outcome = self.is_end()

    # Terminal positions: loss for 'X', win for 'O', tie for '.'.
    for marker, score in (('X', -1), ('O', 1), ('.', 0)):
        if outcome == marker:
            return (score, 0, 0)

    # Scores range over -1, 0, 1; start below the worst of them.
    best_score = -2
    best_row = None
    best_col = None

    for cell in range(9):
        row, col = divmod(cell, 3)
        if self.current_state[row][col] != '.':
            continue
        # Play 'O' on the free cell, let Min evaluate that branch of the
        # game tree, then take the move back.
        self.current_state[row][col] = 'O'
        score, _reply_row, _reply_col = self.min()
        if score > best_score:
            best_score, best_row, best_col = score, row, col
        self.current_state[row][col] = '.'

    return (best_score, best_row, best_col)