# LAB 10
Use reinforcement learning to devise a tic-tac-toe player.

## Deadlines

- Submission: [Dies Natalis Solis Invicti](https://en.wikipedia.org/wiki/Sol_Invictus)
- Reviews: [Befana](https://en.wikipedia.org/wiki/Befana)

### Notes
- Reviews will be assigned on Monday, December 4
- You need to commit in order to be selected as a reviewer (i.e., it is better to commit an empty work than not to commit at all)

In [80]:
import numpy as np
from tqdm import tqdm, trange
from dataclasses import dataclass, field
from typing import Literal, Union
from itertools import combinations

In [81]:
# Index of a cell in the flat, row-major 9-element board.
DirectIndex = Literal[0, 1, 2, 3, 4, 5, 6, 7, 8]
# (row, column) address of a cell.
RowColIndex = tuple[Literal[0, 1, 2], Literal[0, 1, 2]]
# Cell content: -1 = empty, 0 / 1 = the player that occupies it.
Cell = Literal[-1, 0, 1]
Player = Literal[0, 1]

# Indexed by ``cell + 1``: empty, player 0, player 1.
CELL_TO_EMOJI=("⬜","❎","⏺️")

# Flat indices of every winning line: 3 rows, 3 columns, 2 diagonals.
_WINNING_LINES = np.array(
    [
        [0, 1, 2], [3, 4, 5], [6, 7, 8],
        [0, 3, 6], [1, 4, 7], [2, 5, 8],
        [0, 4, 8], [2, 4, 6],
    ]
)


@dataclass(repr=False)
class Board:
    """Tic-tac-toe board stored as a flat, row-major array of 9 cells.

    Cells hold ``-1`` when empty, otherwise the id (``0`` or ``1``) of the
    player occupying them. Cells can be addressed either by flat index
    (``board[4]``) or by ``(row, col)`` tuple (``board[1, 1]``).
    """

    board: np.ndarray = field(default_factory=lambda: np.full(9, -1, dtype=np.int8))

    @staticmethod
    def i_to_rc(i: DirectIndex) -> RowColIndex:
        """Convert a flat index into its ``(row, col)`` pair."""
        return divmod(i, 3)

    @staticmethod
    def rc_to_i(rc: RowColIndex) -> DirectIndex:
        """Convert a ``(row, col)`` pair into its flat index."""
        row, col = rc
        return row * 3 + col

    @staticmethod
    def is_valid_index(idx: Union[DirectIndex, RowColIndex]) -> bool:
        """Return whether ``idx`` addresses a cell inside the 3x3 board."""
        if isinstance(idx, tuple):
            return 0 <= idx[0] <= 2 and 0 <= idx[1] <= 2
        return 0 <= idx <= 8

    @staticmethod
    def _flat_index(idx: Union[DirectIndex, RowColIndex]) -> DirectIndex:
        """Validate ``idx`` and normalise it to a flat index.

        Raises:
            AssertionError: if ``idx`` is outside the board.
        """
        # Raised explicitly (rather than via ``assert``) so the check still
        # runs under ``python -O``; otherwise a negative index would silently
        # wrap around in NumPy. The exception type is the one callers of the
        # previous ``assert``-based check already see.
        if not Board.is_valid_index(idx):
            raise AssertionError(f"Invalid Index: {idx}")
        return Board.rc_to_i(idx) if isinstance(idx, tuple) else idx

    def __getitem__(self, idx: Union[DirectIndex, RowColIndex]) -> Cell:
        """Return the content of the cell at a flat or ``(row, col)`` index.

        Raises:
            AssertionError: if ``idx`` is outside the board.
        """
        return self.board[Board._flat_index(idx)]

    def __setitem__(self, idx: Union[DirectIndex, RowColIndex], value: Cell) -> None:
        """Overwrite the cell at a flat or ``(row, col)`` index with ``value``.

        Raises:
            AssertionError: if ``idx`` is outside the board.
        """
        self.board[Board._flat_index(idx)] = value

    def is_valid_move(self: "Board",move: Union[DirectIndex, RowColIndex]) -> bool:
        """Return whether the cell addressed by ``move`` is still empty."""
        return bool(self[move] == -1)

    def move(self: "Board", player: "Player", move: Union[DirectIndex, RowColIndex]) -> bool:
        """Place ``player`` on ``move`` if that cell is empty.

        Returns:
            ``True`` when the mark was placed, ``False`` when the cell was
            already occupied (the board is left unchanged in that case).
        """
        if not self.is_valid_move(move):
            return False
        self[move] = player
        return True

    def is_playable(self: "Board") -> bool:
        """Return whether at least one cell is still empty."""
        return bool((self.board == -1).any())

    def won(self: "Board") -> Literal[0, 1, -1]:
        """Return the id of the player owning a complete line, or ``-1``.

        Player ``0`` is checked first, so it is reported if both players
        happen to own a complete line.
        """
        # Fancy indexing yields an (8, 3) array: one row per winning line.
        lines = self.board[_WINNING_LINES]
        for player in (0, 1):
            if (lines == player).all(axis=1).any():
                return player
        return -1

    def __repr__(self: "Board") -> str:
        """Debug representation: raw cell array plus the current winner."""
        winner = self.won()
        return f"Board({str(self.board)}, {winner=}) "

    def __str__(self) -> str:
        """Pretty print the board as three newline-terminated emoji rows."""
        return "".join(
            "".join(CELL_TO_EMOJI[int(cell) + 1] for cell in row) + "\n"
            for row in self.board.reshape(3, 3)
        )
