## DQN

In [None]:
import random
import numpy as np
from game import Game, Move, Player
from copy import deepcopy
from tqdm import tqdm

In [None]:
class Dummy_Game(object):
    """Minimal 5x5 board simulator used to apply single moves to arbitrary boards.

    A cell holds -1 while it is neutral, otherwise the id of the player that took it.
    Positions received from callers are ``(x, y)`` = ``(column, row)``; they are swapped
    to NumPy's ``[row, column]`` order before the board is touched.
    """

    def __init__(self) -> None:
        # The board must store -1, so it needs a signed dtype. The previous
        # ``np.ones(..., dtype=np.uint8) * -1`` only worked through implicit upcasting
        # (to int16) on NumPy 1.x and raises OverflowError on NumPy 2.
        self._board = np.full((5, 5), -1, dtype=np.int16)
        self.current_player_idx = 1

    def get_board(self):
        """Return the internal board array (the array itself, not a copy)."""
        return self._board

    def single_move(self, board, from_pos, move, player_id):
        """Apply one move to a copy of ``board``.

        Returns:
            ``(new_board, ok)`` where ``new_board`` is a copy of the board after the
            attempt and ``ok`` tells whether the move was accepted.
        """
        self._board = deepcopy(board)
        self.current_player_idx = player_id
        ok = self.__move(from_pos, move, player_id)
        return deepcopy(self._board), ok

    def check_winner_board(self, board):
        """Install ``board`` as the current board (no copy) and return its winner."""
        self._board = board
        return self.check_winner()

    def check_winner(self) -> int:
        """Return the value filling the first complete line found, or -1 if there is none.

        Lines are examined in this order: rows, columns, main diagonal, anti-diagonal.
        """
        board = self._board
        for x in range(board.shape[0]):
            if board[x, 0] != -1 and np.all(board[x, :] == board[x, 0]):
                return board[x, 0]
        for y in range(board.shape[1]):
            if board[0, y] != -1 and np.all(board[:, y] == board[0, y]):
                return board[0, y]
        # Compare the diagonals as arrays instead of relying on list == scalar dispatch.
        main_diag = np.diagonal(board)
        if main_diag[0] != -1 and np.all(main_diag == main_diag[0]):
            return main_diag[0]
        anti_diag = np.diagonal(np.fliplr(board))
        if anti_diag[0] != -1 and np.all(anti_diag == anti_diag[0]):
            return anti_diag[0]
        return -1

    def __move(self, from_pos: tuple[int, int], slide: Move, player_id: int) -> bool:
        """Take the piece at ``from_pos`` (given as (x, y)) and slide it; return success."""
        if player_id > 2:
            return False
        cell = (from_pos[1], from_pos[0])  # (row, column)
        n_rows, n_cols = self._board.shape
        # Reject coordinates outside the board instead of letting negative indices
        # wrap around or large ones raise IndexError.
        if not (0 <= cell[0] < n_rows and 0 <= cell[1] < n_cols):
            return False
        prev_value = deepcopy(self._board[cell])
        acceptable = self.__take(cell, player_id)
        if acceptable:
            acceptable = self.__slide(cell, slide)
            if not acceptable:
                # The slide was refused: undo the ownership change made by __take.
                self._board[cell] = deepcopy(prev_value)
        return acceptable

    def __take(self, from_pos: tuple[int, int], player_id: int) -> bool:
        """Mark the border cell ``from_pos`` (row, column) as owned by ``player_id``.

        Accepted only for a border cell that is neutral or already owned by the player.
        """
        row, col = from_pos[0], from_pos[1]
        acceptable: bool = (
            ((row in (0, 4) and col < 5) or (col in (0, 4) and row < 5))
            and (self._board[from_pos] < 0 or self._board[from_pos] == player_id)
        )
        if acceptable:
            self._board[from_pos] = player_id
        return acceptable

    def __slide(self, from_pos: tuple[int, int], slide: Move) -> bool:
        """Slide the piece at ``from_pos`` (row, column) to the far end of its row/column.

        A piece cannot be pushed off the edge it sits on, so a corner allows two
        directions and any other border cell allows three.
        """
        SIDES = [(0, 0), (0, 4), (4, 0), (4, 4)]
        if from_pos not in SIDES:
            acceptable_top: bool = from_pos[0] == 0 and slide in (Move.BOTTOM, Move.LEFT, Move.RIGHT)
            acceptable_bottom: bool = from_pos[0] == 4 and slide in (Move.TOP, Move.LEFT, Move.RIGHT)
            acceptable_left: bool = from_pos[1] == 0 and slide in (Move.BOTTOM, Move.TOP, Move.RIGHT)
            acceptable_right: bool = from_pos[1] == 4 and slide in (Move.BOTTOM, Move.TOP, Move.LEFT)
        else:
            acceptable_top: bool = from_pos == (0, 0) and slide in (Move.BOTTOM, Move.RIGHT)
            acceptable_left: bool = from_pos == (4, 0) and slide in (Move.TOP, Move.RIGHT)
            acceptable_right: bool = from_pos == (0, 4) and slide in (Move.BOTTOM, Move.LEFT)
            acceptable_bottom: bool = from_pos == (4, 4) and slide in (Move.TOP, Move.LEFT)
        acceptable: bool = acceptable_top or acceptable_bottom or acceptable_left or acceptable_right
        if acceptable:
            row, col = from_pos[0], from_pos[1]
            piece = self._board[from_pos]
            # Shift the cells between the piece and the target edge by one step
            # towards the piece, then drop the piece on the edge.
            if slide == Move.LEFT:
                for i in range(col, 0, -1):
                    self._board[(row, i)] = self._board[(row, i - 1)]
                self._board[(row, 0)] = piece
            elif slide == Move.RIGHT:
                for i in range(col, self._board.shape[1] - 1, 1):
                    self._board[(row, i)] = self._board[(row, i + 1)]
                self._board[(row, self._board.shape[1] - 1)] = piece
            elif slide == Move.TOP:
                for i in range(row, 0, -1):
                    self._board[(i, col)] = self._board[(i - 1, col)]
                self._board[(0, col)] = piece
            elif slide == Move.BOTTOM:
                for i in range(row, self._board.shape[0] - 1, 1):
                    self._board[(i, col)] = self._board[(i + 1, col)]
                self._board[(self._board.shape[0] - 1, col)] = piece
        return acceptable

In [1]:
# Every (i, j) coordinate lying on the outer ring of the 5x5 board.
border = [
    (i, j)
    for i in range(5)
    for j in range(5)
    if i in (0, 4) or j in (0, 4)
]
# Kept as list(set(...)) so the element order stays exactly what it has always been.
BORDER = list(set(border))
print(len(BORDER))

def tile_to_moves(tile):
    """Return the slide directions available from border tile ``tile``.

    The result keeps the order TOP, BOTTOM, LEFT, RIGHT and leaves out every
    direction excluded by the tile's coordinates (``tile[0]`` rules out LEFT/RIGHT,
    ``tile[1]`` rules out TOP/BOTTOM).
    """
    excluded = {
        Move.LEFT: tile[0] == 0,
        Move.RIGHT: tile[0] == 4,
        Move.TOP: tile[1] == 0,
        Move.BOTTOM: tile[1] == 4,
    }
    candidates = (Move.TOP, Move.BOTTOM, Move.LEFT, Move.RIGHT)
    return [direction for direction in candidates if not excluded[direction]]

# Allowed slide directions for each border tile.
tile_moves = {tile: tile_to_moves(tile) for tile in BORDER}

# Flat action list: one (tile, direction) pair per allowed move, grouped by tile
# in BORDER order. An action id is an index into this list.
ALL_MOVES = [(tile, move) for tile in BORDER for move in tile_moves[tile]]
N_ALL = len(ALL_MOVES)

class RandomPlayer(Player):
    """Player that picks a uniformly random border tile it may take and a random slide."""

    def __init__(self) -> None:
        super().__init__()

    def make_move(self, game: 'Game') -> tuple[tuple[int, int], Move]:
        """Return ``(from_pos, move)`` where the chosen tile is not owned by the opponent."""
        # Draw tiles until one is found that does not hold the opponent's id;
        # from_pos is (x, y) while the board is indexed [row, column].
        while True:
            from_pos = random.choice(BORDER)
            if game.get_board()[from_pos[1], from_pos[0]] != 1 - game.current_player_idx:
                break

        return from_pos, random.choice(tile_moves[from_pos])

16
44


In [None]:
# dict_rot[(move, n)] is the direction `move` becomes after n applications of the
# coordinate map used by rot(): (x, y) -> (4 - y, x). That map sends the direction
# vector (dx, dy) to (-dy, dx), so one step cycles TOP -> RIGHT -> BOTTOM -> LEFT -> TOP
# (TOP decreases y, LEFT decreases x). The previous table swapped the TOP/BOTTOM entries
# for 1 and 3 turns, which turned a quarter turn into a reflection and produced
# (tile, move) pairs that do not exist in ALL_MOVES.
dict_rot = {
    (Move.TOP, 1): Move.RIGHT,
    (Move.TOP, 2): Move.BOTTOM,
    (Move.TOP, 3): Move.LEFT,
    (Move.BOTTOM, 1): Move.LEFT,
    (Move.BOTTOM, 2): Move.TOP,
    (Move.BOTTOM, 3): Move.RIGHT,
    (Move.LEFT, 1): Move.TOP,
    (Move.LEFT, 2): Move.RIGHT,
    (Move.LEFT, 3): Move.BOTTOM,
    (Move.RIGHT, 1): Move.BOTTOM,
    (Move.RIGHT, 2): Move.LEFT,
    (Move.RIGHT, 3): Move.TOP,
}

# Direction mapping for the horizontal mirror (x, y) -> (4 - x, y):
# vertical slides are unchanged, horizontal slides are exchanged.
dict_flip = {
    Move.TOP: Move.TOP,
    Move.BOTTOM: Move.BOTTOM,
    Move.LEFT: Move.RIGHT,
    Move.RIGHT: Move.LEFT,
}

# clockwise rotation: (3, 4) -> (4, 1) -> (1, 0) -> (0, 3) -> (3, 4)
#   i.e. (xi, yi) -> (yi, 4 - xi)
# counter-clockwise rotation: (3, 4) -> (0, 3) -> (1, 0) -> (4, 1) -> (3, 4)
#   i.e. (xi, yi) -> (4 - yi, xi)

def rot(n_rot):
    """Build a function that rotates a ``(from_pos, move)`` pair by ``n_rot`` quarter turns.

    Each quarter turn maps ``(x, y)`` to ``(4 - y, x)``; the direction is looked up
    in ``dict_rot`` under the key ``(move, n_rot)``.
    """
    def rot_n(from_pos, move):
        x, y = from_pos[0], from_pos[1]
        for _ in range(n_rot):
            x, y = 4 - y, x
        return (x, y), dict_rot[(move, n_rot)]
    return rot_n

def flip(from_pos, move):
    """Mirror a ``(from_pos, move)`` pair: ``x`` becomes ``4 - x``, direction via ``dict_flip``."""
    mirrored = (4 - from_pos[0], from_pos[1])
    return mirrored, dict_flip[move]

def flip_rot(n_rot):
    """Build a function that applies ``rot(n_rot)`` and then ``flip`` to a ``(from_pos, move)`` pair."""
    def flip_rot_n(from_pos, move):
        rotated_pos, rotated_move = rot(n_rot)(from_pos, move)
        return flip(rotated_pos, rotated_move)
    return flip_rot_n

# Ready-made transforms for 1, 2 and 3 quarter turns, plain and followed by a mirror.
rot1, rot2, rot3 = (rot(turns) for turns in (1, 2, 3))
flip_rot1, flip_rot2, flip_rot3 = (flip_rot(turns) for turns in (1, 2, 3))

In [None]:
# Move transforms indexed by symmetry id. The position in this list is the id
# returned by check_simmetries: 0-2 rotations, 3 flip, 4-6 flip + rotation.
inverse_simmetries = [rot1, rot2, rot3, flip, flip_rot1, flip_rot2, flip_rot3]

In [None]:
def find_index(tup, list_of_tuple):
    """Return the index of the first element of ``list_of_tuple`` equal to ``tup``, or None."""
    hits = (position for position, item in enumerate(list_of_tuple) if tup == item)
    return next(hits, None)

In [None]:
def check_simmetries(board, state_list):
    """Look for ``board`` or one of its 7 symmetric variants among the known states.

    Variants are tried in this order: the board itself, three successive
    ``np.rot90`` turns, ``np.fliplr(board)``, then three successive ``np.rot90``
    turns of the flipped board.

    Args:
        board: 2-D NumPy array.
        state_list: container of flattened boards stored as tuples.

    Returns:
        ``(state, symmetry_id)`` for the first variant found, where ``state`` is the
        flattened variant as a tuple and ``symmetry_id`` is ``None`` for the board
        itself or 0-6 for the variants (same numbering as ``inverse_simmetries``).
        ``None`` when no variant is known.
    """
    state = tuple(board.flatten())
    if state in state_list:
        return state, None

    variant = board
    for symmetry_id in range(7):
        # Id 3 restarts from the mirrored board; every other id is one more quarter turn.
        variant = np.fliplr(board) if symmetry_id == 3 else np.rot90(variant)
        state = tuple(variant.flatten())  # flattened once per variant
        if state in state_list:
            return state, symmetry_id

    return None

In [None]:
MOVES_SIMMETRIES = {}  # (id_move, id_simmetry) -> id_move

# Reverse lookup from a (tile, direction) pair to its index in ALL_MOVES.
move_to_id = {candidate: idx for idx, candidate in enumerate(ALL_MOVES)}

for id_move, (from_pos, move) in enumerate(ALL_MOVES):
    for id_simmetry, simmetry in enumerate(inverse_simmetries):
        # Store the index of the transformed move. The previous code stored the loop
        # variable `i` instead of `idx`, so a transformed move missing from ALL_MOVES
        # was silently recorded as the last move id; it is now recorded as None.
        MOVES_SIMMETRIES[(id_move, id_simmetry)] = move_to_id.get(simmetry(from_pos, move))

print(len(MOVES_SIMMETRIES))
print(len(ALL_MOVES) * 7)

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim

class DenseQ(nn.Module):
    """Fully connected Q-network: one-hot encoded 5x5 board in, one value per move out.

    The output has ``N_ALL`` entries, one for each entry of ``ALL_MOVES``.
    ``nn.Flatten(0)`` flattens every dimension, so ``forward`` processes a single
    ``(3, 5, 5)`` tensor at a time (no batch dimension).
    """

    def __init__(self) -> None:
        super().__init__()

        self.input_shape = (3, 5, 5)
        self.dense_input_shape = 75
        self.action_size = N_ALL

        self.flatten = nn.Flatten(0)
        self.dense1 = nn.Linear(self.dense_input_shape, self.action_size * 4)
        self.dense2 = nn.Linear(self.action_size * 4, self.action_size * 2)
        self.dense3 = nn.Linear(self.action_size * 2, self.action_size)

    def forward(self, x):
        """Map an encoded board tensor to the vector of Q-values."""
        x = self.flatten(x)
        x = torch.relu(self.dense1(x))
        x = torch.relu(self.dense2(x))
        return self.dense3(x)

    def expand_board(self, x, player_id=None):
        """One-hot encode a board from the point of view of ``player_id``.

        Args:
            x: 5x5 board holding -1 (neutral), 0 or 1.
            player_id: player the encoding is built for. When omitted, the id stored
                by the last ``use`` call is used, as before.

        Returns:
            float32 tensor of shape ``(3, 5, 5)``: plane 0 marks neutral cells,
            plane 1 the cells equal to 0 and plane 2 the cells equal to 1, after the
            ids 0 and 1 have been exchanged when ``player_id`` is 1. The network
            therefore always sees the player to move as id 0.
        """
        if player_id is None:
            # Backward-compatible path: rely on the id remembered by use().
            player_id = self.player_id
        x = np.asarray(x)

        if player_id == 1:
            swapped = np.full(x.shape, -1)
            swapped[x == 0] = 1
            swapped[x == 1] = 0
            x = swapped

        planes = np.zeros((3, 5, 5), dtype=np.float32)
        planes[0, x == -1] = 1
        planes[1, x == 0] = 1
        planes[2, x == 1] = 1

        return torch.from_numpy(planes)

    def use(self, board, player_id):
        """Encode ``board`` for ``player_id`` and return the network's Q-values."""
        self.player_id = player_id  # still recorded for callers that read it afterwards
        return self.forward(self.expand_board(board, player_id))

class DQNPlayer(Player):
    """Player that chooses moves with a ``DenseQ`` network.

    Holds the network, the loss and the optimizer used to train it, and a
    ``RandomPlayer`` used as a fallback when the preferred move is not allowed.
    """

    def __init__(self) -> None:
        super().__init__()

        self.dense = DenseQ()
        self.rand = RandomPlayer()

        self.criterion = nn.MSELoss()
        self.optimizer = optim.Adam(self.dense.parameters(), lr=0.01)

    def getDense(self):
        """Return the underlying Q-network."""
        return self.dense

    @staticmethod
    def _tile_is_opponents(board, from_pos, player_id):
        """Tell whether the tile at ``from_pos`` belongs to the other player.

        ``from_pos`` is ``(x, y)`` while the board is indexed ``[row, column]``,
        hence the swapped indices (same convention as ``RandomPlayer.make_move``).
        """
        return board[from_pos[1], from_pos[0]] == 1 - player_id

    def train(self, game: 'Game'):
        """Return the id (index into ``ALL_MOVES``) of the move chosen for ``game``."""
        return self.train_2(game.get_board(), game.current_player_idx)

    def train_2(self, board, player_id):
        """Return the id of the move chosen for ``board`` when ``player_id`` is to move.

        The move with the highest Q-value is taken; if its tile belongs to the
        opponent, random ids are drawn until an allowed tile is found.
        """
        q_values = self.dense.use(board, player_id)
        # The network returns one value per move: reduce it to a single index before
        # using it to look up ALL_MOVES (indexing with the whole tensor raised).
        id_move = int(torch.argmax(q_values.detach()))
        while self._tile_is_opponents(board, ALL_MOVES[id_move][0], player_id):
            print('going random')
            id_move = np.random.randint(0, N_ALL)
        return id_move

    def make_move(self, game: 'Game') -> tuple[tuple[int, int], Move]:
        """Return the greedy move for the current position, or a random allowed one."""
        board = game.get_board()
        player_id = game.current_player_idx
        with torch.no_grad():
            q_values = self.dense.use(board, player_id)
        from_pos, move = ALL_MOVES[int(torch.argmax(q_values))]
        while self._tile_is_opponents(board, from_pos, player_id):
            print('going random')
            from_pos, move = self.rand.make_move(game)
        return from_pos, move

In [None]:
agent = DQNPlayer()

n_trials = 10
gamma = 0.95      # discount factor (value used by the original target formula)
epsilon = 0.1     # exploration rate -- assumed value, tune as needed
max_turns = 200   # safety cap so a game that never ends cannot hang the cell
rand_p = RandomPlayer()

# Training setup: the agent plays as player 0 against the random player (player 1).
# One transition is: agent move, opponent reply, resulting board.
# NOTE(review): the original cell left `reward = ...` unfinished; the sparse reward
# below (+1 win, -1 loss, 0 otherwise) is an assumption to be confirmed.
AGENT_ID, OPPONENT_ID = 0, 1
dense = agent.getDense()

for i_g in range(n_trials):

    game = Dummy_Game()

    for _ in range(max_turns):

        board = deepcopy(game.get_board())
        q_values = dense.use(board, AGENT_ID)

        # A tile may be taken unless it belongs to the opponent; from_pos is (x, y)
        # while the board is indexed [row, column].
        legal_ids = [
            idx for idx, ((x, y), _slide) in enumerate(ALL_MOVES)
            if board[y, x] != OPPONENT_ID
        ]
        if not legal_ids:
            break

        # Epsilon-greedy choice restricted to the allowed moves.
        if random.random() < epsilon:
            id_move = random.choice(legal_ids)
        else:
            scores = q_values.detach().tolist()
            id_move = max(legal_ids, key=lambda idx: scores[idx])

        from_pos, move = ALL_MOVES[id_move]
        new_board, ok = game.single_move(board, from_pos, move, AGENT_ID)
        if not ok:
            print('ERROR\nERROR\nERROR')
            break

        winner = game.check_winner()
        if winner == -1:
            # Nobody has won yet: let the random opponent answer.
            game.current_player_idx = OPPONENT_ID
            opp_from, opp_move = rand_p.make_move(game)
            new_board, ok = game.single_move(new_board, opp_from, opp_move, OPPONENT_ID)
            if not ok:
                print('ERROR\nERROR\nERROR')
                break
            winner = game.check_winner()

        if winner == AGENT_ID:
            reward = 1.0
        elif winner == OPPONENT_ID:
            reward = -1.0
        else:
            reward = 0.0

        # One-step Q-learning target; no bootstrap term once the game is over.
        target = reward
        if winner == -1:
            with torch.no_grad():
                target += gamma * dense.use(new_board, AGENT_ID).max().item()

        loss = agent.criterion(q_values[id_move], torch.tensor(target))
        agent.optimizer.zero_grad()
        loss.backward()
        agent.optimizer.step()

        if winner != -1:
            break

        

In [None]:
# Evaluation: the trained agent plays n_trials games as first player and n_trials
# games as second player against a fresh RandomPlayer, counting its wins per seat.
ThePlayer = agent

n_trials = 10
wins_first = wins_second = 0

for i_g in range(n_trials):

    for _ in range(2):
        print('==============================================================================')

    # Agent moves first (player 0).
    print(f'game {i_g+1} - Player First (0)')
    game = Game()
    winner = game.play(ThePlayer, RandomPlayer())
    print(game.get_board())
    if winner != 0:
        print('Player lose')
    else:
        print('Player win')
        wins_first += 1

    # Agent moves second (player 1).
    print(f'game {i_g+1} - Player Second (1)')
    game = Game()
    winner = game.play(RandomPlayer(), ThePlayer)
    print(game.get_board())
    if winner != 1:
        print('Player lose')
    else:
        print('Player win')
        wins_second += 1

print(f"Player won {wins_first} / {n_trials} as first")
print(f"Player won {wins_second} / {n_trials} as second")