## minimax with alpha-beta pruning and state represented as integers for transposition table

In [7]:
import random
import numpy as np
from game import Game, Move, Player
from copy import deepcopy
from tqdm import tqdm

border = []
for i in range(5):
    for j in range(5):
        if i == 0 or i == 4 or j == 0 or j == 4:
            border.append((i, j))
BORDER = (list(set(border)))
print(len(BORDER))

def tile_to_moves(tile):
    possible_moves = [Move.TOP, Move.BOTTOM, Move.LEFT, Move.RIGHT]
        
    if tile[0] == 0: possible_moves.remove(Move.LEFT)
    if tile[0] == 4: possible_moves.remove(Move.RIGHT)
    if tile[1] == 0: possible_moves.remove(Move.TOP)
    if tile[1] == 4: possible_moves.remove(Move.BOTTOM)

    return possible_moves

tile_moves = {tile: tile_to_moves(tile) for tile in BORDER}

ALL_MOVES = []
for tile in BORDER:
    possible_moves = tile_moves[tile]
    for move in possible_moves: ALL_MOVES.append((tile, move))
N_ALL = len(ALL_MOVES)
print(N_ALL)

class RandomPlayer(Player):
    def __init__(self) -> None:
        super().__init__()

    def make_move(self, game: 'Game') -> tuple[tuple[int, int], Move]:

        from_pos = random.choice(BORDER)
        while game.get_board()[from_pos[1], from_pos[0]] == 1 - game.current_player_idx: from_pos = random.choice(BORDER)

        possible_moves = tile_moves[from_pos]
        
        move = random.choice(possible_moves)

        return from_pos, move

16
44


In [8]:
## state can be represented as a number if 0,1 are considered as bit

In [9]:
import numpy as np

def state_to_board(state):
    binary_string = format(state, '050b')
    binary_array = np.array(list(map(int, binary_string))).reshape(2, 5, 5)

    board = np.zeros((5, 5), dtype=int)
    board[binary_array[0] == 1] = -1
    board[binary_array[1] == 1] = 1

    return board

def board_to_state(board):
    binary_array = np.zeros((2, 5, 5), dtype=int)
    
    binary_array[0][board == -1] = 1
    binary_array[1][board == 1] = 1

    binary_string = ''.join(map(str, binary_array.flatten()))
    return int(binary_string, 2)



rand_board = np.random.choice([-1, 0, 1], size=(5, 5), replace=True)
print('Board:')
print(rand_board)

rand_state = board_to_state(rand_board)
rand_board = state_to_board(rand_state)

print('\nState:')
print(rand_state)
print('\nBoard:')
print(state_to_board(rand_state))

Board:
[[ 0  0 -1 -1  1]
 [ 0  0  1  0 -1]
 [ 1  0  0  0 -1]
 [ 0  0  0  0  0]
 [-1 -1  1 -1  0]]

State:
212240977510404

Board:
[[ 0  0 -1 -1  1]
 [ 0  0  1  0 -1]
 [ 1  0  0  0 -1]
 [ 0  0  0  0  0]
 [-1 -1  1 -1  0]]


In [10]:
import time

class TimeCounter:
    def __init__(self):
        self.tot_time = 0
        self.count = 0
    
    def add_t(self, t):
        self.tot_time += t
        self.count += 1

    def get(self): return self.tot_time, self.tot_time / self.count

In [11]:
class Dummy_Game(object):
    def __init__(self) -> None:
        self._board = np.ones((5, 5), dtype=np.uint8) * -1
        self.current_player_idx = 0

    def get_board(self): return deepcopy(self._board)

    def single_move(self, board, from_pos, move, player_id):
        self._board = deepcopy(board)
        self.current_player_idx = player_id
        ok = self.__move(from_pos, move, player_id)
        return deepcopy(self._board), ok
    
    def do_move(self, from_pos, move, player_id):
        return self.__move(from_pos, move, player_id)

    def check_winner(self) -> int:
        for x in range(self._board.shape[0]):
            if self._board[x, 0] != -1 and all(self._board[x, :] == self._board[x, 0]): return self._board[x, 0]
        for y in range(self._board.shape[1]):
            if self._board[0, y] != -1 and all(self._board[:, y] == self._board[0, y]): return self._board[0, y]
        if self._board[0, 0] != -1 and all([self._board[x, x] for x in range(self._board.shape[0])] == self._board[0, 0]): return self._board[0, 0]
        if self._board[0, -1] != -1 and all([self._board[x, -(x + 1)] for x in range(self._board.shape[0])] == self._board[0, -1]): return self._board[0, -1]
        return -1

    def __move(self, from_pos: tuple[int, int], slide: Move, player_id: int) -> bool:
        if player_id > 2: return False
        prev_value = deepcopy(self._board[(from_pos[1], from_pos[0])])
        acceptable = self.__take((from_pos[1], from_pos[0]), player_id)
        if acceptable:
            acceptable = self.__slide((from_pos[1], from_pos[0]), slide)
            if not acceptable: self._board[(from_pos[1], from_pos[0])] = deepcopy(prev_value)
            if acceptable: self.current_player_idx = 1 - self.current_player_idx
        return acceptable

    def __take(self, from_pos: tuple[int, int], player_id: int) -> bool:
        acceptable: bool = ((from_pos[0] == 0 and from_pos[1] < 5) or (from_pos[0] == 4 and from_pos[1] < 5) or (from_pos[1] == 0 and from_pos[0] < 5) or (from_pos[1] == 4 and from_pos[0] < 5)) and (self._board[from_pos] < 0 or self._board[from_pos] == player_id)
        if acceptable: self._board[from_pos] = player_id
        return acceptable

    def __slide(self, from_pos: tuple[int, int], slide: Move) -> bool:
        SIDES = [(0, 0), (0, 4), (4, 0), (4, 4)]
        if from_pos not in SIDES:
            acceptable_top: bool = from_pos[0] == 0 and (slide == Move.BOTTOM or slide == Move.LEFT or slide == Move.RIGHT)
            acceptable_bottom: bool = from_pos[0] == 4 and (slide == Move.TOP or slide == Move.LEFT or slide == Move.RIGHT)
            acceptable_left: bool = from_pos[1] == 0 and (slide == Move.BOTTOM or slide == Move.TOP or slide == Move.RIGHT)
            acceptable_right: bool = from_pos[1] == 4 and (slide == Move.BOTTOM or slide == Move.TOP or slide == Move.LEFT)
        else:
            acceptable_top: bool = from_pos == (0, 0) and (slide == Move.BOTTOM or slide == Move.RIGHT)
            acceptable_left: bool = from_pos == (4, 0) and (slide == Move.TOP or slide == Move.RIGHT)
            acceptable_right: bool = from_pos == (0, 4) and (slide == Move.BOTTOM or slide == Move.LEFT)
            acceptable_bottom: bool = from_pos == (4, 4) and (slide == Move.TOP or slide == Move.LEFT)
        acceptable: bool = acceptable_top or acceptable_bottom or acceptable_left or acceptable_right
        if acceptable:
            piece = self._board[from_pos]
            if slide == Move.LEFT:
                for i in range(from_pos[1], 0, -1): self._board[(from_pos[0], i)] = self._board[(from_pos[0], i - 1)]
                self._board[(from_pos[0], 0)] = piece
            elif slide == Move.RIGHT:
                for i in range(from_pos[1], self._board.shape[1] - 1, 1): self._board[(from_pos[0], i)] = self._board[(from_pos[0], i + 1)]
                self._board[(from_pos[0], self._board.shape[1] - 1)] = piece
            elif slide == Move.TOP:
                for i in range(from_pos[0], 0, -1): self._board[(i, from_pos[1])] = self._board[(i - 1, from_pos[1])]
                self._board[(0, from_pos[1])] = piece
            elif slide == Move.BOTTOM:
                for i in range(from_pos[0], self._board.shape[0] - 1, 1): self._board[(i, from_pos[1])] = self._board[(i + 1, from_pos[1])]
                self._board[(self._board.shape[0] - 1, from_pos[1])] = piece
        return acceptable


In [12]:
def minimax(game: "Dummy_Game", board, depth, maximizing, current_player, alpha, beta, transposition_table): #, time_counter):

    #state = tuple(board.flatten())
    state = board_to_state(board)
    
    #time_start = time.perf_counter()
    if state in transposition_table: return transposition_table[state]
    #time_counter.add_t((time.perf_counter() - time_start) * 1e6)

    winner = game.check_winner()
    if winner != -1 or depth == 0:
        return evaluate_board(board, winner, current_player if maximizing else 1 - current_player)

    if maximizing:
        max_eval = float('-inf')
        for om in ALL_MOVES:
            from_pos, move = om
            new_board, ok = game.single_move(board, from_pos, move, current_player)
            if ok:
                eval = minimax(game, new_board, depth - 1, False, 1 - current_player, alpha, beta, transposition_table) #, time_counter)

                max_eval = max(max_eval, eval)
                alpha = max(alpha, eval)

                if beta <= alpha: break  # Prune the remaining branches

        transposition_table[state] = max_eval
        return max_eval
    else:
        min_eval = float('inf')
        for om in ALL_MOVES:
            from_pos, move = om
            new_board, ok = game.single_move(board, from_pos, move, current_player)
            if ok:
                eval = minimax(game, new_board, depth - 1, True, 1 - current_player, alpha, beta, transposition_table) #, time_counter)

                min_eval = min(min_eval, eval)
                beta = min(beta, eval)

                if beta <= alpha: break  # Prune the remaining branches

        transposition_table[state] = min_eval
        return min_eval
    
def evaluate_board(board, winner, current_player):

    bonus = 0
    diag_a_player = 0
    diag_a_enemy = 0
    diag_b_player = 0
    diag_b_enemy = 0
    for i in range(5):
        line = board[i, :]
        bonus += pow(sum(line == current_player), 2) - pow(sum(line == 1 - current_player), 2)
        line = board[:, i]
        bonus += pow(sum(line == current_player), 2) - pow(sum(line == 1 - current_player), 2)
        if board[i, i] == current_player: diag_a_player += 1
        elif board[i, i] == 1 - current_player: diag_a_enemy += 1
        if board[i, 4-i] == current_player: diag_b_player += 1
        elif board[i, 4-i] == 1 - current_player: diag_b_enemy += 1

    bonus += pow(diag_a_player, 2) - pow(diag_a_enemy, 2)
    bonus += pow(diag_b_player, 2) - pow(diag_b_enemy, 2)

    base_reward = -100 # winner == 1 - current_player
    if winner == current_player: base_reward = 100
    elif winner == -1: base_reward = 0

    return base_reward + bonus

def minimax_wrapper(board, player_id, max_depth= 2, transposition_table= {}, return_val= False):

    game = Dummy_Game()

    best_move = None
    best_eval = float('-inf')

    values = np.zeros(shape= (len(ALL_MOVES),))

    #time_counter = TimeCounter()
    
    for i_m, m in enumerate(ALL_MOVES):
        from_pos, move = m
        new_board, ok = game.single_move(board, from_pos, move, player_id)
        if ok:
            om_eval = minimax(game, new_board, max_depth, False, 1 - player_id, float('-inf'), float('inf'), transposition_table) #, time_counter)

            if om_eval > best_eval:
                best_eval = om_eval
                best_move = m

            values[i_m] = om_eval

    #print(f"Best move: {best_move} -> Best Value: {best_eval}")
    
    #tt, avgt = time_counter.get()
    #print(f"Elapsed time: {(tt, avgt)} microseconds")
    #print(f"Elapsed time: {tt / 1e6, avgt / 1e6} seconds")

    if return_val: return values

    return best_move

initial_board = np.ones((5, 5)) * -1

minimax_wrapper(initial_board, 1)

((2, 0), <Move.BOTTOM: 1>)

In [14]:
randP = RandomPlayer()

n_trials = 1000

moves_seen = {}

for i_g in tqdm(range(n_trials)):

    game = Dummy_Game()

    while game.check_winner() == -1:

        ## Dense turn

        board = game.get_board()
        player_id = game.current_player_idx

        mini_values = minimax_wrapper(board, player_id, 1, {}, True)

        from_pos, move = ALL_MOVES[np.argmax(mini_values)]

        state = list(board.flatten())
        state.append(player_id)
        state = tuple(state)

        if state not in moves_seen.keys(): moves_seen[state] = mini_values

        ## Random turn

        ok = False
        while not ok:
            from_pos, move = randP.make_move(game)
            ok = game.do_move(from_pos, move, game.current_player_idx)

print(len(moves_seen))

100%|██████████| 1000/1000 [1:28:45<00:00,  5.33s/it]

43879





In [18]:
import torch
import torch.nn as nn
import torch.optim as optim

class DenseMini(nn.Module):
    def __init__(self) -> None:
        super(DenseMini, self).__init__()

        self.input_shape = (3, 5, 5)
        self.dense_input_shape = 75
        self.action_size = N_ALL

        # first

        self.flatten = nn.Flatten(0)
        self.dense1 = nn.Linear(self.dense_input_shape, self.action_size * 4)
        self.dense2 = nn.Linear(self.action_size * 4, self.action_size * 2)
        self.dense3 = nn.Linear(self.action_size * 2, self.action_size)

        # second

#        self.flatten = nn.Flatten(0)
#        self.dense1 = nn.Linear(self.dense_input_shape, self.action_size * 40)
#        self.dense2 = nn.Linear(self.action_size * 40, self.action_size * 20)
#        self.dense3 = nn.Linear(self.action_size * 20, self.action_size)

        # third

#        self.flatten = nn.Flatten(0)
#
#        self.dense1 = nn.Linear(self.dense_input_shape, self.dense_input_shape * 2)
#        self.dense2 = nn.Linear(self.dense_input_shape * 2, self.action_size) # output (44,)
#
#        self.conv1 = nn.Conv2d(3, self.action_size, 3, 1)
#        self.conv11 = nn.Conv2d(self.action_size + 3, self.action_size, 2, 1)
#        self.conv111 = nn.Conv2d(self.action_size * 2 + 3, self.action_size, 2, 1) # should output (44,)
#
#        self.conv2 = nn.Conv2d(3, self.action_size, 2, 1)
#        self.conv22 = nn.Conv2d(self.action_size + 3, self.action_size, 2, 1)
#        self.conv222 = nn.Conv2d(self.action_size * 2 + 3, self.action_size, 2, 1)
#        self.conv2222 = nn.Conv2d(self.action_size * 3 + 3, self.action_size, 2, 1)  # should output (44,)
#
#        self.out_dense1 = nn.Linear(self.action_size * 3, self.action_size * 2)
#        self.out_dense2 = nn.Linear(self.action_size * 2, self.action_size)
        

    def forward(self, x):

        # first and second

        x = self.flatten(x)
        x = torch.relu(self.dense1(x))
        x = torch.relu(self.dense2(x))
        x = self.dense3(x)

        # third
#
#        print('---------------------------------')
#        print(f'x: {x.shape}')
#        print('-')
#
#        x_flatten = self.flatten(x)
#
#        print(f'x_flatten: {x_flatten.shape}')
#        print('-')
#
#        x0 = torch.relu(self.dense1(x_flatten))
#        print(f'x0: {x0.shape}')
#        x0 = torch.relu(self.dense2(x0))
#
#        print(f'x0: {x0.shape}')
#        print('-')
#
#        x1 = torch.relu(self.conv1(x))
#        print(f'x1: {x1.shape}')
#        x1 = torch.cat([x1, x], dim= 0)
#        print(f'x1: {x1.shape}')
#        x11 = torch.relu(self.conv11(x1))
#        print(f'x11: {x11.shape}')
#        x11 = torch.cat([x11, x1, x], dim= 0)
#        print(f'x11: {x11.shape}')
#        x111 = torch.relu(self.conv111(x11))
#        print(f'x111: {x111.shape}')
#        x111 = self.flatten(x111)
#
#        print(f'x111: {x111.shape}')
#        print('-')
#
#        x2 = torch.relu(self.conv2(x))
#        print(f'x2: {x2.shape}')
#        x2 = torch.cat([x2, x], dim= 0)
#        print(f'x2: {x2.shape}')
#        x22 = torch.relu(self.conv22(x2))
#        print(f'x22: {x22.shape}')
#        x22 = torch.cat([x22, x2, x], dim= 0)
#        print(f'x22: {x22.shape}')
#        x222 = torch.relu(self.conv222(x22))
#        print(f'x222: {x222.shape}')
#        x222 = torch.cat([x222, x22, x2, x], dim= 0)
#        print(f'x222: {x222.shape}')
#        x2222 = torch.relu(self.conv2222(x222))
#        print(f'x2222: {x2222.shape}')
#        x2222 = self.flatten(x2222)
#
#        print(f'x2222: {x2222.shape}')
#        print('-')
#
#        xio = torch.cat([x_flatten, x0, x111, x2222], dim= 0)
#        print(f'xio: {xio.shape}')
#        xio = torch.relu(self.out_dense1(xio))
#        print(f'xio: {x.shape}')
#        xio = self.out_dense2(xio)
#
#        print(f'xio: {xio.shape}')
#        print('==============')
#        
#        x = xio

        return x

    def expand_board(self, x):

        if self.player_id == 1:
            new_x = np.ones(shape= x.shape) * -1
            new_x[x == 0] = 1
            new_x[x == 1] = 0
            x = new_x

        new_x = np.zeros(shape= (3, 5, 5))
        new_x[0, x == -1] = 1
        new_x[1, x == 0] = 1
        new_x[2, x == 1] = 1

        return torch.Tensor(new_x)
    
    def use(self, board, player_id):
        self.player_id = player_id
        x = self.expand_board(board)
        return self.forward(x)

class DensePlayer(Player):
    def __init__(self) -> None:
        super().__init__()

        self.dense = DenseMini()
        self.rand = RandomPlayer()

        self.last_move = None, None, None
        self.until_draw = 10

    def getDense(self): return self.dense
    
    def train(self, game: 'Game'):
        return self.dense.use(game.get_board(), game.current_player_idx)
    
    def train_2(self, board, player_id):
        return self.dense.use(board, player_id)

    def make_move(self, game: 'Game') -> tuple[tuple[int, int], Move]:
        board = game.get_board()
        player_id = game.current_player_idx
        from_pos, move = ALL_MOVES[torch.argmax(self.dense.use(board, player_id).detach())]
        
        while board[from_pos] == 1 - player_id: from_pos, move = self.rand.make_move(game)

        state = tuple(board.flatten())
        if state == self.last_move[0] and from_pos == self.last_move[1] and move == self.last_move[2]:
            self.until_draw -= 1
            if self.until_draw == 0:
                self.until_draw = 10
                from_pos, move = RandomPlayer().make_move(game)
            
        self.last_move = (state, from_pos, move)

        return from_pos, move

In [16]:
agent = DensePlayer()

N_EPOCHS = 100
BATCH_SIZE = 100
LEARNING_RATE = 0.01

criterion = nn.MSELoss()
optimizer = optim.Adam(agent.getDense().parameters(), lr= LEARNING_RATE)

ms_list = list(moves_seen.items())

for epoch in range(N_EPOCHS):

    print(f'epoch: {epoch}')

    avg_loss = 0
    n_loss = 0
    cumulative_loss = None

    idx_rand = np.argsort(np.random.random(size= (len(ms_list,))))
    ms_list = [ms_list[i] for i in idx_rand]

    for i_m in range(len(ms_list)):

        state, mini_values = ms_list[i_m]
        board = np.array(state[:-1], dtype=np.uint8).reshape(5, 5)
        pid = state[-1]

        #board, pid, mini_values = moves_seen[i_m]

        agent_values = agent.train_2(board, pid)

        if cumulative_loss is None: cumulative_loss = criterion(torch.Tensor(mini_values), agent_values)
        else: cumulative_loss += criterion(torch.Tensor(mini_values), agent_values)

        if i_m % BATCH_SIZE == BATCH_SIZE - 1:
            optimizer.zero_grad()
            cumulative_loss.backward()
            optimizer.step()
            avg_loss += cumulative_loss.item()
            cumulative_loss = None
            n_loss += 1

    print(f'avg_loss: {avg_loss / n_loss}')
    print('-----------------------------------------------')
    

epoch: 0
avg_loss: 119225.15674942922
-----------------------------------------------
epoch: 1
avg_loss: 74713.83855058505
-----------------------------------------------
epoch: 2
avg_loss: 65197.14081228596
-----------------------------------------------
epoch: 3
avg_loss: 61576.07533354737
-----------------------------------------------
epoch: 4
avg_loss: 59238.26076448345
-----------------------------------------------
epoch: 5
avg_loss: 57503.22281678082
-----------------------------------------------
epoch: 6
avg_loss: 55954.52316103025
-----------------------------------------------
epoch: 7
avg_loss: 54397.23071846461
-----------------------------------------------
epoch: 8
avg_loss: 53057.526335973176
-----------------------------------------------
epoch: 9
avg_loss: 51443.4044663242
-----------------------------------------------
epoch: 10
avg_loss: 49950.98180650685
-----------------------------------------------
epoch: 11
avg_loss: 48476.75018728596
-------------------------

In [17]:
ThePlayer = agent

wins_first = 0
wins_second = 0
n_trials = 10

for i_g in range(n_trials):

    print('==============================================================================')
    print('==============================================================================')
    print(f'game {i_g+1} - Player First (0)')

    game = Game()
    winner = game.play(ThePlayer, RandomPlayer())
    if winner == 0:
        print(game.get_board())
        print('Player win')
        wins_first += 1
    else:
        print(game.get_board())
        print('Player lose')

    print(f'game {i_g+1} - Player Second (1)')

    game = Game()
    winner = game.play(RandomPlayer(), ThePlayer)
    if winner == 1:
        print(game.get_board())
        print('Player win')
        wins_second += 1
    else:
        print(game.get_board())
        print('Player lose')

print(f"Player won {wins_first} / {n_trials} as first")
print(f"Player won {wins_second} / {n_trials} as second")

game 1 - Player First (0)
[[ 0  0 -1 -1  0]
 [ 0  1  0 -1 -1]
 [ 0 -1 -1 -1  1]
 [ 0 -1  1 -1 -1]
 [ 0  0  1  1  1]]
Player win
game 1 - Player Second (1)
[[ 0  1  1  0  0]
 [ 1  1  1  1  1]
 [ 0  0 -1  0  0]
 [ 0 -1 -1 -1 -1]
 [ 0  0  1  0  0]]
Player win
game 2 - Player First (0)
[[ 0 -1 -1  0  1]
 [ 0 -1 -1 -1 -1]
 [ 0 -1 -1 -1  1]
 [ 0 -1 -1 -1 -1]
 [ 0 -1  1  1  1]]
Player win
game 2 - Player Second (1)
[[-1 -1 -1  0  1]
 [-1 -1 -1 -1  1]
 [-1 -1  0 -1  1]
 [ 0  1  0 -1  1]
 [ 0  0 -1  0  1]]
Player win
game 3 - Player First (0)
[[ 0  1  0  1  1]
 [ 1  0 -1 -1  1]
 [ 1 -1  0 -1 -1]
 [ 0 -1 -1  0  1]
 [ 1 -1 -1 -1  0]]
Player win
game 3 - Player Second (1)
[[ 0  0  0  1  0]
 [ 0  1  0  0  1]
 [ 1  1  0 -1  1]
 [ 1  0  1  0  1]
 [ 0  1  1 -1 -1]]
Player lose
game 4 - Player First (0)
[[ 0  0  0  0  0]
 [ 1  0 -1 -1  1]
 [ 1 -1 -1 -1  1]
 [ 0  1 -1 -1 -1]
 [ 0  0  1  1 -1]]
Player win
game 4 - Player Second (1)
[[ 0 -1 -1  1  1]
 [ 1 -1 -1  1  1]
 [-1 -1  1  0  1]
 [ 1  0  1  0  1]
 