## just a policy

In [1]:
import random
import numpy as np
from game import Game, Move, Player
from copy import deepcopy
from tqdm import tqdm

border = []
for i in range(5):
    for j in range(5):
        if i == 0 or i == 4 or j == 0 or j == 4:
            border.append((i, j))
BORDER = (list(set(border)))
print(len(BORDER))

def tile_to_moves(tile):
    possible_moves = [Move.TOP, Move.BOTTOM, Move.LEFT, Move.RIGHT]
        
    if tile[0] == 0: possible_moves.remove(Move.LEFT)
    if tile[0] == 4: possible_moves.remove(Move.RIGHT)
    if tile[1] == 0: possible_moves.remove(Move.TOP)
    if tile[1] == 4: possible_moves.remove(Move.BOTTOM)

    return possible_moves

tile_moves = {tile: tile_to_moves(tile) for tile in BORDER}

ALL_MOVES = []
for tile in BORDER:
    possible_moves = tile_moves[tile]
    for move in possible_moves: ALL_MOVES.append((tile, move))
N_ALL = len(ALL_MOVES)
print(N_ALL)

class RandomPlayer(Player):
    def __init__(self) -> None:
        super().__init__()

    def make_move(self, game: 'Game') -> tuple[tuple[int, int], Move]:

        from_pos = random.choice(BORDER)
        while game.get_board()[from_pos[1], from_pos[0]] == 1 - game.current_player_idx: from_pos = random.choice(BORDER)

        possible_moves = tile_moves[from_pos]
        
        move = random.choice(possible_moves)

        return from_pos, move

16
44


In [2]:
class Dummy_Game(object):
    def __init__(self) -> None:
        self._board = np.ones((5, 5), dtype=np.uint8) * -1
        self.current_player_idx = 1

    def single_move(self, board, from_pos, move, player_id):
        self._board = deepcopy(board)
        self.current_player_idx = player_id
        ok = self.__move(from_pos, move, player_id)
        return deepcopy(self._board), ok
    
    def check_winner_board(self, board):
        self._board = board
        return self.check_winner()

    def check_winner(self) -> int:
        for x in range(self._board.shape[0]):
            if self._board[x, 0] != -1 and all(self._board[x, :] == self._board[x, 0]): return self._board[x, 0]
        for y in range(self._board.shape[1]):
            if self._board[0, y] != -1 and all(self._board[:, y] == self._board[0, y]): return self._board[0, y]
        if self._board[0, 0] != -1 and all([self._board[x, x] for x in range(self._board.shape[0])] == self._board[0, 0]): return self._board[0, 0]
        if self._board[0, -1] != -1 and all([self._board[x, -(x + 1)] for x in range(self._board.shape[0])] == self._board[0, -1]): return self._board[0, -1]
        return -1

    def __move(self, from_pos: tuple[int, int], slide: Move, player_id: int) -> bool:
        if player_id > 2: return False
        prev_value = deepcopy(self._board[(from_pos[1], from_pos[0])])
        acceptable = self.__take((from_pos[1], from_pos[0]), player_id)
        if acceptable:
            acceptable = self.__slide((from_pos[1], from_pos[0]), slide)
            if not acceptable: self._board[(from_pos[1], from_pos[0])] = deepcopy(prev_value)
        return acceptable

    def __take(self, from_pos: tuple[int, int], player_id: int) -> bool:
        acceptable: bool = ((from_pos[0] == 0 and from_pos[1] < 5) or (from_pos[0] == 4 and from_pos[1] < 5) or (from_pos[1] == 0 and from_pos[0] < 5) or (from_pos[1] == 4 and from_pos[0] < 5)) and (self._board[from_pos] < 0 or self._board[from_pos] == player_id)
        if acceptable: self._board[from_pos] = player_id
        return acceptable

    def __slide(self, from_pos: tuple[int, int], slide: Move) -> bool:
        SIDES = [(0, 0), (0, 4), (4, 0), (4, 4)]
        if from_pos not in SIDES:
            acceptable_top: bool = from_pos[0] == 0 and (slide == Move.BOTTOM or slide == Move.LEFT or slide == Move.RIGHT)
            acceptable_bottom: bool = from_pos[0] == 4 and (slide == Move.TOP or slide == Move.LEFT or slide == Move.RIGHT)
            acceptable_left: bool = from_pos[1] == 0 and (slide == Move.BOTTOM or slide == Move.TOP or slide == Move.RIGHT)
            acceptable_right: bool = from_pos[1] == 4 and (slide == Move.BOTTOM or slide == Move.TOP or slide == Move.LEFT)
        else:
            acceptable_top: bool = from_pos == (0, 0) and (slide == Move.BOTTOM or slide == Move.RIGHT)
            acceptable_left: bool = from_pos == (4, 0) and (slide == Move.TOP or slide == Move.RIGHT)
            acceptable_right: bool = from_pos == (0, 4) and (slide == Move.BOTTOM or slide == Move.LEFT)
            acceptable_bottom: bool = from_pos == (4, 4) and (slide == Move.TOP or slide == Move.LEFT)
        acceptable: bool = acceptable_top or acceptable_bottom or acceptable_left or acceptable_right
        if acceptable:
            piece = self._board[from_pos]
            if slide == Move.LEFT:
                for i in range(from_pos[1], 0, -1): self._board[(from_pos[0], i)] = self._board[(from_pos[0], i - 1)]
                self._board[(from_pos[0], 0)] = piece
            elif slide == Move.RIGHT:
                for i in range(from_pos[1], self._board.shape[1] - 1, 1): self._board[(from_pos[0], i)] = self._board[(from_pos[0], i + 1)]
                self._board[(from_pos[0], self._board.shape[1] - 1)] = piece
            elif slide == Move.TOP:
                for i in range(from_pos[0], 0, -1): self._board[(i, from_pos[1])] = self._board[(i - 1, from_pos[1])]
                self._board[(0, from_pos[1])] = piece
            elif slide == Move.BOTTOM:
                for i in range(from_pos[0], self._board.shape[0] - 1, 1): self._board[(i, from_pos[1])] = self._board[(i + 1, from_pos[1])]
                self._board[(self._board.shape[0] - 1, from_pos[1])] = piece
        return acceptable


In [3]:
def evaluate_board(board, winner, current_player):

    bonus = 0
    diag_a_player = 0
    diag_a_enemy = 0
    diag_b_player = 0
    diag_b_enemy = 0
    for i in range(5):
        
        line = board[i, :]
        bonus += pow(sum(line == current_player), 2) - pow(sum(line == 1 - current_player), 2)
        #bonus += sum(line == current_player) - sum(line == 1 - current_player)

        line = board[:, i]
        bonus += pow(sum(line == current_player), 2) - pow(sum(line == 1 - current_player), 2)
        #bonus += sum(line == current_player) - sum(line == 1 - current_player)
        
        if board[i, i] == current_player: diag_a_player += 1
        elif board[i, i] == 1 - current_player: diag_a_enemy += 1
        
        if board[i, 4-i] == current_player: diag_b_player += 1
        elif board[i, 4-i] == 1 - current_player: diag_b_enemy += 1

    bonus += pow(diag_a_player, 2) - pow(diag_a_enemy, 2)
    #bonus += diag_a_player - diag_a_enemy
        
    bonus += pow(diag_b_player, 2) - pow(diag_b_enemy, 2)
    bonus += diag_b_player - diag_b_enemy

    #bonus += 15 if board[2, 2] == current_player else -16 # the center of the board is used in four winning combination

    #win_reward = 300  # 300 = 25 x 12 = 5^2 x winning lines
    win_reward = 60  # 60 = 5 x 12 = 5 x winning lines

    base_reward = -1 * win_reward # winner == 1 - current_player
    if winner == current_player: base_reward = win_reward
    elif winner == -1: base_reward = 0

    return base_reward + bonus

def policy(board, player_id):

    game = Dummy_Game()

    best_move = None
    best_eval = float('-inf')

    for om in ALL_MOVES:
        from_pos, move = om
        new_board, ok = game.single_move(board, from_pos, move, player_id)
        if ok:
            winner = game.check_winner_board(new_board)
            om_eval = evaluate_board(new_board, winner, player_id)

            if om_eval > best_eval:
                best_eval = om_eval
                best_move = om

    return best_move

In [4]:
class MyPlayer(Player):
    def __init__(self, until_draw_base= 50) -> None:
        super().__init__()

        self.until_draw_base = until_draw_base
        self.last_move = (None, None, None)
        self.until_draw = until_draw_base

    def make_move(self, game: 'Game') -> tuple[tuple[int, int], Move]:
        board = game.get_board()
        player_id = game.current_player_idx

        from_pos, move = policy(game.get_board(), player_id)

        state = tuple(board.flatten())
        if (state, from_pos, move) == self.last_move:
            self.until_draw -= 1
            if self.until_draw == 0:
                from_pos, move = ALL_MOVES[np.random.randint(0, N_ALL)]
                while board[from_pos] == 1 - player_id: from_pos, move = ALL_MOVES[np.random.randint(0, N_ALL)]
                self.until_draw = self.until_draw_base
        else:
            self.until_draw = self.until_draw_base
            self.last_move = (state, from_pos, move)

        return from_pos, move


## tests

In [6]:
ThePlayer = MyPlayer()

wins_first = 0
wins_second = 0
n_trials = 1000

for i_g in tqdm(range(n_trials)):

    game = Game()
    winner = game.play(ThePlayer, RandomPlayer())
    if winner == 0:
        wins_first += 1

    game = Game()
    winner = game.play(RandomPlayer(), ThePlayer)
    if winner == 1:
        wins_second += 1

print(f"Player won {wins_first} / {n_trials} as first")
print(f"Player won {wins_second} / {n_trials} as second")

  0%|          | 0/1000 [00:00<?, ?it/s]

100%|██████████| 1000/1000 [01:33<00:00, 10.70it/s]

Player won 999 / 1000 as first
Player won 999 / 1000 as second





In [61]:
ThePlayer = MyPlayer()

wins_first = 0
wins_second = 0
n_trials = 10000

for i_g in tqdm(range(n_trials)):

    game = Game()
    winner = game.play(ThePlayer, RandomPlayer())
    if winner == 0:
        wins_first += 1

    game = Game()
    winner = game.play(RandomPlayer(), ThePlayer)
    if winner == 1:
        wins_second += 1

print(f"Player won {wins_first} / {n_trials} as first")
print(f"Player won {wins_second} / {n_trials} as second")

100%|██████████| 10000/10000 [15:49<00:00, 10.53it/s]

Player won 9993 / 10000 as first
Player won 9996 / 10000 as second



