In [1]:
# %load callbacks.py
import os
import pickle
import random
from collections import deque

import numpy as np

ACTIONS = ['UP', 'RIGHT', 'DOWN', 'LEFT', 'WAIT', 'BOMB']


def setup(self):
    """
    Setup your code. This is called once when loading each agent.
    Make sure that you prepare everything such that act(...) can be called.

    When in training mode, the separate `setup_training` in train.py is called
    after this method. This separation allows you to share your trained agent
    with other students, without revealing your training code.

    In this example, our model is a set of probabilities over actions
    that are independent of the game state.

    :param self: This object is passed to all callbacks and you can set arbitrary values.
    """
    if self.train or not os.path.isfile("my-saved-model.pt"):
        self.logger.info("Setting up model from scratch.")
        weights = np.random.rand(len(ACTIONS))
        self.model = weights / weights.sum()
    else:
        self.logger.info("Loading model from saved state.")
        with open("my-saved-model.pt", "rb") as file:
            self.model = pickle.load(file)


def look_for_targets(free_space, start, targets, logger=None):
    """Find direction of the closest target that can be reached via free tiles.

    Performs a breadth-first search of the reachable free tiles until a target is encountered.
    If no target can be reached, the search falls back to the visited tile with the
    smallest value of (steps walked from start + Manhattan distance to the nearest
    target); ties go to the tile visited last.

    Neighbouring coordinates outside the grid are never expanded, so the search
    also works on grids that are not surrounded by a wall border.

    Args:
        free_space: Boolean 2D numpy array. True for free tiles and False for obstacles.
        start: the coordinate (hashable, e.g. a tuple) from which to begin the search.
        targets: list or array holding the coordinates of all target tiles.
        logger: optional logger object for debugging.
    Returns:
        None if targets is empty. Otherwise the coordinate of the first step
        towards the chosen tile; this is start itself when start is already
        the best tile (e.g. a target lies on start).
    """
    if len(targets) == 0:
        return None

    width, height = free_space.shape

    def distance_to_nearest_target(tile):
        # Manhattan distance from tile to whichever target is closest.
        return np.sum(np.abs(np.subtract(targets, tile)), axis=1).min()

    frontier = deque([start])  # FIFO queue: O(1) pops from the left
    parent_dict = {start: start}
    dist_so_far = {start: 0}
    best = start
    best_dist = distance_to_nearest_target(start)

    while frontier:
        current = frontier.popleft()
        # Find distance from current position to all targets, track closest
        d = distance_to_nearest_target(current)
        if d + dist_so_far[current] <= best_dist:
            best = current
            best_dist = d + dist_so_far[current]
        if d == 0:
            # Found path to a target's exact position, mission accomplished!
            best = current
            break
        # Add unexplored free neighboring tiles to the queue in a random order
        x, y = current
        neighbors = [
            (x_n, y_n)
            for (x_n, y_n) in [(x + 1, y), (x - 1, y), (x, y + 1), (x, y - 1)]
            # The bounds check stops negative indices from wrapping around
            # to the opposite edge and too-large ones from raising IndexError.
            if 0 <= x_n < width and 0 <= y_n < height and free_space[x_n, y_n]
        ]
        random.shuffle(neighbors)
        for neighbor in neighbors:
            if neighbor not in parent_dict:
                frontier.append(neighbor)
                parent_dict[neighbor] = current
                dist_so_far[neighbor] = dist_so_far[current] + 1

    if logger:
        logger.debug(f'Suitable target found at {best}')

    # Walk back along the parent links until the tile whose parent is start;
    # that tile is the first step. start is its own parent, so best == start
    # yields start.
    current = best
    while parent_dict[current] != start:
        current = parent_dict[current]
    return current


def state_to_features(game_state: dict) -> np.array:
    """
    *This is not a required function, but an idea to structure your code.*

    Converts the game state to the input of your model, i.e.
    a feature vector.

    Features designed for Task 1: a length-4 vector with one component per
    neighbouring tile of the agent, in the order
    (x + 1, y), (x - 1, y), (x, y + 1), (x, y - 1). Each component is

    * 0 if the tile is a wall or crate,
    * 1 if the tile is free,
    * 2 if the tile is the step returned by look_for_targets, i.e. the
      neighbour leading towards the nearest coin.

    You can find out about the state of the game environment via game_state,
    which is a dictionary. Consult 'get_state_for_agent' in environment.py to see
    what it contains.

    :param game_state:  A dictionary describing the current game board.
    :return: np.array, or None when game_state is None
    """
    # This is the dict before the game begins and after it ends
    if game_state is None:
        return None

    # True for free tiles, False for crates and walls.
    walkable = game_state['field'] == 0
    # Agent position as coordinates.
    pos_x, pos_y = game_state['self'][3]
    # Neighbouring tile that leads to the closest coin (None without coins).
    toward_coin = look_for_targets(walkable, (pos_x, pos_y), game_state['coins'])

    features = np.zeros(4)  # hand-crafted feature vector
    offsets = ((1, 0), (-1, 0), (0, 1), (0, -1))
    for idx, (step_x, step_y) in enumerate(offsets):
        tile = (pos_x + step_x, pos_y + step_y)
        if tile == toward_coin:
            features[idx] = 2
        elif walkable[tile]:
            features[idx] = 1

    return features
            
            
def act(self, game_state: dict) -> str:
    """
    Your agent should parse the input, think, and take a decision.
    When not in training mode, the maximum execution time for this method is 0.5s.

    While training, a fixed fraction of calls returns an exploratory action
    drawn from a hard-coded distribution; every other call samples an action
    from the probabilities stored in ``self.model``. ``game_state`` is not
    inspected by this implementation.

    :param self: The same object that is passed to all of your callbacks.
    :param game_state: The dictionary that describes everything on the board.
    :return: The action to take as a string.
    """
    # todo Exploration vs exploitation
    exploration_rate = .1
    # random.random() is only consulted in training mode (short-circuit).
    explore = self.train and random.random() < exploration_rate

    if not explore:
        self.logger.debug("Querying model for action.")
        return np.random.choice(ACTIONS, p=self.model)

    self.logger.debug("Choosing action purely at random.")
    # 80%: walk in any direction. 10% wait. 10% bomb.
    return np.random.choice(ACTIONS, p=[.2, .2, .2, .2, .1, .1])

In [7]:
# Test -> successful! :))

def build_arena(COLS=17, ROWS=17):
    """Build an integer test arena of shape (COLS, ROWS).

    Free tiles are 0 and walls are -1. Walls are placed on the outer
    border and on every tile whose x and y index are both even.

    :param COLS: size of the first array axis.
    :param ROWS: size of the second array axis.
    :return: numpy integer array containing only 0 and -1.
    """
    wall = -1
    grid = np.zeros((COLS, ROWS), dtype=int)

    # Outer border.
    grid[:1, :] = wall
    grid[-1:, :] = wall
    grid[:, :1] = wall
    grid[:, -1:] = wall

    # (x + 1) * (y + 1) is odd exactly when x and y are both even, so the
    # pillar pattern is every second tile along both axes.
    grid[::2, ::2] = wall

    return grid

# Hand-built scenario: agent on tile (1, 1) with two coins on the board.
field_test = build_arena()
coins_test = [(4, 1), (1, 3)]
start_test = (1, 1)
self_test = ('my_agent_1', 0, 0, start_test)
game_state_test = {
    'field': build_arena(),
    'coins': coins_test,
    'self': self_test,
}

# Separate copy of the arena used only for display:
# 8 marks the agent, 3 marks a coin.
arena_test = build_arena()
arena_test[start_test] = 8
for coin in coins_test:
    arena_test[coin] = 3

print(f"Where to go: {state_to_features(game_state_test)}" + "\n")
print(arena_test)

Where to go: [1. 0. 2. 0.]

[[-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1]
 [-1  8  0  3  0  0  0  0  0  0  0  0  0  0  0  0 -1]
 [-1  0 -1  0 -1  0 -1  0 -1  0 -1  0 -1  0 -1  0 -1]
 [-1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 -1]
 [-1  3 -1  0 -1  0 -1  0 -1  0 -1  0 -1  0 -1  0 -1]
 [-1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 -1]
 [-1  0 -1  0 -1  0 -1  0 -1  0 -1  0 -1  0 -1  0 -1]
 [-1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 -1]
 [-1  0 -1  0 -1  0 -1  0 -1  0 -1  0 -1  0 -1  0 -1]
 [-1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 -1]
 [-1  0 -1  0 -1  0 -1  0 -1  0 -1  0 -1  0 -1  0 -1]
 [-1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 -1]
 [-1  0 -1  0 -1  0 -1  0 -1  0 -1  0 -1  0 -1  0 -1]
 [-1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 -1]
 [-1  0 -1  0 -1  0 -1  0 -1  0 -1  0 -1  0 -1  0 -1]
 [-1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 -1]
 [-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1]]
