Copyright **`(c)`** 2024 Giovanni Squillero `<giovanni.squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free under certain conditions — see the [`license`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  

In [1]:
from collections import namedtuple
from random import choice
from tqdm.auto import tqdm
import numpy as np
from icecream import ic


In [2]:
# Side length of the square board (4 -> the 15-puzzle).
PUZZLE_DIM = 4
# A move is a tuple subclass with named fields: the two board positions to swap.
action = namedtuple('Action', 'pos1 pos2')



In [3]:
def available_actions(state: np.ndarray) -> list['Action']:
    """Return every legal move of the blank tile on the board ``state``.

    The blank is the cell holding 0. Each returned ``action`` has ``pos1`` set to
    the blank's (row, column) and ``pos2`` set to the orthogonally adjacent cell
    it would be swapped with. Moves are listed in a fixed order: row - 1,
    row + 1, column - 1, column + 1.

    The board limits are read from ``state.shape`` rather than from a global
    constant, so the result is correct for a 2-D board of any size.

    Raises:
        IndexError: if ``state`` contains no 0.
    """
    n_rows, n_cols = state.shape
    # np.where yields one index array per axis; element 0 of each is the blank.
    x, y = [int(axis[0]) for axis in np.where(state == 0)]  # row, column
    actions = []
    if x > 0:
        actions.append(action((x, y), (x - 1, y)))
    if x < n_rows - 1:
        actions.append(action((x, y), (x + 1, y)))
    if y > 0:
        actions.append(action((x, y), (x, y - 1)))
    if y < n_cols - 1:
        actions.append(action((x, y), (x, y + 1)))
    return actions



def do_action(state: np.ndarray, action: 'Action') -> np.ndarray:
    """Return a copy of ``state`` with the cells at ``action.pos1`` and ``action.pos2`` swapped.

    The input board is left untouched.
    """
    board = state.copy()
    first, second = action.pos1, action.pos2
    # Read both cells before writing either one, then write them back crossed.
    value_at_second = board[second]
    value_at_first = board[first]
    board[first] = value_at_second
    board[second] = value_at_first
    return board





In [4]:
import functools


def counter(fn):
    """Decorate ``fn`` so the number of invocations is kept in a ``calls`` attribute.

    The returned wrapper carries ``fn``'s metadata (via ``functools.wraps``) and
    starts with ``calls == 0``.
    """

    def _counted(*args, **kargs):
        # Bump the tally before delegating, so a call that raises is still counted.
        wrapper.calls += 1
        return fn(*args, **kargs)

    wrapper = functools.wraps(fn)(_counted)
    wrapper.calls = 0
    return wrapper


# Solved board: tiles 1 .. PUZZLE_DIM**2 - 1 in row-major order, blank (0) in the last cell.
goal = np.array([*range(1, PUZZLE_DIM**2), 0]).reshape(PUZZLE_DIM, PUZZLE_DIM)
ic(goal)

def goal_test(state: np.ndarray):
    """Tell whether ``state`` equals the module-level ``goal`` board (same shape, same elements)."""
    is_solved = np.array_equal(state, goal)
    return is_solved

@counter
def difference_from_goal(state: np.ndarray) -> int:
    """Count the cells of ``state`` whose content differs from the solved board.

    Every cell is compared, including the one holding the blank (0). Each call
    is tallied by the ``counter`` decorator in ``difference_from_goal.calls``.

    Returns:
        The number of mismatching cells as a plain Python ``int``.
    """
    # Compare against the module-level ``goal`` instead of rebuilding the target
    # array on every call; this function runs once per generated state.
    return int(np.sum(state != goal))
    

def manhattan_distance(state: np.ndarray) -> int:
    """Return the summed Manhattan distance of every tile from its solved position.

    In the solved board the tiles 1, 2, 3, ... are laid out in row-major order
    with the blank (0) last, so tile ``v`` belongs at flat index ``v - 1``.
    The blank itself is not counted. The board width is read from
    ``state.shape``, so any 2-D board size is handled.

    Returns:
        The total distance as a Python ``int``; 0 for the solved board.
    """
    n_cols = state.shape[1]
    distance = 0
    for (x, y), value in np.ndenumerate(state):
        if value == 0:
            continue  # the blank does not contribute to the heuristic
        # Tile v sits at flat index v - 1 in the goal (not v): subtract 1 before
        # splitting into row/column, otherwise every target is shifted by one cell.
        goal_x, goal_y = divmod(int(value) - 1, n_cols)
        distance += abs(x - goal_x) + abs(y - goal_y)
    return distance


ic| goal: array([[ 1,  2,  3,  4],
                 [ 5,  6,  7,  8],
                 [ 9, 10, 11, 12],
                 [13, 14, 15,  0]])


In [5]:
# Number of random blank moves used to scramble the board.
RANDOMIZE_STEPS = 100_000
# Start from the solved layout; scrambling only through legal moves means the
# result can always be brought back to the solved layout by legal moves.
state = np.array([*range(1, PUZZLE_DIM**2), 0]).reshape(PUZZLE_DIM, PUZZLE_DIM)
for r in tqdm(range(RANDOMIZE_STEPS), desc='Randomizing'):
    state = do_action(
        state,
        choice(available_actions(state)),
    )
state

Randomizing:   0%|          | 0/100000 [00:00<?, ?it/s]

array([[ 4, 14, 11, 10],
       [ 6,  2,  3,  1],
       [ 8,  5, 12,  9],
       [ 7,  0, 15, 13]])

In [None]:
import heapq

# Randomized best-first search over puzzle states.
# A min-heap holds (priority, board) pairs; the board with the smallest priority
# value is expanded next. Boards are stored as nested tuples because they must be
# hashable (for `visited`) and orderable (heapq compares them when priorities tie).

# Number of random successor draws made for each expanded state.
BEAM_SIZE = 4

priority_queue = []
visited = set()


# Seed the queue with the initial state.
# NOTE(review): the start state is scored with difference_from_goal only, while
# successors below use the 0.8/0.2 blend; harmless here since it is the only
# entry in the queue when it is popped.
initial_priority = difference_from_goal(state)
# The ndarray is converted to a tuple of tuples before being pushed.
heapq.heappush(priority_queue, (initial_priority, tuple(map(tuple, state))))

print("Initial State: ")
print(state)

# Stop when the queue is exhausted or the most recently popped state is the goal.
while priority_queue and not goal_test(state):


    # Pop the most promising state (smallest priority value).
    priority, state_tuple = heapq.heappop(priority_queue)
    state = np.array(state_tuple)

    # Mark as visited so it is not pushed again.
    # NOTE(review): `visited` is only consulted when pushing, so a board that was
    # pushed several times before its first pop is expanded once per copy.
    visited.add(state_tuple)

    # Generate successors of the current state.
    for _ in range(BEAM_SIZE):
        # One legal move drawn uniformly at random; draws are independent, so the
        # same move can be picked more than once and some moves may be missed.
        new_state = do_action(state, choice(available_actions(state)))
        # Tuple form again: ndarrays cannot be stored in the heap or the set.
        new_state_tuple = tuple(map(tuple, new_state))

        # Only queue the successor if it has not been expanded yet.
        # Priority = 80% misplaced-cell count + 20% Manhattan distance.
        if new_state_tuple not in visited:
            heapq.heappush(priority_queue, (0.8 * difference_from_goal(new_state) + 0.2 * manhattan_distance(new_state), new_state_tuple))

# Final report. The figure printed is the total number of calls made to
# difference_from_goal since it was defined (one per scored state).
print("Number of actions evaluated: ")
print(difference_from_goal.calls)

print("Final State:")
print(state)

# Suggested parameters for the priority estimate (N is presumably PUZZLE_DIM):
#   N = 3, 4: difference_from_goal alone is sufficient
#   N = 5:    use 80% difference_from_goal and 20% Manhattan distance

Initial State: 
[[ 4 14 11 10]
 [ 6  2  3  1]
 [ 8  5 12  9]
 [ 7  0 15 13]]
Number of actions evaluated: 
440497
Final State:
[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]
 [13 14 15  0]]
