In [None]:
import heapq
import numpy as np
from icecream import ic
from random import choice
from tqdm.auto import tqdm
from collections import namedtuple

In [None]:
# Side length of the square board; the board holds the numbers 0 .. PUZZLE_DIM**2 - 1.
PUZZLE_DIM = 3
# A move, described as the two board coordinates whose contents get swapped.
Action = namedtuple('Action', 'pos1 pos2')

In [None]:
def available_actions(state: np.ndarray) -> list['Action']:
    """List every legal move of the blank tile (the cell holding 0).

    Args:
        state: 2-D board array containing a 0 entry.

    Returns:
        Actions whose ``pos1`` is the blank's (row, col) and whose ``pos2`` is
        an in-bounds neighbouring cell, in the fixed order: row - 1, row + 1,
        col - 1, col + 1. Bounds are taken from ``PUZZLE_DIM``.
    """
    blank_rows, blank_cols = np.where(state == 0)
    row, col = int(blank_rows[0]), int(blank_cols[0])
    blank = (row, col)
    last_index = PUZZLE_DIM - 1

    # (is the move inside the board?, destination cell) -- order matters to callers
    # that pick a move by position (e.g. random.choice).
    candidates = (
        (row > 0, (row - 1, col)),
        (row < last_index, (row + 1, col)),
        (col > 0, (row, col - 1)),
        (col < last_index, (row, col + 1)),
    )
    return [Action(blank, target) for allowed, target in candidates if allowed]

def do_action(state: np.ndarray, action: 'Action') -> np.ndarray:
    """Return a copy of ``state`` with the cells at ``action.pos1`` and ``action.pos2`` swapped.

    The input array is left untouched; all writes go to the copy.
    """
    board = state.copy()
    # Read both cells before writing either one.
    first, second = board[action.pos1], board[action.pos2]
    board[action.pos1] = second
    board[action.pos2] = first
    return board


In [None]:
def manhattan_distance(state: np.ndarray) -> int:
    """Sum of Manhattan distances of every numbered tile from its goal cell.

    The goal layout is the row-major ordering 0, 1, 2, ..., so tile ``n``
    belongs at ``divmod(n, number_of_columns)``.

    The blank (0) is deliberately left out of the sum: each move shifts exactly
    one numbered tile by one cell, so counting the blank as well would make the
    estimate exceed the true remaining cost (e.g. 2 instead of 1 when a single
    move is left), which breaks A*'s optimality guarantee.

    Args:
        state: 2-D board array holding each value 0 .. rows*cols - 1.

    Returns:
        The heuristic value as a plain Python ``int``.
    """
    n_cols = state.shape[1]
    # Where each cell currently is ...
    rows, cols = np.indices(state.shape)
    # ... and where the tile sitting in that cell should be.
    goal_rows, goal_cols = np.divmod(state, n_cols)
    per_tile = np.abs(rows - goal_rows) + np.abs(cols - goal_cols)
    return int(per_tile[state != 0].sum())

In [None]:
# Number of random legal moves used to scramble the solved board.
RANDOMIZE_STEPS = 1000
# Solved board: 0 .. PUZZLE_DIM**2 - 1 laid out row by row.
GOAL_STATE = np.arange(PUZZLE_DIM**2).reshape((PUZZLE_DIM, PUZZLE_DIM))

# Scramble by walking RANDOMIZE_STEPS random legal moves away from the goal.
# TODO: wrap the range in tqdm for a progress bar.
initial_state = GOAL_STATE.copy()
for step in range(RANDOMIZE_STEPS):
    random_move = choice(available_actions(initial_state))
    initial_state = do_action(initial_state, random_move)

# Show the scrambled board and its heuristic distance from the goal.
h_distance = manhattan_distance(initial_state)
print(initial_state)
print(h_distance)

In [None]:
class Node:
    """Search node pairing a board state with a link back to its predecessor."""

    def __init__(self, current_state, parent):
        """Store the state and the predecessor link.

        Args:
            current_state: the board configuration this node represents.
            parent: whatever the caller uses as the predecessor link
                (``None`` for the starting node).
        """
        self.parent = parent
        self.current_state = current_state

In [49]:
# A* search from initial_state to GOAL_STATE.
#
# Frontier entries are (f, g, tie_breaker, node) where g is the number of moves
# made so far and f = g + heuristic. The unique tie_breaker guarantees heapq
# never has to compare two Node objects when f and g are equal.
frontier = []
explored = set()                      # states that have been expanded
counter = 0
start_key = tuple(initial_state.flatten())
best_g = {start_key: 0}               # cheapest known move count per state
solution_path = None                  # states from start to goal, once found

# Compute the start heuristic here instead of reading a leftover global, so the
# cell gives the same result when it is re-run on its own.
heapq.heappush(frontier, (manhattan_distance(initial_state), 0, counter, Node(initial_state, None)))
counter += 1

while frontier:
    f, g, _tie, node = heapq.heappop(frontier)
    state_key = tuple(node.current_state.flatten())

    # A cheaper route to this state was queued after this entry: skip the stale one.
    if g > best_g[state_key]:
        continue
    explored.add(state_key)

    if np.array_equal(node.current_state, GOAL_STATE):
        print("GOAL")
        # Follow the parent links back to the start, then flip into start -> goal order.
        solution_path = []
        current_node = node
        while current_node is not None:
            solution_path.append(current_node.current_state)
            current_node = current_node.parent
        solution_path.reverse()
        print(f"Solved in {len(solution_path) - 1} moves")
        break

    for action in available_actions(node.current_state):
        new_state = do_action(node.current_state, action)
        new_key = tuple(new_state.flatten())
        new_g = g + 1

        # Queue the successor only if this is the cheapest route to it so far.
        # The resulting path is shortest provided manhattan_distance never
        # overestimates the remaining number of moves.
        known_g = best_g.get(new_key)
        if known_g is not None and known_g <= new_g:
            continue
        best_g[new_key] = new_g

        # Link to the parent *node* (not its state) so the path can be rebuilt.
        heapq.heappush(frontier, (new_g + manhattan_distance(new_state), new_g, counter, Node(new_state, node)))
        counter += 1


KeyboardInterrupt: 