In [1]:
from collections import namedtuple
from random import choice
from tqdm.auto import tqdm
import numpy as np

In [29]:
# Side length of the square board; the puzzle has PUZZLE_DIM_N**2 cells
# (tiles 1 .. PUZZLE_DIM_N**2 - 1 plus the blank, which is stored as 0).
PUZZLE_DIM_N = 3

In [30]:
def generate_random_estate(PUZZLE_DIM_N):
    """Return a randomly ordered board of the given side length.

    The values ``0 .. PUZZLE_DIM_N**2 - 1`` are shuffled using NumPy's global
    random state and then laid out row by row.

    Parameters
    ----------
    PUZZLE_DIM_N : int
        Side length of the square board.

    Returns
    -------
    numpy.ndarray
        ``PUZZLE_DIM_N x PUZZLE_DIM_N`` array holding each value exactly once.
    """
    side = PUZZLE_DIM_N
    tiles = np.arange(side ** 2)
    np.random.shuffle(tiles)  # in-place; draws from the global np.random state
    return tiles.reshape((side, side))


def end_state(PUZZLE_DIM_N):
    """Build the solved board: tiles in ascending order, blank (0) last.

    Parameters
    ----------
    PUZZLE_DIM_N : int
        Side length of the square board.

    Returns
    -------
    numpy.ndarray
        ``PUZZLE_DIM_N x PUZZLE_DIM_N`` array reading ``1, 2, ...,
        PUZZLE_DIM_N**2 - 1, 0`` in row-major order.
    """
    side = PUZZLE_DIM_N
    numbered_tiles = np.arange(1, side ** 2)
    # The blank goes after the highest-numbered tile, i.e. bottom-right corner.
    solved_flat = np.concatenate((numbered_tiles, [0]))
    return solved_flat.reshape((side, side))


def available_actions(state):
    """Locate the blank (zero) tile and list the cells it can move to.

    The board bounds are taken from ``state`` itself rather than from the
    module-level ``PUZZLE_DIM_N``, so the result is correct for a board of
    any size regardless of what the global currently holds.

    Parameters
    ----------
    state : numpy.ndarray
        2-D board containing a ``0`` entry that marks the blank.

    Returns
    -------
    list of tuple
        ``[zero_pos, move_1, move_2, ...]``: the blank's current
        ``(row, col)`` first, followed by every in-bounds neighbouring
        ``(row, col)``, tried in the order up, down, left, right.

    Raises
    ------
    IndexError
        If ``state`` contains no zero.
    """
    # argwhere yields one (row, col) pair per match; the first match is used.
    zero_pos = tuple(np.argwhere(state == 0)[0])

    # Bounds come from the board that was passed in, not from a global.
    board_shape = np.shape(state)
    n_rows, n_cols = board_shape[0], board_shape[1]

    # Candidate displacements of the blank: (row_change, col_change).
    directions = [(-1, 0), (1, 0), (0, -1), (0, 1)]

    moves = []
    for dr, dc in directions:
        new_row, new_col = zero_pos[0] + dr, zero_pos[1] + dc
        if 0 <= new_row < n_rows and 0 <= new_col < n_cols:
            moves.append((new_row, new_col))

    return [zero_pos] + moves

In [36]:
# Initial and goal states
init_state = generate_random_estate(PUZZLE_DIM_N)
goal_state = end_state(PUZZLE_DIM_N)

# Walk on a copy so init_state keeps the starting board for later inspection.
new_state = init_state.copy()
steps = 0
max_steps = 1000000  # hard cap on the number of random moves attempted

print("Initial State:")
print(new_state)

# Random walk: slide the blank to a uniformly chosen neighbouring cell until
# the goal is reached or the cap is hit. `steps < max_steps` limits the walk
# to exactly max_steps moves (the previous `<=` allowed one extra move).
# NOTE: a random permutation is not guaranteed to be reachable from the goal
# by sliding moves, so hitting the cap may mean the start state was
# unsolvable rather than the walk being unlucky.
while not np.array_equal(new_state, goal_state) and steps < max_steps:
    steps += 1

    # First entry is the blank's current cell; the rest are its legal targets.
    zero_cell, *target_cells = available_actions(new_state)

    # Choose a random target
    new_zero_cell = target_cells[np.random.randint(len(target_cells))]

    # Move zero by swapping it with the tile in the chosen cell
    new_state[zero_cell], new_state[new_zero_cell] = (
        new_state[new_zero_cell],
        new_state[zero_cell],
    )

# Check outcome
if np.array_equal(new_state, goal_state):
    print(f"\nSolved in {steps} steps!")
else:
    print(f"\nNot solved in {steps} steps.")


Initial State:
[[0 2 3]
 [7 4 5]
 [8 1 6]]

Solved in 367510 steps!
