In [0]:
import numpy as np
import heapq as hq
from itertools import count

In [0]:
class Node:
  def __init__(self, state, parent, cost, action):
    self.state = state
    self.parent = parent
    self.g = cost
    self.f = self.g
    self.a = action # Action applied to reach the state L, R, T or D

  # Heuristic: sum of manhattan distances of each tile.. blank included
  def h(self, goal):
    sum = 0
    for i in range(3):
      for j in range(3):
        m, n = np.where(goal == self.state[i][j])
        m, n = int(m), int(n)
        sum += abs(i - m) + abs(j - n)
    return sum

  def expand(self):
    i, j = np.where(self.state == 0)
    i, j = int(i), int(j) # Current position of the blank -> {0}
    children = []

    # Move the blank one step at a time
    if self.a != 'D' and i - 1 >= 0:
        s = self.state.copy()
        s[i][j] = s[i - 1][j]
        s[i - 1][j] = 0
        children.append(Node(s, self, self.g + 1, 'T'))

    if self.a != 'R' and j - 1 >= 0:
        s = self.state.copy()
        s[i][j] = s[i][j - 1]
        s[i][j - 1] = 0
        children.append(Node(s, self, self.g + 1, 'L'))
    
    if self.a != 'L' and j + 1 <= 2:
        s = self.state.copy()
        s[i][j] = s[i][j + 1]
        s[i][j + 1] = 0
        children.append(Node(s, self, self.g + 1, 'R'))

    if self.a != 'T' and i + 1 <= 2:
        s = self.state.copy()
        s[i][j] = s[i + 1][j]
        s[i + 1][j] = 0
        children.append(Node(s, self, self.g + 1, 'D'))
    return children

In [0]:
def print_soln(sol):
  if sol.parent == None:
    print(sol.state, '\n')
    return
  print_soln(sol.parent)
  print(sol.state, "\n")

In [0]:
def RBFS(node, goal, f_limit):
  if np.allclose(goal, node.state):
    print("Solution found")
    print_soln(node)
    return ("cost:", node.f)
  
  scsrs = node.expand()
  if len(scsrs) == 0:
    return ('failure', float('inf'))
  pq = []
  u = count()

  for s in scsrs:
    s.f = max(s.h(goal) + s.g, node.f)
    hq.heappush(pq, (s.f, next(u), s))
  
  while True:
    f_best, u1, best = hq.heappop(pq)
    if best.f > f_limit:
      return ('failure', best.f)

    try:
      f_alt, u2, alt = hq.heappop(pq)
      hq.heappush(pq, (f_alt, u2, alt))
    except IndexError:
      f_alt = f_limit
      
    result, best.f = RBFS(best, goal, min(f_limit, f_alt))
    hq.heappush(pq, (best.f, u1, best))
    # print(result, best.f)
    if result != 'failure':
      return (result, best.f)

In [12]:
puzzle = np.asarray([[7, 0, 4], [5, 2, 6], [8, 3, 1]])

# puzzle = np.arange(9).reshape(3, 3)
# np.random.shuffle(puzzle) # For a random initial state

goal = list(np.arange(1, 9))
goal.append(0)
goal = np.asarray(goal).reshape(3, 3)

start = Node(puzzle, None, 0, '')
RBFS(start, goal, float('inf'))

Solution found
[[7 0 4]
 [5 2 6]
 [8 3 1]] 

[[7 2 4]
 [5 0 6]
 [8 3 1]] 

[[7 2 4]
 [5 3 6]
 [8 0 1]] 

[[7 2 4]
 [5 3 6]
 [8 1 0]] 

[[7 2 4]
 [5 3 0]
 [8 1 6]] 

[[7 2 4]
 [5 0 3]
 [8 1 6]] 

[[7 2 4]
 [0 5 3]
 [8 1 6]] 

[[0 2 4]
 [7 5 3]
 [8 1 6]] 

[[2 0 4]
 [7 5 3]
 [8 1 6]] 

[[2 4 0]
 [7 5 3]
 [8 1 6]] 

[[2 4 3]
 [7 5 0]
 [8 1 6]] 

[[2 4 3]
 [7 0 5]
 [8 1 6]] 

[[2 4 3]
 [7 1 5]
 [8 0 6]] 

[[2 4 3]
 [7 1 5]
 [0 8 6]] 

[[2 4 3]
 [0 1 5]
 [7 8 6]] 

[[2 4 3]
 [1 0 5]
 [7 8 6]] 

[[2 0 3]
 [1 4 5]
 [7 8 6]] 

[[0 2 3]
 [1 4 5]
 [7 8 6]] 

[[1 2 3]
 [0 4 5]
 [7 8 6]] 

[[1 2 3]
 [4 0 5]
 [7 8 6]] 

[[1 2 3]
 [4 5 0]
 [7 8 6]] 

[[1 2 3]
 [4 5 6]
 [7 8 0]] 



('cost:', 25)