In [3]:
import time
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
import numpy as np

15 Puzzle Domain

In [5]:
class Puzzle15:
    """A 4x4 sliding-tile board (the 15-puzzle); the value 0 marks the blank.

    Attributes:
        state: the board this instance represents, stored exactly as passed in.
        goal: the solved layout, tiles 1..15 in row-major order with the blank last.
    """

    def __init__(self, state):
        self.state = state
        # Tiles 1..15 followed by the blank, folded into four rows of four.
        self.goal = np.array([*range(1, 16), 0]).reshape(4, 4)

    def is_goal(self):
        """Report whether the current board equals the solved layout."""
        return np.array_equal(self.state, self.goal)

    def get_neighbors(self):
        """List the boards reachable by sliding one tile into the blank.

        Returns:
            A list of ``(board, cost)`` pairs. Moves are tried in the order
            up, down, left, right (relative to the blank) and moves that would
            leave the 4x4 grid are skipped. Every move has cost 1. Each board
            is a fresh copy; ``self.state`` is not modified.
        """
        blank_row, blank_col = np.argwhere(self.state == 0)[0]
        successors = []

        for d_row, d_col in ((-1, 0), (1, 0), (0, -1), (0, 1)):
            row, col = blank_row + d_row, blank_col + d_col
            if not (0 <= row < 4 and 0 <= col < 4):
                continue
            board = self.state.copy()
            # Slide the adjacent tile into the blank, then blank its old cell.
            board[blank_row, blank_col] = board[row, col]
            board[row, col] = 0
            successors.append((board, 1))  # (new state, cost)

        return successors


Manhattan Distance Heuristic

In [6]:
class Heuristics:
    """Distance-to-goal estimates for sliding-tile boards (0 is the blank)."""

    @staticmethod
    def manhattan_distance(state, goal):
        """Sum of horizontal plus vertical offsets of every tile from its home cell.

        The home cell of tile ``v`` is computed as ``divmod(v - 1, n_cols)``,
        i.e. the row-major layout with tiles in ascending order. ``goal`` is
        accepted so both heuristics share one signature but is not consulted.

        Args:
            state: 2-D numpy array holding the board.
            goal: unused; kept for signature compatibility.

        Returns:
            The total Manhattan distance as a Python int. The blank is skipped.
        """
        n_cols = state.shape[1]  # board width; 4 for the 15-puzzle
        distance = 0
        for (row, col), value in np.ndenumerate(state):
            if value != 0:
                target_row, target_col = divmod(int(value) - 1, n_cols)
                distance += abs(row - target_row) + abs(col - target_col)
        return distance

    @staticmethod
    def misplaced_tiles(state, goal):
        """Count the tiles (blank excluded) that are not on their goal cell.

        The blank's own cell is masked out rather than subtracting a constant
        1, so the count is 0 at the goal and stays correct when the blank is
        already in place while other tiles are not.

        Args:
            state: numpy array holding the board.
            goal: numpy array of the same shape holding the solved layout.

        Returns:
            Number of misplaced non-blank tiles as a Python int.
        """
        return int(np.sum((state != goal) & (state != 0)))


IDA* Algorithm

In [7]:
class IDAStar:
    """Iterative-deepening A* over a puzzle object.

    The puzzle must expose ``state``, ``goal`` and ``get_neighbors()`` (which
    expands ``puzzle.state`` and returns ``(state, cost)`` pairs). The
    heuristic is called as ``heuristic_func(state, goal)``.

    Attributes:
        nodes_generated: number of nodes visited by the most recent ``search``.
        solution_cost: path cost ``g`` at which the goal was reached by the
            most recent ``search``, or ``None`` if no solution was found.
    """

    # Unique marker for "goal reached". A numeric marker such as -1 could be
    # mistaken for an f-value when a learned heuristic returns negatives.
    _FOUND = object()

    def __init__(self, puzzle, heuristic_func):
        self.puzzle = puzzle
        self.heuristic_func = heuristic_func
        self.nodes_generated = 0
        self.solution_cost = None

    def search(self):
        """Run IDA* from ``puzzle.state``.

        Returns:
            True if the goal was reached, False if the reachable space was
            exhausted without finding it. Note that on a state space with no
            dead ends and no reachable goal the threshold grows without bound
            and this method does not return.
        """
        self.nodes_generated = 0
        self.solution_cost = None
        threshold = self.heuristic_func(self.puzzle.state, self.puzzle.goal)
        while True:
            outcome = self._search(self.puzzle.state, 0, threshold)
            if outcome is self._FOUND:
                return True
            if outcome == float('inf'):
                return False
            # Smallest f-value that exceeded the old bound becomes the new bound.
            threshold = outcome

    def _neighbors_of(self, state):
        """Return the successors of ``state`` (not of the puzzle's start state).

        ``get_neighbors`` always expands ``puzzle.state``, so that attribute is
        pointed at ``state`` for the duration of the call and then restored.
        """
        saved = self.puzzle.state
        self.puzzle.state = state
        try:
            return self.puzzle.get_neighbors()
        finally:
            self.puzzle.state = saved

    def _search(self, state, g, threshold):
        """Depth-first probe bounded by ``threshold``.

        Args:
            state: node being examined.
            g: path cost accumulated from the start to ``state``.
            threshold: current bound on ``f = g + h``.

        Returns:
            ``_FOUND`` if the goal was reached below this node; otherwise the
            smallest f-value seen that exceeded ``threshold`` (``inf`` if none).
        """
        self.nodes_generated += 1
        f = g + self.heuristic_func(state, self.puzzle.goal)
        if f > threshold:
            return f
        if np.array_equal(state, self.puzzle.goal):
            self.solution_cost = g
            return self._FOUND
        min_threshold = float('inf')
        for neighbor, cost in self._neighbors_of(state):
            outcome = self._search(neighbor, g + cost, threshold)
            if outcome is self._FOUND:
                return self._FOUND
            if outcome < min_threshold:
                min_threshold = outcome
        return min_threshold


Neural Network with Aleatoric and Epistemic Uncertainty

In [8]:
class BayesianNN:
    """Small regression network built from TensorFlow Probability Flipout layers.

    Attributes:
        model: the ``tf.keras.Sequential`` network produced by ``build_model``.
    """

    def __init__(self, input_shape):
        self.model = self.build_model(input_shape)

    def build_model(self, input_shape):
        """Assemble input -> Flipout(128, relu) -> Flipout(64, relu) -> Flipout(1).

        Args:
            input_shape: shape of one input sample, forwarded to ``InputLayer``.

        Returns:
            The (uncompiled) sequential model.
        """
        network = tf.keras.Sequential()
        network.add(tf.keras.layers.InputLayer(input_shape=input_shape))
        for hidden_units in (128, 64):
            network.add(tfp.layers.DenseFlipout(hidden_units, activation='relu'))
        network.add(tfp.layers.DenseFlipout(1))
        return network

    def train(self, x_train, y_train, epochs=50):
        """Compile with Adam and mean-squared error, then fit on the given data.

        The model is recompiled on every call.
        """
        mse_loss = tf.keras.losses.MeanSquaredError()
        self.model.compile(optimizer='adam', loss=mse_loss, metrics=['mse'])
        self.model.fit(x_train, y_train, epochs=epochs)

    def predict(self, x):
        """Run one forward pass by calling the model directly on ``x``.

        NOTE(review): Flipout layers presumably resample weights per call, so
        repeated calls may differ -- confirm against the TFP documentation.
        """
        return self.model(x)



Generate Task Prac Algorithm

In [9]:
def generate_task_prac(puzzle, num_tasks_per_iter, length_inc):
    """Create practice tasks by random-walking backwards from the goal.

    For every walk length in ``length_inc``, ``num_tasks_per_iter`` boards are
    produced, each by starting at ``puzzle.goal`` and applying that many
    uniformly random legal moves. Each step expands the *current* board of the
    walk, so longer walks can actually move further from the goal.

    Args:
        puzzle: puzzle instance supplying ``goal``; its class is instantiated
            as ``type(puzzle)(state)`` to expand intermediate boards. The
            passed-in instance itself is not modified.
        num_tasks_per_iter: number of tasks to generate per walk length.
        length_inc: iterable of walk lengths (number of random moves).

    Returns:
        A list of board arrays, ordered by walk length.
    """
    tasks = []
    for walk_length in length_inc:
        for _ in range(num_tasks_per_iter):
            walker = type(puzzle)(puzzle.goal.copy())
            for _ in range(walk_length):
                options = walker.get_neighbors()
                next_state, _cost = options[np.random.choice(len(options))]
                walker.state = next_state
            tasks.append(walker.state)
    return tasks

# Board with tiles 1..14 in place and the blank sitting just left of tile 15.
initial_state = np.array([
    [1, 2, 3, 4],
    [5, 6, 7, 8],
    [9, 10, 11, 12],
    [13, 14, 0, 15],
])
puzzle = Puzzle15(initial_state)

# Ten tasks for each of the six walk lengths.
tasks_prac = generate_task_prac(
    puzzle,
    num_tasks_per_iter=10,
    length_inc=[1, 2, 4, 6, 8, 10],
)

print("Generated tasks using GenerateTaskPrac:", len(tasks_prac))

Generated tasks using GenerateTaskPrac: 60


Generate Random Tasks

In [8]:
def generate_training_data(puzzle, num_samples=1000, walk_length=50):
    """Sample boards by random walk and label them with the Manhattan distance.

    Each sample starts from a copy of ``puzzle.state`` and applies
    ``walk_length`` uniformly random legal moves. Every step expands the
    current board of the walk, so the moves compound instead of always
    restarting from ``puzzle.state``.

    Args:
        puzzle: puzzle instance supplying ``state`` and ``goal``; its class is
            instantiated as ``type(puzzle)(state)`` to expand intermediate
            boards. The passed-in instance itself is not modified.
        num_samples: number of ``(features, label)`` pairs to produce.
        walk_length: number of random moves applied per sample.

    Returns:
        A list of ``(flattened_board, manhattan_distance)`` tuples.
    """
    data = []
    for _ in range(num_samples):
        walker = type(puzzle)(puzzle.state.copy())
        # Randomize the state with a series of valid moves
        for _ in range(walk_length):
            options = walker.get_neighbors()
            next_state, _cost = options[np.random.choice(len(options))]
            walker.state = next_state
        heuristic_value = Heuristics.manhattan_distance(walker.state, puzzle.goal)
        data.append((walker.state.flatten(), heuristic_value))
    return data

def main():
    """Train the network on Manhattan-labelled boards and use it to guide IDA*.

    Builds a puzzle one move from solved, fits a ``BayesianNN`` on data from
    ``generate_training_data``, wraps the network as a heuristic and prints
    the result of ``IDAStar.search``.
    """
    initial_state = np.array([[1, 2, 3, 4],
                              [5, 6, 7, 8],
                              [9, 10, 11, 12],
                              [13, 14, 0, 15]])
    puzzle = Puzzle15(initial_state)

    # Generate training data
    training_data = generate_training_data(puzzle)
    x_train = np.array([features for features, _ in training_data])
    y_train = np.array([label for _, label in training_data])

    # Train Bayesian Neural Network
    bayesian_nn = BayesianNN(input_shape=(16,))
    bayesian_nn.train(x_train, y_train, epochs=50)

    # Integrate Bayesian Neural Network as heuristic function
    def neural_heuristic(state, goal):
        # The network takes a batch of flattened boards; `goal` is unused.
        state_flat = state.flatten().reshape(1, -1)
        return bayesian_nn.predict(state_flat)[0, 0].numpy()

    # Run IDA* search with neural network heuristic
    ida_star = IDAStar(puzzle, neural_heuristic)
    result = ida_star.search()
    print("Search Result:", result)

# Run the demo only when this code is the entry point (not when imported).
if __name__ == "__main__":
    main()


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Search Result: True


Learn Heuristic Prac Algorithm

In [14]:
def learn_heuristic_prac(puzzle, num_iter=50, tmax=60, max_steps=1000, dropout_rate=0.25):
    """Fit a ``BayesianNN`` heuristic, growing the training set each iteration.

    An initial model is trained on 1000 samples. Each iteration then draws 20
    new samples, appends them to the training set and retrains on the enlarged
    set, so every fit sees the newest data and the last batch is not wasted.

    Args:
        puzzle: puzzle instance passed through to ``generate_training_data``.
        num_iter: number of grow-and-retrain iterations.
        tmax: accepted for interface compatibility; not used here.
        max_steps: accepted for interface compatibility; not used here.
        dropout_rate: accepted for interface compatibility; not used here.

    Returns:
        The trained ``BayesianNN``.
    """
    # Generate initial training data
    training_data = generate_training_data(puzzle, num_samples=1000)
    x_train = np.array([features for features, _ in training_data])
    y_train = np.array([label for _, label in training_data])

    # Initialize Bayesian Neural Network
    bayesian_nn = BayesianNN(input_shape=(16,))
    bayesian_nn.train(x_train, y_train, epochs=50)

    for iteration in range(num_iter):
        print(f"Iteration {iteration + 1}/{num_iter}")

        # Generate new training data
        new_data = generate_training_data(puzzle, num_samples=20)
        x_new = np.array([features for features, _ in new_data])
        y_new = np.array([label for _, label in new_data])

        # Update training dataset
        x_train = np.vstack([x_train, x_new])
        y_train = np.concatenate([y_train, y_new])

        # Train and update the heuristic on the enlarged dataset
        bayesian_nn.train(x_train, y_train, epochs=50)

    return bayesian_nn

Evaluation Against Test Dataset

In [15]:
def evaluate_on_benchmark(bayesian_nn, benchmarks):
    """Solve each benchmark board with IDA* guided by the network and collect stats.

    ``IDAStar.search`` may return a plain truthy/falsy flag or a mapping with
    ``'generated'`` and ``'cost'`` keys; both are handled. With a flag result,
    the node count is approximated by the number of heuristic evaluations and
    the solution cost is read from a ``solution_cost`` attribute on the
    searcher if it exposes one.

    The reference cost is the Manhattan distance of the start board, which is
    a lower bound on the true optimum, so ``subopt`` is an upper bound on the
    real suboptimality.

    Args:
        bayesian_nn: object whose ``predict`` maps a ``(1, 16)`` array to a
            tensor indexable as ``[0, 0]`` with a ``.numpy()`` method.
        benchmarks: iterable of 4x4 board arrays.

    Returns:
        Dict of per-task lists: ``generated`` (node count), ``time`` (seconds),
        ``subopt`` (cost / reference; ``inf`` if unsolved, ``nan`` if the cost
        is unknown) and ``optimal`` (solved with ``subopt == 1``).
    """
    results = {"generated": [], "time": [], "subopt": [], "optimal": []}
    heuristic_calls = {"count": 0}

    def neural_heuristic(state, goal):
        heuristic_calls["count"] += 1
        state_flat = state.flatten().reshape(1, -1)
        return bayesian_nn.predict(state_flat)[0, 0].numpy()

    for initial_state in benchmarks:
        puzzle = Puzzle15(initial_state)
        ida_star = IDAStar(puzzle, neural_heuristic)
        heuristic_calls["count"] = 0

        start_time = time.time()
        result = ida_star.search()
        time_taken = time.time() - start_time

        solved = bool(result)
        if isinstance(result, dict):
            generated = result.get("generated", heuristic_calls["count"])
            cost = result.get("cost")
        else:
            generated = heuristic_calls["count"]
            cost = getattr(ida_star, "solution_cost", None)

        reference_cost = Heuristics.manhattan_distance(initial_state, puzzle.goal)
        if not solved:
            suboptimality = float('inf')
        elif cost is None:
            suboptimality = float('nan')  # solved, but the path cost is unknown
        elif reference_cost == 0:
            # Start board is already the goal: avoid dividing by zero.
            suboptimality = 1.0 if cost == 0 else float('inf')
        else:
            suboptimality = cost / reference_cost

        results["generated"].append(generated)
        results["time"].append(time_taken)
        results["subopt"].append(suboptimality)
        results["optimal"].append(solved and suboptimality == 1)

    return results

def main():
    """Learn the heuristic, run it on the benchmark boards and print mean metrics."""
    start_board = np.array([
        [1, 2, 3, 4],
        [5, 6, 7, 8],
        [9, 10, 11, 12],
        [13, 14, 0, 15],
    ])
    puzzle = Puzzle15(start_board)

    # Train the heuristic using LearnHeuristicPrac
    bayesian_nn = learn_heuristic_prac(puzzle, num_iter=50, tmax=60, max_steps=1000)

    # Benchmark start boards for the 15-puzzle (20 tasks listed here).
    benchmark_boards = [
        [[2, 3, 4, 8], [1, 6, 7, 12], [5, 10, 11, 15], [9, 13, 14, 0]],
        [[2, 3, 4, 8], [1, 6, 7, 12], [5, 10, 11, 0], [9, 13, 14, 15]],
        [[2, 3, 4, 8], [1, 6, 7, 12], [5, 10, 0, 11], [9, 13, 14, 15]],
        [[2, 3, 4, 8], [1, 6, 0, 7], [5, 10, 11, 12], [9, 13, 14, 15]],
        [[2, 3, 4, 0], [1, 6, 7, 8], [5, 10, 11, 12], [9, 13, 14, 15]],
        [[2, 3, 0, 4], [1, 6, 7, 8], [5, 10, 11, 12], [9, 13, 14, 15]],
        [[2, 0, 3, 4], [1, 6, 7, 8], [5, 10, 11, 12], [9, 13, 14, 15]],
        [[0, 2, 3, 4], [1, 6, 7, 8], [5, 10, 11, 12], [9, 13, 14, 15]],
        [[1, 0, 3, 4], [2, 6, 7, 8], [5, 10, 11, 12], [9, 13, 14, 15]],
        [[1, 2, 0, 3], [6, 7, 4, 8], [5, 10, 11, 12], [9, 13, 14, 15]],
        [[1, 2, 3, 0], [6, 7, 4, 8], [5, 10, 11, 12], [9, 13, 14, 15]],
        [[1, 2, 3, 4], [6, 0, 7, 8], [5, 10, 11, 12], [9, 13, 14, 15]],
        [[1, 2, 3, 4], [6, 7, 0, 8], [5, 10, 11, 12], [9, 13, 14, 15]],
        [[1, 2, 3, 4], [6, 7, 8, 0], [5, 10, 11, 12], [9, 13, 14, 15]],
        [[1, 2, 3, 4], [6, 7, 8, 12], [5, 10, 11, 0], [9, 13, 14, 15]],
        [[1, 2, 3, 4], [6, 7, 8, 12], [5, 10, 0, 11], [9, 13, 14, 15]],
        [[1, 2, 3, 4], [6, 0, 8, 12], [5, 7, 10, 11], [9, 13, 14, 15]],
        [[1, 0, 3, 4], [6, 2, 8, 12], [5, 7, 10, 11], [9, 13, 14, 15]],
        [[1, 2, 3, 4], [6, 7, 8, 12], [5, 0, 10, 11], [9, 13, 14, 15]],
        [[1, 2, 3, 4], [6, 7, 8, 12], [0, 5, 10, 11], [9, 13, 14, 15]],
    ]
    benchmark_tasks = [np.array(board) for board in benchmark_boards]

    # Evaluate on the benchmark tasks
    results = evaluate_on_benchmark(bayesian_nn, benchmark_tasks)

    # Print the mean of every collected metric
    for metric in results:
        print(f"{metric}: {np.mean(results[metric])}")

    print("Benchmark evaluation completed.")

# Run the training + benchmark evaluation only when this code is the entry point.
if __name__ == "__main__":
    main()


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Iteration 1/50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 3