In [None]:
# Importing the necessary modules
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from scipy.stats import norm
import time



In [20]:
def get_neighbors(state):
    """Return every state reachable by sliding one tile into the blank.

    Args:
        state: Flat list read row by row on a 4-wide board, in which the
            value 0 marks the blank square.

    Returns:
        A list of ``(new_state, move_cost)`` tuples. ``new_state`` is a
        fresh copy of ``state`` with the blank swapped with one adjacent
        cell and ``move_cost`` is always 1. Moves are emitted in the fixed
        order up, down, left, right; moves that would leave the 4x4 board
        are omitted.
    """
    blank = state.index(0)
    blank_row, blank_col = divmod(blank, 4)

    # (row delta, column delta) for up, down, left, right -- in that order.
    offsets = ((-1, 0), (1, 0), (0, -1), (0, 1))

    successors = []
    for d_row, d_col in offsets:
        target_row = blank_row + d_row
        target_col = blank_col + d_col
        if not (0 <= target_row <= 3 and 0 <= target_col <= 3):
            continue  # the blank would leave the board
        target = target_row * 4 + target_col
        child = state[:]  # copy so the caller's state is never mutated
        child[blank], child[target] = child[target], child[blank]
        successors.append((child, 1))

    return successors

def feature_representation(state):
    """Turn a puzzle state into the feature vector used by the network.

    The state's values are copied unchanged into a new NumPy array; no
    scaling or encoding is applied.
    """
    features = np.array(state)
    return features

def manhattan_distance(state, goal):
    """Sum the Manhattan distances of tiles 1..15 between two boards.

    Both ``state`` and ``goal`` are flat sequences laid out row by row on
    a 4-wide board. The blank (0) is not included in the sum.

    Returns:
        The total number of row and column steps separating each tile's
        position in ``state`` from its position in ``goal``.
    """
    def tile_offset(tile):
        # Row/column of the tile on each board, then the L1 distance.
        here_row, here_col = divmod(state.index(tile), 4)
        there_row, there_col = divmod(goal.index(tile), 4)
        return abs(here_row - there_row) + abs(here_col - there_col)

    return sum(tile_offset(tile) for tile in range(1, 16))


In [21]:
def train_nn_wunn(memory_buffer, max_train_iter, mini_batch_size):
    """Train a small regression network on the memory buffer.

    Args:
        memory_buffer: 2-D NumPy array. Every column except the last is an
            input feature; the last column is the regression target.
        max_train_iter: Number of mini-batch updates to perform.
        mini_batch_size: Number of rows sampled for each update. Rows are
            drawn with ``np.random.choice`` using its default of sampling
            with replacement.

    Returns:
        The compiled and trained Keras ``Sequential`` model (one hidden
        ReLU layer of 64 units and a single linear output, Adam + MSE).
    """
    n_features = memory_buffer.shape[1] - 1
    nn_wunn = Sequential([
        Dense(64, activation='relu', input_shape=(n_features,)),
        Dense(1)
    ])
    nn_wunn.compile(optimizer='adam', loss='mse')

    for _ in range(max_train_iter):
        batch_indices = np.random.choice(len(memory_buffer), mini_batch_size)
        batch = memory_buffer[batch_indices]
        X_batch = batch[:, :-1]
        y_batch = batch[:, -1]
        # One gradient step on exactly this mini-batch. Calling fit() here
        # would re-split the batch with its default batch_size of 32 and
        # rebuild its training machinery on every iteration.
        nn_wunn.train_on_batch(X_batch, y_batch)

    return nn_wunn


# Seed NumPy (buffer contents, mini-batch sampling) and TensorFlow (weight
# initialisation) so that Restart & Run All reproduces the same model.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Placeholder buffer of uniform random values: 16 feature columns plus one
# target column, used only to obtain an initial network.
memory_buffer = np.random.rand(100, 17)
max_train_iter = 5000
mini_batch_size = 100
nn_wunn = train_nn_wunn(memory_buffer, max_train_iter, mini_batch_size)


In [22]:
def heuristic(state, nn_wunn, alpha, yq, epsilon):
    """Estimate cost-to-go as a quantile of a normal around the prediction.

    Args:
        state: Puzzle state; converted with ``feature_representation``.
        nn_wunn: Model exposing a Keras-style ``predict`` that returns a
            2-D array for a batch of one input.
        alpha: Probability passed to ``norm.ppf`` (the quantile level).
        yq: Threshold on the predicted value that selects the scale.
        epsilon: Scale used when the prediction is not below ``yq``.

    Returns:
        The ``alpha``-quantile of ``Normal(y_hat, sigma_t)``, clipped
        below at 0.
    """
    x = feature_representation(state)
    # verbose=0 silences the per-call progress bar; this function is
    # invoked once per searched node, so the bar otherwise floods the
    # cell output.
    y_hat = nn_wunn.predict(np.array([x]), verbose=0)[0][0]
    sigma_a = 1  # Placeholder for sigma_a, assume 1 for simplicity
    # Below the threshold use the placeholder scale, otherwise epsilon.
    sigma_t = sigma_a if y_hat < yq else epsilon
    y_alpha = norm.ppf(alpha, loc=y_hat, scale=sigma_t)
    return max(y_alpha, 0)


In [23]:
def ida_star(start, goal, heuristic):
    """Iterative-deepening A* search from ``start`` to ``goal``.

    Args:
        start: Initial state.
        goal: Target state; compared to states with ``==``.
        heuristic: Callable taking a state and returning an estimate of
            the remaining cost.

    Returns:
        ``(path, nodes_generated)``. ``path`` is the list of states from
        ``start`` to ``goal`` inclusive, or ``None`` when no successor is
        left to explore. ``nodes_generated`` is the number of successor
        states produced by ``get_neighbors`` summed over all threshold
        iterations (previously this slot held the length of the path,
        which is not a measure of search effort).
    """
    nodes_generated = 0

    def search(path, g, threshold):
        """Depth-first probe bounded by ``threshold``.

        Returns the path (a list) on success, otherwise the smallest
        f-value that exceeded the threshold.
        """
        nonlocal nodes_generated
        current_state = path[-1]
        f = g + heuristic(current_state)
        if f > threshold:
            return f
        if current_state == goal:
            return path
        min_threshold = float('inf')
        for neighbor, move_cost in get_neighbors(current_state):
            nodes_generated += 1
            # Skip states already on the current path to avoid cycles.
            if neighbor not in path:
                path.append(neighbor)
                temp = search(path, g + move_cost, threshold)
                if isinstance(temp, list):
                    return temp
                if temp < min_threshold:
                    min_threshold = temp
                path.pop()
        return min_threshold

    threshold = heuristic(start)
    path = [start]
    while True:
        temp = search(path, 0, threshold)
        if isinstance(temp, list):
            return temp, nodes_generated
        if temp == float('inf'):
            return None, nodes_generated
        # Next iteration uses the smallest f-value that was cut off.
        threshold = temp


In [24]:
# Settings named after the like-named parameters of heuristic():
#   alpha   - quantile level handed to norm.ppf
#   yq      - threshold compared against the network prediction
#   epsilon - scale used when the prediction is not below yq
alpha, yq, epsilon = 0.95, 1, 0.05

def solve_and_update(memory_buffer, tasks, nn_wunn, alpha, epsilon, yq):
    """Solve each task with IDA* and append the visited states to the buffer.

    Note the parameter order: ``epsilon`` comes before ``yq`` here, unlike
    in ``heuristic``. Pass them by keyword to avoid swapping them.

    Args:
        memory_buffer: 2-D NumPy array of existing rows
            (features followed by a cost-to-goal target).
        tasks: Iterable of ``(start, goal)`` pairs.
        nn_wunn: Model forwarded to ``heuristic``.
        alpha, epsilon, yq: Forwarded to ``heuristic``.

    Returns:
        A new NumPy array holding the old rows plus one row per non-goal
        state on every solved path. The target is the number of moves
        remaining from that state to the end of the path.
    """
    memory_buffer_list = memory_buffer.tolist()  # Convert to list for appending

    for start, goal in tasks:
        path, _nodes_generated = ida_star(
            start, goal, lambda s: heuristic(s, nn_wunn, alpha, yq, epsilon)
        )
        if not path:
            continue

        # A path of n states consists of n - 1 moves, so the state at
        # position `step` is (n - 1 - step) moves away from the final state.
        total_moves = len(path) - 1
        for step, state in enumerate(path):
            if state != goal:
                cost_to_goal = total_moves - step
                features = feature_representation(state)
                memory_buffer_list.append(np.append(features, cost_to_goal))

    return np.array(memory_buffer_list)  # Convert back to numpy array if needed

# Example usage:
tasks = [
    ([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0, 15], [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0]),
    ([14, 13, 15, 7, 11, 12, 9, 5, 6, 0, 2, 1, 4, 8, 10, 3], [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0]),
    ([13, 5, 4, 10, 9, 12, 8, 14, 2, 3, 7, 1, 0, 15, 11, 6], [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0])
]
# Keyword arguments are required for correctness here: solve_and_update
# takes (alpha, epsilon, yq), so the former positional call
# `0.95, 1, 0.05` bound epsilon=1 and yq=0.05 -- the reverse of the
# constants defined at the top of this cell.
# NOTE: the recorded run of this cell ended in a KeyboardInterrupt, so it
# may take a very long time to finish.
memory_buffer = solve_and_update(
    memory_buffer, tasks, nn_wunn, alpha=alpha, epsilon=epsilon, yq=yq
)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m


KeyboardInterrupt: 

In [None]:
def collect_performance_data(test_tasks, nn_wunn, alpha, yq, epsilon, max_optimal_moves=80):
    """Run IDA* on each task and record search statistics.

    Args:
        test_tasks: Iterable of ``(start, goal)`` pairs.
        nn_wunn: Model forwarded to ``heuristic``.
        alpha, yq, epsilon: Forwarded to ``heuristic`` (in that order).
        max_optimal_moves: A solution is flagged "optimal" when it uses at
            most this many moves. This is a length cut-off, not a proof of
            optimality. Defaults to 80.

    Returns:
        A list with one tuple per task:
        ``(nodes_generated, planning_time, optimal, manhattan_dist)``,
        where ``nodes_generated`` is the second value returned by
        ``ida_star``, ``planning_time`` is in seconds and
        ``manhattan_dist`` is measured from ``start`` to ``goal``.
    """
    performance_data = []

    for start, goal in test_tasks:
        # perf_counter is monotonic, so the interval cannot be distorted
        # by system clock adjustments.
        start_time = time.perf_counter()
        path, nodes_generated = ida_star(
            start, goal, lambda s: heuristic(s, nn_wunn, alpha, yq, epsilon)
        )
        planning_time = time.perf_counter() - start_time
        manhattan_dist = manhattan_distance(start, goal)
        # The path lists states, so the number of moves is one less than
        # its length.
        optimal = path is not None and len(path) - 1 <= max_optimal_moves

        performance_data.append((nodes_generated, planning_time, optimal, manhattan_dist))

    return performance_data

# Collect performance data for test tasks.
# Passed by keyword from the notebook-level constants: the former
# positional call `0.95, 0.05, 1` bound yq=0.05 and epsilon=1, the reverse
# of the values defined for `yq` and `epsilon` earlier in the notebook.
performance_data = collect_performance_data(
    tasks, nn_wunn, alpha=alpha, yq=yq, epsilon=epsilon
)




In [None]:
# Print performance data in the form of a table
print("Table 1: Performance of NN-WUNN Heuristic on 15-Puzzle")
print("Task | Nodes Generated | Planning Time (s) | Optimal | Manhattan Distance")
# The loop variable must not be called `time`: at notebook scope that would
# rebind the imported `time` module to a float and break any later re-run
# of the cell that measures planning time.
for i, (nodes, planning_time, opt, manhattan_dist) in enumerate(performance_data):
    print(f"{i + 1}    | {nodes}             | {planning_time:.4f}            | {opt}     | {manhattan_dist}")