In [37]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import random
from scipy.stats import norm
from collections import deque

In [38]:
# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


Using device: cuda


In [39]:
# Define Pattern Databases for the 24-puzzle domain
pdb_24_puzzle_1 = [
    [1, 2, 5, 6, 7],
    [3, 4, 8, 9, 14],
    [10, 15, 16, 20, 21],
    [11, 12, 17, 22],
    [13, 18, 23, 24]
]

# Feature extraction function
def extract_features(state, pdb):
    features = []
    for pattern in pdb:
        features.append(sum(state[i-1] for i in pattern if state[i-1] != 0))  # Adjust index as state is 0-based
    return np.array(features)

# Generate initial and goal states (placeholders)
def generate_initial_state_24_puzzle():
    while True:
        state = np.random.permutation(25)
        if is_solvable(state):
            return state

def generate_goal_state_24_puzzle():
    return np.arange(1, 26)

# Function to check if a 24-puzzle state is solvable
def is_solvable(state):
    inversion_count = 0
    for i in range(len(state)):
        for j in range(i + 1, len(state)):
            if state[i] != 0 and state[j] != 0 and state[i] > state[j]:
                inversion_count += 1
    return inversion_count % 2 == 0

# Erev function (reverse state space function)
def Erev_24_puzzle(s_prime):
    # For demonstration, returning random permutations
    # Replace with actual predecessors logic for 24-puzzle
    return [np.random.permutation(len(s_prime)) for _ in range(5)]


In [40]:

# Neural Network Definitions
class WeightUncertaintyNN(nn.Module):
    def __init__(self, input_dim, output_dim, mu0, sigma0):
        super(WeightUncertaintyNN, self).__init__()
        self.mu0 = mu0
        self.sigma0 = sigma0
        self.fc = nn.Linear(input_dim, output_dim)
        self.dropout = nn.Dropout(p=0.2)
        
    def forward(self, x):
        x = self.dropout(x)
        return self.fc(x)

class FeedForwardNN(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(FeedForwardNN, self).__init__()
        self.fc = nn.Linear(input_dim, output_dim)
        self.dropout = nn.Dropout(p=0.2)
        
    def forward(self, x):
        x = self.dropout(x)
        return self.fc(x)

# Compute sigma_e^2 function with epistemic uncertainty
def compute_sigma_e2(nn_wunn, x, K):
    nn_wunn.train()  # Enable dropout
    samples = np.array([nn_wunn(torch.tensor(x, dtype=torch.float32).to(device)).cpu().detach().numpy() for _ in range(K)])
    nn_wunn.eval()  # Disable dropout
    return np.var(samples)

# Sample from softmax distribution
def sample_from_softmax(states):
    keys, values = zip(*states.items())
    max_value = np.max(values)  # Find the maximum value for numerical stability
    exp_values = np.exp(values - max_value)  # Subtract max_value from values before exponentiation
    softmax_probs = exp_values / np.sum(exp_values)  # Calculate softmax probabilities
    selected_index = np.random.choice(len(keys), p=softmax_probs)
    return keys[selected_index], values[selected_index]


In [42]:

# Generate Task Practical Implementation
def generate_task_prac_24_puzzle(nn_wunn, epsilon, max_steps, K, initial_state, goal_state):
    s_prime = initial_state
    num_steps = 0
    s_double_prime = None
    
    while num_steps < max_steps:
        num_steps += 1
        states = {}
        
        for s in Erev_24_puzzle(s_prime):
            if s_double_prime is not None and np.array_equal(s_double_prime, s):
                continue
            
            x = extract_features(s, pdb_24_puzzle_1)  # Change pdb as needed
            sigma_e2 = compute_sigma_e2(nn_wunn, x, K)
            states[tuple(s)] = sigma_e2
        
        if len(states) == 0:
            print("No states to choose from, breaking.")
            break
        
        s, sigma_e2 = sample_from_softmax(states)
        
        if sigma_e2 >= epsilon:
            T = (s, goal_state)
            print(f"Generated Task: {T}")
            return T
        
        s_double_prime = s_prime
        s_prime = s
    
    return None  # If max_steps reached without finding a suitable task


In [43]:

# Task Solving with IDA*
def ida_star(T, h, alpha, nn_ffnn, tmax):
    def search(path, g, bound):
        node = path[-1]
        features = extract_features(node, pdb_24_puzzle_1)  # Extract features before passing to the network
        f = g + h(alpha, nn_ffnn(torch.tensor(features, dtype=torch.float32).to(device)).cpu().item())
        
        if f > bound:
            return f, None
        if np.array_equal(node, T[1]):
            return f, path
        
        min_bound = float('inf')
        for succ in Erev_24_puzzle(node):
            if tuple(succ) not in path:
                path.append(tuple(succ))
                t, result = search(path, g + cost(node, succ), bound)
                if result is not None:
                    return t, result
                if t < min_bound:
                    min_bound = t
                path.pop()
        return min_bound, None
    
    initial_features = extract_features(T[0], pdb_24_puzzle_1)  # Extract features before passing to the network
    bound = h(alpha, nn_ffnn(torch.tensor(initial_features, dtype=torch.float32).to(device)).cpu().item())
    path = [tuple(T[0])]
    
    while True:
        t, result = search(path, 0, bound)
        if result is not None:
            return result
        if t == float('inf'):
            return None  # No solution found
        bound = t

def cost(node, succ):
    # Placeholder cost function
    return 1

def is_goal(node, goal):
    return np.array_equal(node, goal)

def h(alpha, nn_ffnn_output):
    return norm.ppf(alpha, loc=nn_ffnn_output, scale=1)  # Assuming scale=1 for simplicity


In [47]:

def learn_heuristic_prac_24_puzzle(params):
    nn_wunn = WeightUncertaintyNN(input_dim=5, output_dim=1, mu0=params['mu0'], sigma0=params['sigma0']).to(device)
    nn_ffnn = FeedForwardNN(input_dim=5, output_dim=1).to(device)
    memory_buffer = deque(maxlen=params['MemoryBufferMaxRecords'])
    yq = -np.inf
    alpha = params['alpha0']
    beta = params['beta0']
    update_beta = True
    optimizer_wunn = optim.Adam(nn_wunn.parameters())
    optimizer_ffnn = optim.Adam(nn_ffnn.parameters())
    criterion = nn.MSELoss()
    results = []
    
    for n in range(params['NumIter']):
        num_solved = 0
        times = []
        generated_nodes = []
        suboptimalities = []
        optimal_solutions = []

        for i in range(params['NumTasksPerIter']):
            initial_state = generate_initial_state_24_puzzle()
            goal_state = generate_goal_state_24_puzzle()
            T = generate_task_prac_24_puzzle(nn_wunn, params['epsilon'], params['MaxSteps'], params['K'], initial_state, goal_state)
            
            if T is not None:
                plan = ida_star(T, h, alpha, nn_ffnn, params['tmax'])
                
                if plan:
                    num_solved += 1
                    for sj in plan:
                        if np.array_equal(sj, goal_state):
                            yj = cost_to_goal(sj)
                            xj = extract_features(sj, pdb_24_puzzle_1)  # Change pdb as needed
                            memory_buffer.append((xj, yj))
                    print(f"Task solved. Current memory buffer size: {len(memory_buffer)}")
                    # Collect statistics
                    times.append(np.random.random())  # Placeholder for time
                    generated_nodes.append(np.random.randint(1000, 10000))  # Placeholder for generated nodes
                    suboptimalities.append(np.random.random())  # Placeholder for suboptimality
                    optimal_solutions.append(np.random.random())  # Placeholder for optimal solutions
                else:
                    print("Failed to solve task.")
        
        if num_solved < params['NumTasksPerIterThresh']:
            alpha = max(alpha - params['delta'], 0.5)
            update_beta = False
        else:
            update_beta = True
        
        # Train FFNN
        if len(memory_buffer) > 0:
            inputs, targets = zip(*random.sample(memory_buffer, min(len(memory_buffer), params['MiniBatchSize'])))
            inputs = torch.tensor(inputs, dtype=torch.float32).to(device)
            targets = torch.tensor(targets, dtype=torch.float32).to(device)
            optimizer_ffnn.zero_grad()
            outputs = nn_ffnn(inputs).squeeze()
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer_ffnn.step()
            print(f"FFNN Training Loss: {loss.item()}")
        
        # Train WUNN
        if len(memory_buffer) > 0:
            for _ in range(params['MaxTrainIter']):
                inputs, targets = zip(*random.sample(memory_buffer, min(len(memory_buffer), params['MiniBatchSize'])))
                inputs = torch.tensor(inputs, dtype=torch.float32).to(device)
                targets = torch.tensor(targets, dtype=torch.float32).to(device)
                optimizer_wunn.zero_grad()
                outputs = nn_wunn(inputs).squeeze()
                loss = criterion(outputs, targets)
                loss.backward()
                optimizer_wunn.step()
                sigma_e_vals = outputs.cpu().detach().numpy()
                if all(sigma_e < params['kappa'] * params['epsilon'] for sigma_e in sigma_e_vals):
                    break
            print(f"WUNN Training Loss: {loss.item()}")
        
        if update_beta:
            beta = params['gamma'] * beta
        
        if len(memory_buffer) > 0:
            yq = np.quantile([yj for _, yj in memory_buffer], params['q'])
        print(f"Iteration {n+1}/{params['NumIter']} complete. yq: {yq}, num_solved: {num_solved}")
        
        # Store results for this iteration
        results.append({
            'alpha': alpha,
            'times': times,
            'generated_nodes': generated_nodes,
            'suboptimalities': suboptimalities,
            'optimal_solutions': optimal_solutions
        })
    
    return results

# Placeholder for cost to goal
def cost_to_goal(s):
    return np.sum(s)

# Define parameters
params = {
    'NumIter': 100,
    'NumTasksPerIter': 10,
    'NumTasksPerIterThresh': 5,
    'alpha0': 0.95,
    'delta': 0.05,
    'epsilon': 0.1,
    'beta0': 1.0,
    'gamma': 0.9,
    'kappa': 0.1,
    'MaxSteps': 100,
    'MemoryBufferMaxRecords': 1000,
    'TrainIter': 10,
    'MaxTrainIter': 10,
    'MiniBatchSize': 32,
    'tmax': 10,
    'mu0': 0,
    'sigma0': 1,
    'q': 0.9,
    'K': 10
}

# Run the learning algorithm for the 24-puzzle and collect results
results_24_puzzle = learn_heuristic_prac_24_puzzle(params)

Generated Task: ((22, 23, 8, 11, 14, 4, 0, 13, 3, 10, 20, 5, 18, 15, 7, 2, 6, 16, 9, 1, 21, 19, 17, 24, 12), array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25]))


KeyboardInterrupt: 

In [None]:

def learn_heuristic_prac_24_puzzle(params):
    nn_wunn = WeightUncertaintyNN(input_dim=5, output_dim=1, mu0=params['mu0'], sigma0=params['sigma0']).to(device)
    nn_ffnn = FeedForwardNN(input_dim=5, output_dim=1).to(device)
    memory_buffer = deque(maxlen=params['MemoryBufferMaxRecords'])
    yq = -np.inf
    alpha = params['alpha0']
    beta = params['beta0']
    update_beta = True
    optimizer_wunn = optim.Adam(nn_wunn.parameters())
    optimizer_ffnn = optim.Adam(nn_ffnn.parameters())
    criterion = nn.MSELoss()
    results = []
    
    for n in range(params['NumIter']):
        num_solved = 0
        times = []
        generated_nodes = []
        suboptimalities = []
        optimal_solutions = []

        for i in range(params['NumTasksPerIter']):
            initial_state = generate_initial_state_24_puzzle()
            goal_state = generate_goal_state_24_puzzle()
            T = generate_task_prac_24_puzzle(nn_wunn, params['epsilon'], params['MaxSteps'], params['K'], initial_state, goal_state)
            
            if T is not None:
                plan = ida_star(T, h, alpha, nn_ffnn, params['tmax'])
                
                if plan:
                    num_solved += 1
                    for sj in plan:
                        if np.array_equal(sj, goal_state):
                            yj = cost_to_goal(sj)
                            xj = extract_features(sj, pdb_24_puzzle_1)  # Change pdb as needed
                            memory_buffer.append((xj, yj))
                    print(f"Task solved. Current memory buffer size: {len(memory_buffer)}")
                    # Collect statistics
                    times.append(np.random.random())  # Placeholder for time
                    generated_nodes.append(np.random.randint(1000, 10000))  # Placeholder for generated nodes
                    suboptimalities.append(np.random.random())  # Placeholder for suboptimality
                    optimal_solutions.append(np.random.random())  # Placeholder for optimal solutions
                else:
                    print("Failed to solve task.")
        
        if num_solved < params['NumTasksPerIterThresh']:
            alpha = max(alpha - params['delta'], 0.5)
            update_beta = False
        else:
            update_beta = True
        
        # Train FFNN
        if len(memory_buffer) > 0:
            inputs, targets = zip(*random.sample(memory_buffer, min(len(memory_buffer), params['MiniBatchSize'])))
            inputs = torch.tensor(inputs, dtype=torch.float32).to(device)
            targets = torch.tensor(targets, dtype=torch.float32).to(device)
            optimizer_ffnn.zero_grad()
            outputs = nn_ffnn(inputs).squeeze()
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer_ffnn.step()
            print(f"FFNN Training Loss: {loss.item()}")
        
        # Train WUNN
        if len(memory_buffer) > 0:
            for _ in range(params['MaxTrainIter']):
                inputs, targets = zip(*random.sample(memory_buffer, min(len(memory_buffer), params['MiniBatchSize'])))
                inputs = torch.tensor(inputs, dtype=torch.float32).to(device)
                targets = torch.tensor(targets, dtype=torch.float32).to(device)
                optimizer_wunn.zero_grad()
                outputs = nn_wunn(inputs).squeeze()
                loss = criterion(outputs, targets)
                loss.backward()
                optimizer_wunn.step()
                sigma_e_vals = outputs.cpu().detach().numpy()
                if all(sigma_e < params['kappa'] * params['epsilon'] for sigma_e in sigma_e_vals):
                    break
            print(f"WUNN Training Loss: {loss.item()}")
        
        if update_beta:
            beta = params['gamma'] * beta
        
        if len(memory_buffer) > 0:
            yq = np.quantile([yj for _, yj in memory_buffer], params['q'])
        print(f"Iteration {n+1}/{params['NumIter']} complete. yq: {yq}, num_solved: {num_solved}")
        
        # Store results for this iteration
        results.append({
            'alpha': alpha,
            'times': times,
            'generated_nodes': generated_nodes,
            'suboptimalities': suboptimalities,
            'optimal_solutions': optimal_solutions
        })
    
    return results

# Placeholder for cost to goal
def cost_to_goal(s):
    return np.sum(s)

# Define parameters
params = {
    'NumIter': 100,
    'NumTasksPerIter': 10,
    'NumTasksPerIterThresh': 5,
    'alpha0': 0.95,
    'delta': 0.05,
    'epsilon': 0.1,
    'beta0': 1.0,
    'gamma': 0.9,
    'kappa': 0.1,
    'MaxSteps': 100,
    'MemoryBufferMaxRecords': 1000,
    'TrainIter': 10,
    'MaxTrainIter': 10,
    'MiniBatchSize': 32,
    'tmax': 10,
    'mu0': 0,
    'sigma0': 1,
    'q': 0.9,
    'K': 10
}

# Run the learning algorithm for the 24-puzzle and collect results
results_24_puzzle = learn_heuristic_prac_24_puzzle(params)

Generated Task: ((11, 2, 14, 8, 4, 19, 21, 7, 16, 3, 18, 5, 0, 6, 12, 17, 23, 15, 13, 20, 22, 24, 10, 1, 9), array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25]))


KeyboardInterrupt: 

In [None]:

# Print detailed results for the 24-puzzle
for alpha_group in results_24_puzzle:
    alpha = alpha_group['alpha']
    times = np.array(alpha_group['times'])
    generated_nodes = np.array(alpha_group['generated_nodes'])
    suboptimalities = np.array(alpha_group['suboptimalities'])
    optimal_solutions = np.array(alpha_group['optimal_solutions'])

    print(f"\nAlpha: {alpha}")
    print(f"Time: {times.mean()} ({times.std()})")
    print(f"Generated Nodes: {generated_nodes.mean()} ({generated_nodes.std()})")
    print(f"Suboptimality: {suboptimalities.mean()} ({suboptimalities.std()})")
    print(f"Optimal Solutions: {optimal_solutions.mean()} ({optimal_solutions.std()})")



Alpha: 0.8999999999999999
Time: nan (nan)
Generated Nodes: nan (nan)
Suboptimality: nan (nan)
Optimal Solutions: nan (nan)

Alpha: 0.8499999999999999
Time: nan (nan)
Generated Nodes: nan (nan)
Suboptimality: nan (nan)
Optimal Solutions: nan (nan)

Alpha: 0.7999999999999998
Time: nan (nan)
Generated Nodes: nan (nan)
Suboptimality: nan (nan)
Optimal Solutions: nan (nan)

Alpha: 0.7499999999999998
Time: nan (nan)
Generated Nodes: nan (nan)
Suboptimality: nan (nan)
Optimal Solutions: nan (nan)

Alpha: 0.6999999999999997
Time: nan (nan)
Generated Nodes: nan (nan)
Suboptimality: nan (nan)
Optimal Solutions: nan (nan)

Alpha: 0.6499999999999997
Time: nan (nan)
Generated Nodes: nan (nan)
Suboptimality: nan (nan)
Optimal Solutions: nan (nan)

Alpha: 0.5999999999999996
Time: nan (nan)
Generated Nodes: nan (nan)
Suboptimality: nan (nan)
Optimal Solutions: nan (nan)

Alpha: 0.5499999999999996
Time: nan (nan)
Generated Nodes: nan (nan)
Suboptimality: nan (nan)
Optimal Solutions: nan (nan)

Alpha: 

  print(f"Time: {times.mean()} ({times.std()})")
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)
  print(f"Generated Nodes: {generated_nodes.mean()} ({generated_nodes.std()})")
  print(f"Suboptimality: {suboptimalities.mean()} ({suboptimalities.std()})")
  print(f"Optimal Solutions: {optimal_solutions.mean()} ({optimal_solutions.std()})")
