In [1]:
import PEC_Parser
import numpy as np
import matplotlib.pyplot as plt 
import gym
import random

In [2]:
def read_domain(folder, domain):
    """Return the full text of a domain description file.

    The file is looked up at ``pec_domains/<folder>/<domain>`` relative to
    the current working directory.

    Parameters
    ----------
    folder : str
        Sub-directory of ``pec_domains`` that contains the file.
    domain : str
        File name of the domain description, including its extension.

    Returns
    -------
    str
        The complete contents of the file.

    Raises
    ------
    OSError
        If the file does not exist or cannot be read.
    """
    # Local import so this cell does not depend on the notebook's import cell.
    import os

    # os.path.join uses the platform separator; the previous hard-coded
    # backslashes only resolved on Windows.
    file_path = os.path.join("pec_domains", folder, domain)
    # The context manager closes the handle even if reading fails.
    with open(file_path, "r") as file:
        return file.read()

In [3]:
# Read the raw text of the cooking-robot domain description
domain_string = read_domain("complex_domains", "cooking_robot.txt")
# Instantiate a domain object
domain = PEC_Parser.domain()

# Compute fluents, values, states, and actions as dictionaries for domain object.
# Kept as the final bare expression so the notebook displays the returned dictionaries.
domain.initialise_all(domain_string)

({'carrot': 0,
  'onion': 1,
  'chicken': 2,
  'garlic': 3,
  'leek': 4,
  'turnip': 5,
  'soup': 6},
 {'unprepared': 0, 'prepared': 1, 'incomplete': 2, 'complete': 3, 'plated': 4},
 {(0, 0, 0, 0, 0, 0, 2): 0,
  (0, 0, 0, 0, 0, 0, 3): 1,
  (0, 0, 0, 0, 0, 0, 4): 2,
  (0, 0, 0, 0, 0, 1, 2): 3,
  (0, 0, 0, 0, 0, 1, 3): 4,
  (0, 0, 0, 0, 0, 1, 4): 5,
  (0, 0, 0, 0, 1, 0, 2): 6,
  (0, 0, 0, 0, 1, 0, 3): 7,
  (0, 0, 0, 0, 1, 0, 4): 8,
  (0, 0, 0, 0, 1, 1, 2): 9,
  (0, 0, 0, 0, 1, 1, 3): 10,
  (0, 0, 0, 0, 1, 1, 4): 11,
  (0, 0, 0, 1, 0, 0, 2): 12,
  (0, 0, 0, 1, 0, 0, 3): 13,
  (0, 0, 0, 1, 0, 0, 4): 14,
  (0, 0, 0, 1, 0, 1, 2): 15,
  (0, 0, 0, 1, 0, 1, 3): 16,
  (0, 0, 0, 1, 0, 1, 4): 17,
  (0, 0, 0, 1, 1, 0, 2): 18,
  (0, 0, 0, 1, 1, 0, 3): 19,
  (0, 0, 0, 1, 1, 0, 4): 20,
  (0, 0, 0, 1, 1, 1, 2): 21,
  (0, 0, 0, 1, 1, 1, 3): 22,
  (0, 0, 0, 1, 1, 1, 4): 23,
  (0, 0, 1, 0, 0, 0, 2): 24,
  (0, 0, 1, 0, 0, 0, 3): 25,
  (0, 0, 1, 0, 0, 0, 4): 26,
  (0, 0, 1, 0, 0, 1, 2): 27,
  (0, 0, 1, 0, 0

In [4]:
# Probabilistic ingredients parsed from the domain text
initial_distribution = domain.get_initial(domain_string)
transition_matrix = domain.get_transition(domain_string)
policy_matrix = domain.get_policy(domain_string)
n_steps = domain.max_instant

# Forward lookups (key -> integer index) built by the parser
state_dict = domain.state_dict
action_dict = domain.action_dict
value_dict = domain.value_dict
fluent_dict = domain.fluent_dict

# Inverse lookups (integer index -> key)
reverse_state_dict = {index: state for state, index in state_dict.items()}
reverse_action_dict = {index: action for action, index in action_dict.items()}
reverse_value_dict = {index: value for value, index in value_dict.items()}
reverse_fluent_dict = {index: fluent for fluent, index in fluent_dict.items()}

In [5]:
# Transition data as an ndarray
T = np.array(transition_matrix)
print("transition shape:", T.shape)

# Row that puts all probability mass on the last action (the null action);
# it stands in for states whose policy row is empty (finished states).
null_action_row = [0.0] * (len(action_dict) - 1) + [1.0]

policy_container = []
for policy in policy_matrix:
    filled_rows = []
    for row in policy:
        if row == []:
            filled_rows.append(null_action_row)
        else:
            filled_rows.append(row)
    Pi_matrix = np.array(filled_rows)
    print("policy shape:", Pi_matrix.shape)
    policy_container.append(Pi_matrix)

transition shape: (9, 192, 192)
policy shape: (192, 9)
policy shape: (192, 9)
policy shape: (192, 9)
policy shape: (192, 9)
policy shape: (192, 9)
policy shape: (192, 9)
policy shape: (192, 9)
policy shape: (192, 9)
policy shape: (192, 9)
policy shape: (192, 9)
policy shape: (192, 9)
policy shape: (192, 9)


In [None]:
# Reorder the transition tensor axes from (A, S, S') to (S', S, A)
T_reshaped = T.transpose(2, 1, 0)

# Dense vector of starting-state probabilities:
# initial_distribution[0] holds state indices, initial_distribution[1] their probabilities
S = np.zeros(len(state_dict))
for position, state_index in enumerate(initial_distribution[0]):
    S[state_index] = initial_distribution[1][position]

# One state-probability vector per instant; entry 0 is the starting distribution
states_at_instant = [S]

current = S
for step in range(n_steps):
    # Policy for this step has shape (S, A); flatten it to line up with the
    # merged (S, A) axes of the transition tensor below
    policy_flat = policy_container[step].reshape(-1)
    # Scale each transition entry by the probability of its source state
    T_matrix = T_reshaped * current.reshape(1, -1, 1)
    # Merge the (S, A) axes, then sum over them weighted by the policy
    T_matrix = T_matrix.reshape(T_matrix.shape[0], -1)
    next_state_probs = np.dot(T_matrix, policy_flat)
    states_at_instant.append(next_state_probs)
    current = next_state_probs

In [11]:
query_literal = "soup=plated"
query_time = 11  # From 0 to max time instant
# Indices of the states returned by the parser for the queried literal
states_associated = set(domain.partial_to_states(query_literal))
# Probability of the literal at query_time: total mass of those states
print(sum(states_at_instant[query_time][state_index] for state_index in states_associated))

0.6546912150878905


In [8]:
# Hand-computed check value: its recorded result agrees with the propagated
# "soup=plated" probability printed by the query cell, up to floating-point rounding.
# NOTE(review): the meaning of the individual factors is not stated in this
# notebook — confirm against the domain file.
0.95**6 * (1-0.5**4) * (1- 0.05)

0.6546912150878904

In [None]:
# Set-up for a conditional query: probability of query_literal at query_time
# given the listed conditions.
query_time = 11 # From 0 to max time instant
query_literal = "soup=plated"
# Presumably each condition is a [literal, time instant] pair — TODO confirm
# against the code that consumes it.
# The value name must match the domain's value dictionary, which lists
# "complete" (there is no "completed" value).
conditions = [["soup=complete", 9]]

