In [63]:
import numpy
import json
import utils
from utils import device
import hashlib
import torch
import numpy as np
from copy import deepcopy
import pickle


class NumpyEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        elif isinstance(obj, np.floating):
            return float(obj)
        elif isinstance(obj, np.ndarray):
            return obj.tolist()
        else:
            return super(NumpyEncoder, self).default(obj)

def hash_state(state):
    state_string = json.dumps(state, cls=NumpyEncoder, sort_keys=True).encode('utf-8')
    return hashlib.sha256(state_string).hexdigest()  # Hash the encoded state_string, not the state



def depth_first_search(env, agent, depth):
    if depth == 0:
        return [hash_state(env)]
    
    next_states = get_next_states(env, agent)

    # for each of the next states, get their next states and append them to all_states
    all_states = []

    for next_state in next_states:
        env_new = pickle.loads(pickle.dumps(env)) # restore the environment state
        all_states.extend(depth_first_search(env_new, agent, depth=depth-1))
    
    return all_states

def hash_grid(env):
    # Retrieve the grid
    grid = env.grid.encode()

    # Retrieve the agent's position and direction
    agent_pos = env.agent_pos
    agent_dir = env.agent_dir

    # Flatten the grid and convert to string
    grid_string = ''.join(str(cell) for row in grid for cell in row)

    # Add the agent's position and direction to the string
    state_string = f'{grid_string},{agent_pos},{agent_dir}'
    
    print(agent_pos, agent_dir)
    # Hash the string
    return hashlib.sha256(state_string.encode('utf-8')).hexdigest()


def get_next_states(env, agent):
    """
    Returns all possible next states given current state
    """
    # find all possible actions
    action_space = env.action_space.n

    # initialize next_states list
    next_states = []

    # get next state for each action
    for action in range(action_space):
        # Create a new environment instance by pickling and unpickling
        env_new = pickle.loads(pickle.dumps(env))

        obs_new, _, _, _, _ = env_new.step(action)
        next_states.append(hash_state(env_new))

    return set(next_states)



# Replace command line arguments with hard-coded values.
env_name = "MiniGrid-DoorKey-6x6-v0"
model_name = "DoorKeya2c"
seed = 0
shift = 0
argmax = False
pause = 0.1
gif = None
episodes = 1
memory = False
text = False

# Set seed for all randomness sources
utils.seed(seed)

# Set device
print(f"Device: {device}\n")

# Load environment
env = utils.make_env(env_name, seed)
for _ in range(shift):
    env.reset()
print("Environment loaded\n")

# Load agent
model_dir = utils.get_model_dir(model_name)
agent = utils.Agent(env.observation_space, env.action_space, model_dir,
                    argmax=argmax, use_memory=memory, use_text=text)
print("Agent loaded\n")

# Run the agent
if gif:
    from array2gif import write_gif
    frames = []

for episode in range(episodes):
    obs, _ = env.reset()

    while True:
        action = agent.get_action(obs)
        obs, reward, terminated, truncated, _ = env.step(action)

        next_states = depth_first_search(env, agent, depth=2)
        print(len(next_states))

        done = terminated | truncated
        agent.analyze_feedback(reward, done)

        if done:
            break

if gif:
    print("Saving gif... ", end="")
    write_gif(numpy.array(frames), gif+".gif", fps=1/pause)
    print("Done.")


Device: cuda

Environment loaded

Agent loaded



TypeError: Object of type OrderEnforcing is not JSON serializable

In [55]:
a = 1
print(f"{=a}")

SyntaxError: invalid syntax (<fstring>, line 1)

In [None]:
env_copy2 = env.grid


In [61]:
dir(env)

['__annotations__',
 '__class__',
 '__class_getitem__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__enter__',
 '__eq__',
 '__exit__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__orig_bases__',
 '__parameters__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__slotnames__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_action_space',
 '_cached_spec',
 '_disable_render_order_enforcing',
 '_has_reset',
 '_metadata',
 '_np_random',
 '_observation_space',
 '_reward_range',
 '_saved_kwargs',
 'action_space',
 'class_name',
 'close',
 'env',
 'has_reset',
 'metadata',
 'np_random',
 'observation_space',
 'render',
 'render_mode',
 'reset',
 'reward_range',
 'spec',
 'step',
 'unwrapped',
 'wrapper_spec']

In [34]:
env_copy2.grid

[<minigrid.core.world_object.Wall at 0x7fa2ed7b49d0>,
 <minigrid.core.world_object.Wall at 0x7fa2ed7a3fd0>,
 <minigrid.core.world_object.Wall at 0x7fa2ed7a3dd0>,
 <minigrid.core.world_object.Wall at 0x7fa2ed7b4450>,
 <minigrid.core.world_object.Wall at 0x7fa2eda6b510>,
 <minigrid.core.world_object.Wall at 0x7fa2ed7b4110>,
 <minigrid.core.world_object.Wall at 0x7fa2ed7a3f50>,
 None,
 None,
 <minigrid.core.world_object.Wall at 0x7fa36e290110>,
 None,
 <minigrid.core.world_object.Wall at 0x7fa2ed7b42d0>,
 <minigrid.core.world_object.Wall at 0x7fa2ed7b4a50>,
 None,
 None,
 <minigrid.core.world_object.Wall at 0x7fa2ed7b44d0>,
 None,
 <minigrid.core.world_object.Wall at 0x7fa2ed7b4f10>,
 <minigrid.core.world_object.Wall at 0x7fa2ed7b4ad0>,
 None,
 None,
 <minigrid.core.world_object.Door at 0x7fa2ed7b4090>,
 None,
 <minigrid.core.world_object.Wall at 0x7fa2ed7b4390>,
 <minigrid.core.world_object.Wall at 0x7fa2ed7b4b50>,
 None,
 None,
 <minigrid.core.world_object.Wall at 0x7fa2ed7b4690>,
 <min

In [23]:
env_copy.grid

[<minigrid.core.world_object.Wall at 0x7fa2eda42a50>,
 <minigrid.core.world_object.Wall at 0x7fa2eda42910>,
 <minigrid.core.world_object.Wall at 0x7fa2eda425d0>,
 <minigrid.core.world_object.Wall at 0x7fa2eda42a10>,
 <minigrid.core.world_object.Wall at 0x7fa2eda42b50>,
 <minigrid.core.world_object.Wall at 0x7fa2eda42790>,
 <minigrid.core.world_object.Wall at 0x7fa2eda424d0>,
 None,
 None,
 <minigrid.core.world_object.Wall at 0x7fa2eda42690>,
 None,
 <minigrid.core.world_object.Wall at 0x7fa2eda42710>,
 <minigrid.core.world_object.Wall at 0x7fa2eda42990>,
 None,
 None,
 <minigrid.core.world_object.Wall at 0x7fa2eda427d0>,
 None,
 <minigrid.core.world_object.Wall at 0x7fa2eda429d0>,
 <minigrid.core.world_object.Wall at 0x7fa2eda426d0>,
 None,
 None,
 <minigrid.core.world_object.Door at 0x7fa2eda42750>,
 None,
 <minigrid.core.world_object.Wall at 0x7fa2eda42c10>,
 <minigrid.core.world_object.Wall at 0x7fa2eda42bd0>,
 None,
 None,
 <minigrid.core.world_object.Wall at 0x7fa2eda42c50>,
 <min

In [5]:
action = agent.get_action(obs)
obs, reward, terminated, truncated, _ = env.step(action)

# next_states now contains a dictionary of possible states at t+1, t+2, etc.        
next_states =  depth_first_search(env, obs, agent, depth=3)
print(len(next_states))

done = terminated | truncated
agent.analyze_feedback(reward, done)



NameError: name 'agent' is not defined

In [3]:

for episode in range(args.episodes):
    obs, _ = env.reset()

#     while True:





NameError: name 'args' is not defined