In [1]:
import os

import gym
from gym_subgoal_automata.envs.base.base_env import BaseEnv

from reinforcement_learning.isa_hrl_algorithm import ISAAlgorithmHRL
from utils import utils

ENV_SUBGOAL_AUTOMATA_PREFIX = "gym_subgoal_automata:"

In [2]:
def get_param(config, param_name):
    """Fetch a mandatory configuration entry.

    The lookup itself is delegated to ``utils.get_param``; a ``None`` result is
    treated as "not defined" and reported as an error.

    :param config: configuration object handed through to ``utils.get_param``.
    :param param_name: name of the entry to look up.
    :return: the looked-up value, guaranteed not to be ``None``.
    :raises RuntimeError: if the lookup yields ``None``.
    """
    value = utils.get_param(config, param_name)
    if value is not None:
        return value
    raise RuntimeError("Error: The configuration parameters \'%s\' cannot be undefined." % param_name)

def get_environment_classes(environment_names):
    """Translate configured environment names into (environment id, params) pairs.

    Only the names listed in the table below are recognised; any other content
    of ``environment_names`` is ignored. The result follows the order of the
    table, not the order of ``environment_names``.

    :param environment_names: container tested with ``in`` for each known name.
    :return: list of ``(environment_id, params_dict)`` tuples, each with its own
        empty params dictionary.
    """
    known_environments = (
        ("landmark-is-not-building", "GeoWorldLandmarkIsNotBuildingEnv-v0"),
        ("landmark-with-restrictions", "GeoWorldLandmarkWithRestrictionsEnv-v0"),
        ("landmark-sequence-with-restriction", "GeoWorldLandmarkSequenceWithRestrictionsEnv-v0"),
    )
    # A fresh dict is created per entry so callers can mutate one without affecting the others.
    return [(env_id, {}) for name, env_id in known_environments if name in environment_names]


def get_random_tasks(environment_classes, config):
    """Instantiate ``num_tasks`` randomly generated tasks for every environment class.

    :param environment_classes: iterable of ``(environment_id, params_dict)`` pairs.
    :param config: configuration mapping; must define ``use_environment_seed`` and
        ``num_tasks``, plus ``starting_environment_seed`` when seeding is enabled.
        Its entries are also unpacked into every task's parameters.
    :return: one list per environment class, each holding the environments created
        by ``gym.make`` for that class.
    :raises RuntimeError: if a required configuration entry is undefined.
    """
    seeded = get_param(config, "use_environment_seed")
    tasks_per_domain = get_param(config, "num_tasks")

    def build_task(env_id, base_params, index):
        # The starting seed is only looked up when seeding is enabled, so it may
        # be left undefined otherwise.
        if seeded:
            task_seed = index + get_param(config, "starting_environment_seed")
        else:
            task_seed = None
        # ``config`` is unpacked last, so any key it shares with the entries
        # before it takes precedence over them.
        params = {**base_params, "generation": "random", BaseEnv.RANDOM_SEED_FIELD: task_seed, **config}
        return gym.make(ENV_SUBGOAL_AUTOMATA_PREFIX + env_id, params=params)

    return [
        [build_task(env_id, base_params, index) for index in range(tasks_per_domain)]
        for env_id, base_params in environment_classes
    ]
    
def get_target_automata(environment_classes):
    """Collect the automaton exposed by one randomly generated instance of each environment.

    :param environment_classes: iterable of ``(environment_id, params_dict)`` pairs.
    :return: list with the result of ``get_automaton()`` for each environment
        class, in input order.
    """
    automata = []
    for env_id, base_params in environment_classes:
        env = gym.make(ENV_SUBGOAL_AUTOMATA_PREFIX + env_id, params={**base_params, "generation": "random"})
        automata.append(env.get_automaton())
    return automata

In [3]:
# Load the experiment configuration and resolve the requested environments.
config = utils.read_json_file("test_config.json")
environment_classes = get_environment_classes(get_param(config, "environments"))

# The leading positional arguments of ISAAlgorithmHRL depend on how tasks are generated.
task_generation_method = get_param(config, "task_generation_method")
if task_generation_method == "random":
    args = [get_random_tasks(environment_classes, config), get_param(config, "num_tasks")]
else:
    # Only the "random" method is handled in this notebook; anything else fails fast.
    raise RuntimeError("Error: Unknown task generation method {}.".format(task_generation_method))

# The final positional argument used to be a hard-coded, user-specific absolute path.
# It can now be overridden through the ILASP_PATH environment variable; the previous
# value stays as the default so existing setups behave exactly as before.
# NOTE(review): presumably this is the ILASP installation directory — confirm against ISAAlgorithmHRL.
ilasp_path = os.environ.get("ILASP_PATH", "/homes/tw2222/Documents/project/ilasp")

args.extend([get_param(config, "folder_names"), config])
args.extend([get_target_automata(environment_classes), ilasp_path])
isa_algorithm = ISAAlgorithmHRL(*args)

In [4]:
# Launch the algorithm instance built in the previous cell.
# NOTE(review): the meaning of the positional ``False`` flag is defined by ISAAlgorithmHRL.run,
# which is not visible in this notebook — confirm, and consider passing it by keyword.
isa_algorithm.run(False)

  logger.warn(
  logger.warn(
  logger.warn(
  logger.deprecation(
  if not isinstance(done, (bool, np.bool8)):


Updating automaton 0... The counterexample is: [('ms',), ('sc',), ('cv',)]
Current Automaton Limit is: #state:[3,), #var:[2,3)
Domain: 0 - Task: 0 - Episode: 1 - Terminal: True - Reward: 0.0 - Steps: 58 - Observations: [('ms',), ('sc',), ('cv',)]
Updating automaton 0... The counterexample is: [('ml',)]
Current Automaton Limit is: #state:[3,), #var:[2,3)


  logger.warn(
  logger.warn(
  logger.warn(
  logger.deprecation(
  if not isinstance(done, (bool, np.bool8)):


Domain: 0 - Task: 1 - Episode: 1 - Terminal: True - Reward: 0.0 - Steps: 12 - Observations: [('ml',)]


  logger.warn(
  logger.warn(
  logger.warn(
  logger.deprecation(
  if not isinstance(done, (bool, np.bool8)):


Domain: 0 - Task: 2 - Episode: 1 - Terminal: False - Reward: 0.0 - Steps: 200 - Observations: []
Updating automaton 0... The counterexample is: [('sc',)]
Current Automaton Limit is: #state:[3,), #var:[2,3)
Domain: 0 - Task: 3 - Episode: 1 - Terminal: False - Reward: 0.0 - Steps: 25 - Observations: [('sc',)]


  logger.warn(
  logger.warn(
  logger.warn(
  logger.deprecation(
  if not isinstance(done, (bool, np.bool8)):


Domain: 0 - Task: 4 - Episode: 1 - Terminal: True - Reward: 0.0 - Steps: 121 - Observations: [('ft',)]
Domain: 0 - Task: 5 - Episode: 1 - Terminal: False - Reward: 0.0 - Steps: 200 - Observations: [('sc',)]
Domain: 0 - Task: 6 - Episode: 1 - Terminal: True - Reward: 0.0 - Steps: 24 - Observations: [('ml',)]
Domain: 0 - Task: 7 - Episode: 1 - Terminal: True - Reward: 0.0 - Steps: 62 - Observations: [('ft',)]
Domain: 0 - Task: 8 - Episode: 1 - Terminal: True - Reward: 0.0 - Steps: 34 - Observations: [('wf',)]
Domain: 0 - Task: 9 - Episode: 1 - Terminal: True - Reward: 0.0 - Steps: 2 - Observations: [('ft',)]
Domain: 0 - Task: 0 - Episode: 2 - Terminal: True - Reward: 1.0 - Steps: 154 - Observations: [('ms',), ('sc',), ('pk',)]
Domain: 0 - Task: 1 - Episode: 2 - Terminal: False - Reward: 0.0 - Steps: 200 - Observations: [('sc',)]
Domain: 0 - Task: 2 - Episode: 2 - Terminal: True - Reward: 0.0 - Steps: 135 - Observations: [('ml',)]
Domain: 0 - Task: 3 - Episode: 2 - Terminal: True - Reward