
    This Source Code Form is subject to the terms of the Mozilla Public
    License, v. 2.0. If a copy of the MPL was not distributed with this
    file, You can obtain one at http://mozilla.org/MPL/2.0/.


In [None]:
# Enable automatic module reload
%load_ext autoreload
%autoreload 2

# To ease the loading of modules
import os
import sys
# Put the parent directory of the working directory on the import path
# (once only) so that project-local packages can be imported below.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

# Load environments
import gym
import my_mazes

# Parallelize the benchmark runs
#import parmap
import ray
import time

# Agent - BEACS - BENCHMARKING

## Main Parameters

In [None]:
# Environment set-up
# Length handed to maze.env.set_random_attribute_length(); it is also added
# to the classifier length below.
RANDOM_ATTRIBUTE_LENGTH = 0
# Passed as `classifier_length` (and as `u_max` for exploration) to every
# BEACS Configuration.
CLASSIFIER_LENGTH = 8 + RANDOM_ATTRIBUTE_LENGTH
# Passed as `number_of_possible_actions` to every BEACS Configuration.
NUMBER_OF_POSSIBLE_ACTIONS = 8
# Handed to maze.env.set_prob_slippery(): probability to do a random action.
SLIPPERY_PROB = 0.25

# Exploration set-up
# Number of trials given to agent.explore().
NUMBER_OF_EXPLORE_TRIALS = 5000
# `metrics_trial_frequency` of the exploration Configuration.
METRICS_TRIAL_FREQUENCY_EXPLORE = 100
# `epsilon` of the exploration Configuration.
EPSILON = 0.8
# `beta_alp` of the exploration Configuration.
BETA_ALP = 0.05

# Exploitation set-up: number of trials and `beta_rl` for each of the three
# successive exploitation phases (no RL, RL start, RL).
NUMBER_OF_EXPLOIT_TRIALS_NO_RL = 500
BETA_EXPLOIT_NO_RL = 0.05
NUMBER_OF_EXPLOIT_TRIALS_RL_START = 500
BETA_EXPLOIT_RL_START = 0.05
NUMBER_OF_EXPLOIT_TRIALS_RL = 500
BETA_EXPLOIT_RL = 0.05

# RL set-up
# `gamma` of every Configuration.
GAMMA = 0.95
# `beta_rl` of the exploration Configuration.
BETA_RL = 0.05

# GA set-up
# `chi` of the exploration Configuration.
CROSSOVER = 0.8
# `mu` of the exploration Configuration.
MUTATION = 0.3

# BEACS set-up
# `do_pep` of the exploration Configuration.
ENABLE_PEP = True
# `bs_max` of the exploration Configuration.
LENGTH_OF_BEHAVIORAL_SEQUENCES = 2

# Parallelization and iterations for the statistics
# Number of benchmark runs launched for each selected environment.
NUMBER_OF_ITERATIONS_TO_BENCH = 30
# Only used by the (commented-out) local-mode ray.init() call.
NB_OF_PROCESSES = 60

# File the aggregated results are written to as JSON.
JSON_RESULTS_FILENAME = "BEACS-ZIP-Slip.json"

## Launching Ray for Multiprocessing

In [None]:
# Local Mode: start a Ray instance on this machine (uncomment to use it
# instead of the remote mode below).
#ray.init(num_cpus=NB_OF_PROCESSES, ignore_reinit_error=True)
# Remote Mode: attach to an already running Ray cluster.
# NOTE(review): the cluster password is hard-coded here - consider reading it
# from the environment instead.
ray.init(address='auto', _redis_password='5241590000000000')
# Presumably leaves the connection some time to settle before tasks are
# submitted - TODO confirm this pause is still needed.
time.sleep(2.0)

## Statistics Computation

In [None]:
# For computing means and standard deviations
import statistics

# One entry per aggregated metric:
#   (key in a single-run result, output key of the mean, output key of the
#    standard deviation).
# The raw samples are always exported under "<key in a single-run result>_list".
# The output key names are irregular for historical reasons and are kept as
# they are so that existing consumers of the JSON file keep working.
_METRIC_SPECS = (
    ('old_knowledge', 'avg_old_knowledge', 'std_old_knowledge'),
    ('old_population', 'avg_old_population', 'std_old_population'),
    ('old_numerosity', 'avg_old_numerosity', 'std_old_numerosity'),
    ('old_reliable', 'avg_old_reliable', 'std_old_reliable'),
    ('old_mean_reliable_classifier_specificity',
     'avg_old_mean_reliable_classifier_specificity',
     'std_old_mean_reliable_classifier_specificity'),
    ('old_mean_reliable_bs_classifier_specificity',
     'avg_old_mean_reliable_bs_classifier_specificity',
     'std_old_mean_reliable_bs_classifier_specificity'),
    ('old_mean_reliable_no_bs_classifier_specificity',
     'avg_old_mean_reliable_no_bs_classifier_specificity',
     'std_old_mean_reliable_no_bs_classifier_specificity'),
    ('old_pep_error', 'avg_old_pep_error_list', 'std_old_pep_error_list'),
    ('old_peps_match_non_aliased_states',
     'avg_old_peps_match_non_aliased_states_list',
     'std_old_peps_match_non_aliased_states_list'),

    ('new_knowledge', 'avg_new_knowledge', 'std_new_knowledge'),
    ('new_population', 'avg_new_population', 'std_new_population'),
    ('new_numerosity', 'avg_new_numerosity', 'std_new_numerosity'),
    ('new_reliable', 'avg_new_reliable', 'std_new_reliable'),
    ('new_mean_reliable_classifier_specificity',
     'avg_new_mean_reliable_classifier_specificity',
     'std_new_mean_reliable_classifier_specificity'),
    ('new_mean_reliable_bs_classifier_specificity',
     'avg_new_mean_reliable_bs_classifier_specificity',
     'std_new_mean_reliable_bs_classifier_specificity'),
    ('new_mean_reliable_no_bs_classifier_specificity',
     'avg_new_mean_reliable_no_bs_classifier_specificity',
     'std_new_mean_reliable_no_bs_classifier_specificity'),
    ('new_pep_error', 'avg_new_pep_error_list', 'std_new_pep_error_list'),
    ('new_peps_match_non_aliased_states',
     'avg_new_peps_match_non_aliased_states_list',
     'std_new_peps_match_non_aliased_states_list'),

    ('full_knowledge_first_trial',
     'avg_full_knowledge_first_trial_list',
     'std_full_knowledge_first_trial_list'),
    ('full_knowledge_stable_trial',
     'avg_full_knowledge_stable_trial_list',
     'std_full_knowledge_stable_trial_list'),
    ('full_knowledge_last_trial',
     'avg_full_knowledge_last_trial_list',
     'std_full_knowledge_last_trial_list'),

    ('old_avg_exploit_no_rl', 'old_avg_exploit_no_rl', 'old_std_exploit_no_rl'),
    ('old_avg_exploit_rl_start', 'old_avg_exploit_rl_start', 'old_std_exploit_rl_start'),
    ('old_avg_exploit_rl', 'old_avg_exploit_rl', 'old_std_exploit_rl'),

    ('new_avg_exploit_no_rl', 'new_avg_exploit_no_rl', 'new_std_exploit_no_rl'),
    ('new_avg_exploit_rl_start', 'new_avg_exploit_rl_start', 'new_std_exploit_rl_start'),
    ('new_avg_exploit_rl', 'new_avg_exploit_rl', 'new_std_exploit_rl'),

    ('explore_time', 'avg_explore_time', 'std_explore_time'),
    ('zipping_time', 'avg_zipping_time', 'std_zipping_time'),
    ('old_time', 'avg_old_time', 'std_old_time'),
    ('new_time', 'avg_new_time', 'std_new_time'),
)


def _sample_stdev(values):
    """Return the sample standard deviation of `values`, or 0.0 if undefined.

    `statistics.stdev` raises `StatisticsError` with fewer than two samples;
    a single benchmark run is reported with a deviation of 0.0 instead of
    discarding the whole benchmark.
    """
    if len(values) < 2:
        return 0.0
    return statistics.stdev(values)


def compute_mean_and_stdev_for_one_env(env_name, parmap_results):
    """Aggregate the per-run benchmark results of one maze.

    Parameters
    ----------
    env_name
        Identifier of the maze; only the results whose 'maze' entry equals it
        are aggregated.
    parmap_results
        Iterable of single-run result dictionaries (as returned by
        `bench_on_maze`), possibly covering several mazes.

    Returns
    -------
    dict
        'maze', then for every metric its mean and standard deviation, then
        the raw samples of every metric under '<metric>_list', and finally
        'memory_of_pai_states_dict', which maps each joined PAI state to the
        number of times it appears across the runs.

    Raises
    ------
    statistics.StatisticsError
        If no result matches `env_name` (the mean of no sample is undefined).
    KeyError
        If a matching result lacks one of the expected entries.
    """
    runs = [res for res in parmap_results if res['maze'] == env_name]

    # Raw samples of every metric, in run order.
    samples = {key: [run[key] for run in runs] for key, _, _ in _METRIC_SPECS}

    # Count how many times each PAI state was memorised over all the runs.
    memory_of_pai_states_dict = {}
    for run in runs:
        for pai_state in run['memory_of_pai_states']:
            pai_state = "".join(pai_state)
            memory_of_pai_states_dict[pai_state] = \
                memory_of_pai_states_dict.get(pai_state, 0) + 1

    dic = {'maze': env_name}

    # Means and standard deviations first ...
    for key, avg_key, std_key in _METRIC_SPECS:
        dic[avg_key] = statistics.mean(samples[key])
        dic[std_key] = _sample_stdev(samples[key])

    # ... then the raw samples, so the layout of the output is unchanged.
    for key, _, _ in _METRIC_SPECS:
        dic[key + '_list'] = samples[key]

    dic['memory_of_pai_states_dict'] = memory_of_pai_states_dict

    return dic

## Benchmarking - Maze

Custom filter functions to select the available Gym environments according to the type of maze:

In [None]:
# Environment-id prefixes of each maze family; `str.startswith` accepts a
# tuple and returns True as soon as one of the prefixes matches.
_TYPE_III_PREFIXES = ("Maze10-", "MazeE1", "MazeE2", "Woods10")
_TYPE_II_PREFIXES = ("MazeF4", "Maze7", "MiyazakiB")
_TYPE_I_PREFIXES = ("MazeB", "MazeD", "Littman", "MiyazakiA", "Cassandra")
_NA_PREFIXES = ("MazeF1", "MazeF2", "MazeF3", "Woods14", "Maze4", "Maze5", "MazeA")


def filter_envs_typeIII(env):
    """Return True if `env.id` starts with one of the type III maze prefixes."""
    return env.id.startswith(_TYPE_III_PREFIXES)


def filter_envs_typeII(env):
    """Return True if `env.id` starts with one of the type II maze prefixes."""
    return env.id.startswith(_TYPE_II_PREFIXES)


def filter_envs_typeI(env):
    """Return True if `env.id` starts with one of the type I maze prefixes."""
    return env.id.startswith(_TYPE_I_PREFIXES)


def filter_envs_na(env):
    """Return True if `env.id` starts with one of the "na" maze prefixes."""
    return env.id.startswith(_NA_PREFIXES)

# Snapshot of every environment spec currently registered in Gym.
all_envs = list(gym.envs.registry.all())

Function to compute the benchmark values on a single Gym environment:

In [None]:
@ray.remote
def bench_on_maze(env):
    """Run one complete BEACS benchmark on a single maze (Ray remote task).

    The agent first explores the maze; its population is duplicated and the
    original is then zipped. Metrics are collected on both the duplicate
    ("old_" entries) and the zipped population ("new_" entries), and each of
    them is finally run through three successive exploitation phases.

    Parameters
    ----------
    env
        Gym environment spec; its `id` is used to instantiate the maze and
        to label the result.

    Returns
    -------
    dict
        Measures of this single run (see the `result` dictionary at the end
        of the function). The dictionary is also printed.
    """
    # To ease the loading of modules
    import os
    import sys
    import time
    # The path depends on where the Ray server was started.
    # Local Mode
    #module_path = os.path.abspath(os.path.join('..'))
    # Remote Mode
    module_path = os.path.abspath(os.path.join('.'))
    if module_path not in sys.path:
        sys.path.append(module_path)

    # Load BEACS module
    from beacs.agents.beacs import BEACS, Configuration

    # Load Metrics
    from my_example.metrics.MazeMetrics import (
        _maze_metrics,
        _how_many_peps_match_non_aliased_states,
        _mean_reliable_classifier_specificity,
        _when_full_knowledge_is_achieved,
        _enhanced_effect_error,
    )

    # Load environments
    import gym
    import my_mazes

    def exploit_configuration(beta_rl, epsilon):
        # The three exploitation phases only differ by `beta_rl` and `epsilon`.
        return Configuration(
            classifier_length=CLASSIFIER_LENGTH,
            number_of_possible_actions=NUMBER_OF_POSSIBLE_ACTIONS,
            user_metrics_collector_fcn=_maze_metrics,
            metrics_trial_frequency=1,
            beta_rl=beta_rl,
            gamma=GAMMA,
            epsilon=epsilon
        )

    cfg_explore = Configuration(
        classifier_length=CLASSIFIER_LENGTH,
        number_of_possible_actions=NUMBER_OF_POSSIBLE_ACTIONS,
        user_metrics_collector_fcn=_maze_metrics,
        metrics_trial_frequency=METRICS_TRIAL_FREQUENCY_EXPLORE,
        do_pep=ENABLE_PEP,
        beta_alp=BETA_ALP,
        beta_rl=BETA_RL,
        gamma=GAMMA,
        epsilon=EPSILON,
        u_max=CLASSIFIER_LENGTH,
        mu=MUTATION,
        chi=CROSSOVER,
        bs_max=LENGTH_OF_BEHAVIORAL_SEQUENCES
    )

    # (configuration, number of trials) of each exploitation phase, in the
    # order they are chained: no RL, RL start, RL.
    exploit_phases = (
        (exploit_configuration(BETA_EXPLOIT_NO_RL, 0.2), NUMBER_OF_EXPLOIT_TRIALS_NO_RL),
        (exploit_configuration(BETA_EXPLOIT_RL_START, 0.0), NUMBER_OF_EXPLOIT_TRIALS_RL_START),
        (exploit_configuration(BETA_EXPLOIT_RL, 0.0), NUMBER_OF_EXPLOIT_TRIALS_RL),
    )

    # Initialize the environment
    maze = gym.make(env.id)

    # Set up probability to do a random action
    maze.env.set_prob_slippery(SLIPPERY_PROB)

    # Set up random attribute length
    maze.env.set_random_attribute_length(RANDOM_ATTRIBUTE_LENGTH)

    # Reset it, by putting an agent into random position
    maze.reset()

    # Training of BEACS - Exploration
    explore_start_time = time.process_time()
    agent_explore = BEACS(cfg_explore)
    population_explore, metrics_explore = agent_explore.explore(maze, NUMBER_OF_EXPLORE_TRIALS)
    explore_end_time = time.process_time()

    # Zipping: keep a copy of the population before compressing it
    duplicate_population_explore = agent_explore.duplicate_population()
    zipping_start_time = time.process_time()
    agent_explore.zip_population()
    zipping_end_time = time.process_time()
    population_explore = agent_explore.get_population()

    def collect_measures(population):
        # Same metric calls, in the same order, for both populations.
        peps_match = _how_many_peps_match_non_aliased_states(population, maze)
        pep_error = _enhanced_effect_error(population, maze, CLASSIFIER_LENGTH, RANDOM_ATTRIBUTE_LENGTH)
        specificity, no_bs_specificity, bs_specificity = _mean_reliable_classifier_specificity(population, maze)
        maze_metrics = _maze_metrics(population, maze)
        return peps_match, pep_error, specificity, no_bs_specificity, bs_specificity, maze_metrics

    # Collecting measures
    (old_peps_match_non_aliased_states,
     old_pep_error,
     old_mean_reliable_classifier_specificity,
     old_mean_reliable_no_bs_classifier_specificity,
     old_mean_reliable_bs_classifier_specificity,
     old_maze_metrics) = collect_measures(duplicate_population_explore)

    (new_peps_match_non_aliased_states,
     new_pep_error,
     new_mean_reliable_classifier_specificity,
     new_mean_reliable_no_bs_classifier_specificity,
     new_mean_reliable_bs_classifier_specificity,
     new_maze_metrics) = collect_measures(population_explore)

    first_trial, stable_trial, last_trial = _when_full_knowledge_is_achieved(metrics_explore)

    def run_exploitation(population):
        # Chain the three exploitation phases, each agent starting from the
        # population returned by the previous one, and time the whole chain.
        started = time.process_time()
        collected = []
        for cfg, nb_trials in exploit_phases:
            population, phase_metrics = BEACS(cfg, population).exploit(maze, nb_trials)
            collected.append(phase_metrics)
        return collected, time.process_time() - started

    ### Using BEACS - Uncompressed population
    old_metrics, old_time = run_exploitation(duplicate_population_explore)

    ### Using BEACS - Compressed population
    new_metrics, new_time = run_exploitation(population_explore)

    def average_steps(collected):
        # Average 'steps to exit' of each phase, over its configured number
        # of trials.
        return [
            sum(trial['steps_in_trial'] for trial in phase_metrics) / nb_trials
            for phase_metrics, (_, nb_trials) in zip(collected, exploit_phases)
        ]

    # Get average 'steps to exit' in all exploitation modes
    old_avg_step_exploit_no_rl, old_avg_step_exploit_rl_start, old_avg_step_exploit_rl = \
        average_steps(old_metrics)
    new_avg_step_exploit_no_rl, new_avg_step_exploit_rl_start, new_avg_step_exploit_rl = \
        average_steps(new_metrics)

    result = {
        'maze' : env.id,

        'old_knowledge' : old_maze_metrics['knowledge'],
        'old_population' : old_maze_metrics['population'],
        'old_numerosity' : old_maze_metrics['numerosity'],
        'old_reliable' : old_maze_metrics['reliable'],
        'old_mean_reliable_classifier_specificity' : old_mean_reliable_classifier_specificity,
        'old_mean_reliable_bs_classifier_specificity' : old_mean_reliable_bs_classifier_specificity,
        'old_mean_reliable_no_bs_classifier_specificity' : old_mean_reliable_no_bs_classifier_specificity,
        'old_pep_error': old_pep_error,
        'old_peps_match_non_aliased_states': old_peps_match_non_aliased_states,

        'new_knowledge' : new_maze_metrics['knowledge'],
        'new_population' : new_maze_metrics['population'],
        'new_numerosity' : new_maze_metrics['numerosity'],
        'new_reliable' : new_maze_metrics['reliable'],
        'new_mean_reliable_classifier_specificity' : new_mean_reliable_classifier_specificity,
        'new_mean_reliable_bs_classifier_specificity' : new_mean_reliable_bs_classifier_specificity,
        'new_mean_reliable_no_bs_classifier_specificity' : new_mean_reliable_no_bs_classifier_specificity,
        'new_pep_error': new_pep_error,
        'new_peps_match_non_aliased_states': new_peps_match_non_aliased_states,

        'full_knowledge_first_trial' : first_trial,
        'full_knowledge_stable_trial' : stable_trial,
        'full_knowledge_last_trial' : last_trial,

        'old_avg_exploit_no_rl' : old_avg_step_exploit_no_rl,
        'old_avg_exploit_rl_start' : old_avg_step_exploit_rl_start,
        'old_avg_exploit_rl' : old_avg_step_exploit_rl,

        'new_avg_exploit_no_rl' : new_avg_step_exploit_no_rl,
        'new_avg_exploit_rl_start' : new_avg_step_exploit_rl_start,
        'new_avg_exploit_rl' : new_avg_step_exploit_rl,

        'memory_of_pai_states' : agent_explore.get_pai_states_memory(),

        'explore_time' : explore_end_time - explore_start_time,
        'zipping_time' : zipping_end_time - zipping_start_time,
        'old_time' : old_time,
        'new_time' : new_time
    }

    print(result)

    return result

Set up the list of environments to benchmark:

In [None]:
# maze_envs holds one entry per benchmark run to launch (each selected
# environment repeated NUMBER_OF_ITERATIONS_TO_BENCH times), while
# maze_envs_name holds the id of each selected environment once.
maze_envs = []
maze_envs_name = []
for env in all_envs:
    # To bench a single family instead, replace the test below by e.g.
    # `not env.id.startswith("Woods100")`.
    if not any(keep(env) for keep in (filter_envs_typeIII, filter_envs_typeII,
                                      filter_envs_typeI, filter_envs_na)):
        continue
    maze_envs_name.append(env.id)
    maze_envs.extend([env] * NUMBER_OF_ITERATIONS_TO_BENCH)

Benchmarking :

In [None]:
# Launch one remote task per benchmark run and block until all of them have
# returned their result dictionary.
futures = [bench_on_maze.remote(env) for env in maze_envs]
results = ray.get(futures)
# Aggregate the raw per-run results into one statistics dictionary per maze.
results = [compute_mean_and_stdev_for_one_env(env_name, results) for env_name in maze_envs_name]

import json
jsonString = json.dumps(results)
# The context manager closes the file even if the write fails.
with open(JSON_RESULTS_FILENAME, "w") as jsonFile:
    jsonFile.write(jsonString)

## Closing Ray

In [None]:
# Disconnect this notebook from Ray now that all results have been collected.
ray.shutdown()