In [1]:
# Logger: configure the root logger with a WARN threshold (lower-severity records are not shown)
import logging
logging.basicConfig(level=logging.WARN)

# Enable automatic module reload
%load_ext autoreload
%autoreload 2

# Load BACS module
from bacs.agents.bacs import BACS, Configuration
from bacs.agents.bacs.utils.GymMazeWrapper import _maze_metrics

# Load environments
import gym
import my_mazes

# Run the benchmarks in parallel
import parmap

# For computing standard deviations
import statistics

# Agent - BACS - BENCHMARKING

## Main Parameters

In [2]:
# Passed as `classifier_length` and as `u_max` to every Configuration below.
CLASSIFIER_LENGTH = 8
# Passed as `number_of_possible_actions` to every Configuration below.
NUMBER_OF_POSSIBLE_ACTIONS = 8

# Exploration phase: number of trials given to `explore()`, how often metrics are
# collected (`metrics_trial_frequency`), and the `beta` / `epsilon` of cfg_explore.
NUMBER_OF_EXPLORE_TRIALS = 1000
METRICS_TRIAL_FREQUENCY_EXPLORE = 100
BETA_EXPLORE = 0.05
EPSILON = 0.8

# First exploitation phase ("no RL"): number of trials and `beta` of cfg_exploit_no_rl.
NUMBER_OF_EXPLOIT_TRIALS_NO_RL = 500
BETA_EXPLOIT_NO_RL = 0.00

# Second exploitation phase ("RL start"): number of trials and `beta` of cfg_exploit_rl_start.
NUMBER_OF_EXPLOIT_TRIALS_RL_START = 100
BETA_EXPLOIT_RL_START = 0.05

# Third exploitation phase ("RL"): number of trials and `beta` of cfg_exploit_rl.
NUMBER_OF_EXPLOIT_TRIALS_RL = 500
BETA_EXPLOIT_RL = 0.05

# Passed as `bs_max` to every Configuration; also used in the "BACS-<n>" label of the result table.
LENGTH_OF_BEHAVIORAL_SEQUENCES = 2

# Number of independent explore/exploit runs performed per maze (the `n` of bench_on_maze).
NUMBER_OF_ITERATIONS_TO_BENCH = 30

# Number of worker processes handed to parmap (`pm_processes`).
NB_OF_PROCESSES = 24

## Full Details of Agent Parameters

In [3]:
def _make_configuration(beta, epsilon, metrics_trial_frequency):
    """Build the BACS ``Configuration`` for one phase of the benchmark.

    All phases share the same classifier length, action count, metrics collector and
    fixed hyper-parameters; only the three arguments below differ between phases.

    Args:
        beta: value forwarded as ``beta``.
        epsilon: value forwarded as ``epsilon``.
        metrics_trial_frequency: value forwarded as ``metrics_trial_frequency``.

    Returns:
        A new ``Configuration`` instance.
    """
    return Configuration(
        classifier_length=CLASSIFIER_LENGTH,
        number_of_possible_actions=NUMBER_OF_POSSIBLE_ACTIONS,
        user_metrics_collector_fcn=_maze_metrics,
        metrics_trial_frequency=metrics_trial_frequency,
        do_ga=False,
        do_subsumption=True,
        beta=beta,
        gamma=0.95,
        theta_i=0.1,
        theta_r=0.9,
        epsilon=epsilon,
        u_max=CLASSIFIER_LENGTH,
        theta_exp=20,
        bs_max=LENGTH_OF_BEHAVIORAL_SEQUENCES
    )


# Exploration phase: the only phase with a non-zero epsilon and a sparse metrics frequency.
cfg_explore = _make_configuration(
    beta=BETA_EXPLORE,
    epsilon=EPSILON,
    metrics_trial_frequency=METRICS_TRIAL_FREQUENCY_EXPLORE,
)

# Exploitation phases: epsilon is 0.0 and metrics are collected on every trial;
# the three phases differ only in their beta.
cfg_exploit_no_rl = _make_configuration(
    beta=BETA_EXPLOIT_NO_RL,
    epsilon=0.0,
    metrics_trial_frequency=1,
)

cfg_exploit_rl_start = _make_configuration(
    beta=BETA_EXPLOIT_RL_START,
    epsilon=0.0,
    metrics_trial_frequency=1,
)

cfg_exploit_rl = _make_configuration(
    beta=BETA_EXPLOIT_RL,
    epsilon=0.0,
    metrics_trial_frequency=1,
)

### Benchmarking - Maze

In [4]:
def _run_exploit_phase(cfg, population, maze, number_of_trials):
    """Run one exploitation phase and return the ``(population, metrics)`` pair from ``exploit``.

    A new BACS agent is built from `cfg` and `population`, its population is cleaned with
    ``does_anticipate_change=True``, and it then exploits `maze` for `number_of_trials` trials.
    """
    agent = BACS(cfg, population)
    agent.clean_population(does_anticipate_change=True)
    return agent.exploit(maze, number_of_trials)


def _average_steps(metrics, number_of_trials):
    """Return the sum of ``'steps_in_trial'`` over `metrics`, divided by `number_of_trials`."""
    return sum(trial['steps_in_trial'] for trial in metrics) / number_of_trials


def _mean_and_pstdev(values, n):
    """Return ``(sum(values) / n, population standard deviation of values)``."""
    return float(sum(values)) / n, statistics.pstdev(values)


def bench_on_maze(env, n):
    """Benchmark BACS on one maze over `n` independent runs and summarise the results.

    Each run creates a fresh environment from ``env.id``, trains a new agent with
    ``cfg_explore``, then chains three exploitation phases (``cfg_exploit_no_rl``,
    ``cfg_exploit_rl_start``, ``cfg_exploit_rl``), each starting from the population
    returned by the previous phase. One progress line is printed per run and the
    final result dictionary is printed before being returned.

    Args:
        env: object exposing an ``id`` attribute accepted by ``gym.make``.
        n: number of independent runs; must be at least 1.

    Returns:
        dict with the maze id, the mean (``avg_*``) and population standard deviation
        (``std_*``) over the `n` runs of: average steps per trial in each exploitation
        phase, and the ``knowledge``, ``population``, ``numerosity`` and ``reliable``
        values read from the last metrics entry of the final exploitation phase.
        The per-run step averages of the "no RL" and "RL" phases are included as lists.

    Raises:
        ValueError: if `n` is smaller than 1 (no run would happen, so no statistic
            could be computed).
    """
    if n < 1:
        raise ValueError("n must be at least 1, got {!r}".format(n))

    avg_exploit_no_rl_list = []
    avg_exploit_rl_start_list = []
    avg_exploit_rl_list = []
    knowledge_list = []
    population_list = []
    numerosity_list = []
    reliable_list = []

    for i in range(n):

        # Initialize environment
        maze = gym.make(env.id)

        # Reset it before handing it to the agent; the returned situation is not needed here
        maze.reset()

        # Training of BACS - Exploration (the exploration metrics are not used afterwards)
        agent_explore = BACS(cfg_explore)
        population_explore, _ = agent_explore.explore(maze, NUMBER_OF_EXPLORE_TRIALS)

        # Using BACS - Exploitation - No RL module
        population_exploit_no_rl, metrics_exploit_no_rl = _run_exploit_phase(
            cfg_exploit_no_rl, population_explore, maze, NUMBER_OF_EXPLOIT_TRIALS_NO_RL)

        # Using BACS - Exploitation - Starting using RL module
        population_exploit_rl_start, metrics_exploit_rl_start = _run_exploit_phase(
            cfg_exploit_rl_start, population_exploit_no_rl, maze, NUMBER_OF_EXPLOIT_TRIALS_RL_START)

        # Using BACS - Exploitation - Using RL module
        _, metrics_exploit_rl = _run_exploit_phase(
            cfg_exploit_rl, population_exploit_rl_start, maze, NUMBER_OF_EXPLOIT_TRIALS_RL)

        # Average number of steps per trial in each exploitation mode
        avg_exploit_no_rl_list.append(
            _average_steps(metrics_exploit_no_rl, NUMBER_OF_EXPLOIT_TRIALS_NO_RL))
        avg_exploit_rl_start_list.append(
            _average_steps(metrics_exploit_rl_start, NUMBER_OF_EXPLOIT_TRIALS_RL_START))
        avg_exploit_rl_list.append(
            _average_steps(metrics_exploit_rl, NUMBER_OF_EXPLOIT_TRIALS_RL))

        # Other metrics are taken from the very last trial of the final phase
        final_metrics = metrics_exploit_rl[-1]
        knowledge_list.append(final_metrics['knowledge'])
        population_list.append(final_metrics['population'])
        numerosity_list.append(final_metrics['numerosity'])
        reliable_list.append(final_metrics['reliable'])

        # Textual progress bar: one '#' per completed run
        if i == n-1:
            print(env.id,'|','#' * (i+1), '|')
        else:
            print(env.id,'|','#' * (i+1) ,' ' * (n-i-2), '|')

    # Compute the means and standard deviations
    avg_exploit_no_rl, std_exploit_no_rl = _mean_and_pstdev(avg_exploit_no_rl_list, n)
    avg_exploit_rl_start, std_exploit_rl_start = _mean_and_pstdev(avg_exploit_rl_start_list, n)
    avg_exploit_rl, std_exploit_rl = _mean_and_pstdev(avg_exploit_rl_list, n)
    avg_knowledge, std_knowledge = _mean_and_pstdev(knowledge_list, n)
    avg_population, std_population = _mean_and_pstdev(population_list, n)
    avg_numerosity, std_numerosity = _mean_and_pstdev(numerosity_list, n)
    avg_reliable, std_reliable = _mean_and_pstdev(reliable_list, n)

    result = {
        'maze'             : env.id,
        'avg_exploit_no_rl'      : avg_exploit_no_rl,
        'std_exploit_no_rl'      : std_exploit_no_rl,
        'avg_exploit_rl_start'      : avg_exploit_rl_start,
        'std_exploit_rl_start'      : std_exploit_rl_start,
        'avg_exploit_rl'      : avg_exploit_rl,
        'std_exploit_rl'      : std_exploit_rl,
        'avg_knowledge'    : avg_knowledge,
        'std_knowledge'    : std_knowledge,
        'avg_population'   : avg_population,
        'std_population'   : std_population,
        'avg_numerosity'   : avg_numerosity,
        'std_numerosity'   : std_numerosity,
        'avg_reliable'     : avg_reliable,
        'std_reliable'     : std_reliable,
        'avg_exploit_no_rl_list' : avg_exploit_no_rl_list,
        'avg_exploit_rl_list' : avg_exploit_rl_list
    }

    print(result)

    return result

Custom filter functions for selecting the available Gym environments by maze type:

In [5]:
def filter_envs_typeIII(env):
    """Return True when ``env.id`` starts with one of the type III maze prefixes.

    `env` only needs an ``id`` string attribute.
    """
    # str.startswith accepts a tuple and is True as soon as one prefix matches.
    return env.id.startswith(("Maze10-", "MazeE1", "MazeE2", "Woods10"))

def filter_envs_typeII(env):
    """Return True when ``env.id`` starts with one of the type II maze prefixes.

    `env` only needs an ``id`` string attribute.
    """
    # str.startswith accepts a tuple and is True as soon as one prefix matches.
    return env.id.startswith(("MazeF4", "Maze7", "MiyazakiB"))

def filter_envs_typeI(env):
    """Return True when ``env.id`` starts with one of the type I maze prefixes.

    `env` only needs an ``id`` string attribute.
    """
    # str.startswith accepts a tuple and is True as soon as one prefix matches.
    return env.id.startswith(("MazeB", "MazeD", "Littman", "MiyazakiA", "Cassandra"))

def filter_envs_na(env):
    """Return True when ``env.id`` starts with one of the prefixes of the "na" maze group.

    `env` only needs an ``id`` string attribute.
    """
    # str.startswith accepts a tuple and is True as soon as one prefix matches.
    # "Woods1-" keeps its trailing dash so that ids such as "Woods100-..." do not match it.
    return env.id.startswith((
        "MazeF1", "MazeF2", "MazeF3",
        "Woods1-", "Woods14",
        "Maze4", "Maze5", "MazeA",
    ))

# Materialise every entry of the Gym registry once; the filters above look at each entry's `id`.
all_envs = list(gym.envs.registry.all())

Benchmarking without the genetic algorithm:

In [None]:
#maze_envs = [env for env in all_envs if filter_envs_na(env) or filter_envs_typeI(env) or filter_envs_typeII(env) or filter_envs_typeIII(env)]
# Keep only the registry entries accepted by the type III filter.
maze_envs = list(filter(filter_envs_typeIII, all_envs))

# Benchmark each selected maze with NUMBER_OF_ITERATIONS_TO_BENCH runs,
# distributing the mazes over NB_OF_PROCESSES workers with a progress bar.
results = parmap.map(
    bench_on_maze,
    maze_envs,
    NUMBER_OF_ITERATIONS_TO_BENCH,
    pm_pbar=True,
    pm_processes=NB_OF_PROCESSES,
)

  0%|          | 0/7 [00:00<?, ?it/s]

Woods100-v0 | #                              |
Woods100-v0 | ##                             |
Woods100-v0 | ###                            |
Woods100-v0 | ####                           |
Woods100-v0 | #####                          |
Woods101-v0 | #                              |
Woods100-v0 | ######                         |
Woods100-v0 | #######                        |
Woods100-v0 | ########                       |
Woods100-v0 | #########                      |
Woods100-v0 | ##########                     |
Woods100-v0 | ###########                    |
Woods101demi-v0 | #                              |
Woods100-v0 | ############                   |
Woods101-v0 | ##                             |
Woods100-v0 | #############                  |
Woods100-v0 | ##############                 |
Woods100-v0 | ###############                |
Woods100-v0 | ################               |
Maze10-v0 | #                              |
Woods100-v0 | #################              |
Woods100-v0

 14%|█▍        | 1/7 [03:16<19:36, 196.01s/it]

Woods101demi-v0 | ###                            |
Woods101-v0 | #####                          |
Maze10-v0 | ##                             |
Woods101-v0 | ######                         |
Woods101demi-v0 | ####                           |
Woods101-v0 | #######                        |
Woods102-v0 | ##                             |
Maze10-v0 | ###                            |
Woods101-v0 | ########                       |
Woods101demi-v0 | #####                          |
Woods101-v0 | #########                      |
MazeE1-v0 | #                              |
MazeE2-v0 | #                              |
Woods101-v0 | ##########                     |
Woods101demi-v0 | ######                         |
Woods102-v0 | ###                            |
Maze10-v0 | ####                           |
Woods101-v0 | ###########                    |
Woods101-v0 | ############                   |
Woods101demi-v0 | #######                        |
Maze10-v0 | #####                          |
Woods

 29%|██▊       | 2/7 [20:32<37:20, 448.03s/it]

Maze10-v0 | #############                  |
Woods101demi-v0 | ##################             |
Woods102-v0 | #########                      |
Maze10-v0 | ##############                 |
Woods101demi-v0 | ###################            |
Woods101demi-v0 | ####################           |
MazeE1-v0 | ####                           |
Maze10-v0 | ###############                |
Woods102-v0 | ##########                     |
Woods101demi-v0 | #####################          |
Maze10-v0 | ################               |
Woods101demi-v0 | ######################         |
MazeE2-v0 | ####                           |
Woods102-v0 | ###########                    |
Woods101demi-v0 | #######################        |
Maze10-v0 | #################              |
MazeE1-v0 | #####                          |
Woods101demi-v0 | ########################       |
Maze10-v0 | ##################             |
Woods102-v0 | ############                   |
Woods101demi-v0 | #########################      |

 43%|████▎     | 3/7 [36:02<39:30, 592.64s/it]

Maze10-v0 | ######################         |
Woods102-v0 | ###############                |
MazeE2-v0 | ######                         |
MazeE1-v0 | #######                        |
Maze10-v0 | #######################        |
Woods102-v0 | ################               |
Maze10-v0 | ########################       |
Woods102-v0 | #################              |
Maze10-v0 | #########################      |
MazeE1-v0 | ########                       |
MazeE2-v0 | #######                        |
Woods102-v0 | ##################             |
Maze10-v0 | ##########################     |
Maze10-v0 | ###########################    |
Woods102-v0 | ###################            |
Maze10-v0 | ############################   |
MazeE1-v0 | #########                      |
Woods102-v0 | ####################           |
Maze10-v0 | #############################  |
MazeE2-v0 | ########                       |
Maze10-v0 | ############################## |
{'maze': 'Maze10-v0', 'avg_exploit_no_rl': 

 57%|█████▋    | 4/7 [50:24<33:40, 673.46s/it]

Woods102-v0 | #####################          |
MazeE1-v0 | ##########                     |
Woods102-v0 | ######################         |
Woods102-v0 | #######################        |
MazeE1-v0 | ###########                    |
MazeE2-v0 | #########                      |
Woods102-v0 | ########################       |
Woods102-v0 | #########################      |
MazeE1-v0 | ############                   |
Woods102-v0 | ##########################     |
MazeE2-v0 | ##########                     |
MazeE1-v0 | #############                  |
Woods102-v0 | ###########################    |
Woods102-v0 | ############################   |
MazeE2-v0 | ###########                    |
Woods102-v0 | #############################  |
MazeE1-v0 | ##############                 |
Woods102-v0 | ############################## |
{'maze': 'Woods102-v0', 'avg_exploit_no_rl': 13.709666666666667, 'std_exploit_no_rl': 5.809883557831048, 'avg_exploit_rl_start': 4.7793333333333345, 'std_exploit_rl_start

 71%|███████▏  | 5/7 [1:11:14<28:12, 846.45s/it]

MazeE1-v0 | ###############                |
MazeE2-v0 | ############                   |
MazeE1-v0 | ################               |
MazeE2-v0 | #############                  |
MazeE1-v0 | #################              |
MazeE2-v0 | ##############                 |
MazeE1-v0 | ##################             |
MazeE2-v0 | ###############                |
MazeE1-v0 | ###################            |
MazeE1-v0 | ####################           |
MazeE2-v0 | ################               |
MazeE1-v0 | #####################          |
MazeE2-v0 | #################              |
MazeE1-v0 | ######################         |
MazeE2-v0 | ##################             |
MazeE1-v0 | #######################        |
MazeE2-v0 | ###################            |
MazeE1-v0 | ########################       |
MazeE1-v0 | #########################      |
MazeE2-v0 | ####################           |
MazeE1-v0 | ##########################     |
MazeE2-v0 | #####################          |
MazeE1-v0 

 86%|████████▌ | 6/7 [2:27:36<32:47, 1967.19s/it]

MazeE2-v0 | ########################       |
MazeE2-v0 | #########################      |
MazeE2-v0 | ##########################     |
MazeE2-v0 | ###########################    |
MazeE2-v0 | ############################   |
MazeE2-v0 | #############################  |


Formatting the previous results as one Markdown table row per environment:

In [None]:
# Build one Markdown table row per benchmarked maze:
# |BACS-<bs_max>|<maze>|no-RL avg|std|min|max|RL avg|std|min|max|population avg|std|reliable avg|std|
markdown_rows = []

for item in results:
    no_rl_runs = item['avg_exploit_no_rl_list']
    rl_runs = item['avg_exploit_rl_list']

    # Numeric columns in table order; the knowledge columns are deliberately left out.
    figures = [
        item['avg_exploit_no_rl'], item['std_exploit_no_rl'], min(no_rl_runs), max(no_rl_runs),
        item['avg_exploit_rl'], item['std_exploit_rl'], min(rl_runs), max(rl_runs),
        item['avg_population'], item['std_population'],
        item['avg_reliable'], item['std_reliable'],
    ]

    cells = ['BACS-' + str(LENGTH_OF_BEHAVIORAL_SEQUENCES), item['maze']]
    cells.extend("{:.3f}".format(value) for value in figures)

    # Every cell is preceded by '|' and the row is closed by a final '|'.
    markdown_rows.append('|' + '|'.join(cells) + '|' + '\n')

markdown_str = ''.join(markdown_rows)

print(markdown_str)

In [None]:
# Dump the raw value returned by the parmap benchmark run (one bench_on_maze result per maze).
print(results)