In [1]:
# Logger
import logging
logging.basicConfig(level=logging.WARN)

# Enable automatic module reload
%load_ext autoreload
%autoreload 2

# Load BACS module
from bacs.agents.bacs import BACS, Configuration
from bacs.agents.bacs.utils.GymMazeWrapper import _maze_metrics

# Load environments
import gym
import my_mazes

# Run the benchmarks in parallel
import parmap

# For computing standard deviations
import statistics

# Agent - BACS - BENCHMARKING

## Main Parameters

In [2]:
# Passed to every Configuration as both classifier_length and u_max
CLASSIFIER_LENGTH = 8
# Passed to every Configuration as number_of_possible_actions
NUMBER_OF_POSSIBLE_ACTIONS = 8

# Exploration phase: number of trials, metrics sampling frequency, beta and epsilon
NUMBER_OF_EXPLORE_TRIALS = 1000
METRICS_TRIAL_FREQUENCY_EXPLORE = 20
BETA_EXPLORE = 0.05
EPSILON = 0.8

# First exploitation phase ("no RL"): number of trials and beta (0.00)
NUMBER_OF_EXPLOIT_TRIALS_NO_RL = 500
BETA_EXPLOIT_NO_RL = 0.00

# Second exploitation phase ("RL start"): number of trials and beta
NUMBER_OF_EXPLOIT_TRIALS_RL_START = 100
BETA_EXPLOIT_RL_START = 0.05

# Third exploitation phase ("RL"): number of trials and beta
NUMBER_OF_EXPLOIT_TRIALS_RL = 500
BETA_EXPLOIT_RL = 0.05

# Passed to every Configuration as bs_max; also used in the 'BACS-<n>' label of the results table
LENGTH_OF_BEHAVIORAL_SEQUENCES = 1

# Number of independent runs per maze (the `n` argument of bench_on_maze)
NUMBER_OF_ITERATIONS_TO_BENCH = 30

# Passed to parmap.map as pm_processes
NB_OF_PROCESSES = 24

## Full Details of Agent Parameters

In [3]:
# Keyword arguments that are identical for the four phases of the benchmark.
_SHARED_CFG_KWARGS = dict(
    classifier_length=CLASSIFIER_LENGTH,
    number_of_possible_actions=NUMBER_OF_POSSIBLE_ACTIONS,
    user_metrics_collector_fcn=_maze_metrics,
    do_ga=False,
    do_subsumption=True,
    gamma=0.95,
    theta_i=0.1,
    theta_r=0.9,
    u_max=CLASSIFIER_LENGTH,
    theta_exp=20,
    bs_max=LENGTH_OF_BEHAVIORAL_SEQUENCES
)

# Exploration: metrics sampled every METRICS_TRIAL_FREQUENCY_EXPLORE trials, epsilon = EPSILON.
cfg_explore = Configuration(
    metrics_trial_frequency=METRICS_TRIAL_FREQUENCY_EXPLORE,
    beta=BETA_EXPLORE,
    epsilon=EPSILON,
    **_SHARED_CFG_KWARGS
)

# The three exploitation configurations record metrics on every trial, use
# epsilon = 0.0 and differ from one another only by their beta value.
cfg_exploit_no_rl = Configuration(
    metrics_trial_frequency=1,
    beta=BETA_EXPLOIT_NO_RL,
    epsilon=0.0,
    **_SHARED_CFG_KWARGS
)

cfg_exploit_rl_start = Configuration(
    metrics_trial_frequency=1,
    beta=BETA_EXPLOIT_RL_START,
    epsilon=0.0,
    **_SHARED_CFG_KWARGS
)

cfg_exploit_rl = Configuration(
    metrics_trial_frequency=1,
    beta=BETA_EXPLOIT_RL,
    epsilon=0.0,
    **_SHARED_CFG_KWARGS
)

### Benchmarking - Maze

In [4]:
def _mean_steps(metrics):
    """Return the mean 'steps_in_trial' over the metric records actually collected.

    Dividing by the number of records (instead of a nominal trial count) keeps
    the average correct when the two differ. Returns 0.0 when there is no record.
    """
    steps = [record['steps_in_trial'] for record in metrics]
    return sum(steps) / len(steps) if steps else 0.0


def _mean_and_pstdev(values):
    """Return (arithmetic mean, population standard deviation) of `values`."""
    return float(sum(values)) / len(values), statistics.pstdev(values)


def bench_on_maze(env, n):
    """Benchmark BACS on one maze over `n` independent runs.

    Each run creates a fresh environment with ``gym.make(env.id)`` and chains
    four phases, each agent being built from the population returned by the
    previous phase: exploration (cfg_explore), then three exploitation phases
    (cfg_exploit_no_rl, cfg_exploit_rl_start, cfg_exploit_rl). The population
    is cleaned with ``clean_population(does_anticipate_change=True)`` before
    each exploitation phase.

    The configurations and the NUMBER_OF_* trial counts are read from
    module-level names.

    Parameters
    ----------
    env : object exposing an ``id`` attribute accepted by ``gym.make``.
    n : int
        Number of runs; must be at least 1 (0 raises ZeroDivisionError when
        the means are computed).

    Returns
    -------
    dict
        The maze id, the mean ('avg_*') and population standard deviation
        ('std_*') across runs of: average steps per phase and the
        'knowledge', 'population', 'numerosity' and 'reliable' values of the
        last metric record of the final phase; plus the per-run average steps
        of the "no RL" and "RL" exploitation phases.
    """
    avg_explore_list = []
    avg_exploit_no_rl_list = []
    avg_exploit_rl_start_list = []
    avg_exploit_rl_list = []
    knowledge_list = []
    population_list = []
    numerosity_list = []
    reliable_list = []

    for i in range(n):

        # Initialize environment
        maze = gym.make(env.id)

        # Reset it (the returned situation is not needed here)
        maze.reset()

        # Training of BACS - Exploration
        agent_explore = BACS(cfg_explore)
        population_explore, metrics_explore = agent_explore.explore(maze, NUMBER_OF_EXPLORE_TRIALS)

        # Using BACS - Exploitation - No RL module
        agent_exploit_no_rl = BACS(cfg_exploit_no_rl, population_explore)
        agent_exploit_no_rl.clean_population(does_anticipate_change=True)
        population_exploit_no_rl, metrics_exploit_no_rl = agent_exploit_no_rl.exploit(maze, NUMBER_OF_EXPLOIT_TRIALS_NO_RL)

        # Using BACS - Exploitation - Starting using RL module
        agent_exploit_rl_start = BACS(cfg_exploit_rl_start, population_exploit_no_rl)
        agent_exploit_rl_start.clean_population(does_anticipate_change=True)
        population_exploit_rl_start, metrics_exploit_rl_start = agent_exploit_rl_start.exploit(maze, NUMBER_OF_EXPLOIT_TRIALS_RL_START)

        # Using BACS - Exploitation - Using RL module
        agent_exploit_rl = BACS(cfg_exploit_rl, population_exploit_rl_start)
        agent_exploit_rl.clean_population(does_anticipate_change=True)
        population_exploit_rl, metrics_exploit_rl = agent_exploit_rl.exploit(maze, NUMBER_OF_EXPLOIT_TRIALS_RL)

        # Average number of steps per recorded trial, for each phase
        avg_explore_list.append(_mean_steps(metrics_explore))
        avg_exploit_no_rl_list.append(_mean_steps(metrics_exploit_no_rl))
        avg_exploit_rl_start_list.append(_mean_steps(metrics_exploit_rl_start))
        avg_exploit_rl_list.append(_mean_steps(metrics_exploit_rl))

        # Other metrics, taken from the last record of the final phase
        final_record = metrics_exploit_rl[-1]
        knowledge_list.append(final_record['knowledge'])
        population_list.append(final_record['population'])
        numerosity_list.append(final_record['numerosity'])
        reliable_list.append(final_record['reliable'])

        # Textual progress bar: one '#' per completed run
        if i == n-1:
            print(env.id,'|','#' * (i+1), '|')
        else:
            print(env.id,'|','#' * (i+1) ,' ' * (n-i-2), '|')

    # Compute the means and standard deviations across runs
    avg_explore, std_explore = _mean_and_pstdev(avg_explore_list)
    avg_exploit_no_rl, std_exploit_no_rl = _mean_and_pstdev(avg_exploit_no_rl_list)
    avg_exploit_rl_start, std_exploit_rl_start = _mean_and_pstdev(avg_exploit_rl_start_list)
    avg_exploit_rl, std_exploit_rl = _mean_and_pstdev(avg_exploit_rl_list)
    avg_knowledge, std_knowledge = _mean_and_pstdev(knowledge_list)
    avg_population, std_population = _mean_and_pstdev(population_list)
    avg_numerosity, std_numerosity = _mean_and_pstdev(numerosity_list)
    avg_reliable, std_reliable = _mean_and_pstdev(reliable_list)

    result = {
        'maze'             : env.id,
        'avg_explore'      : avg_explore,
        'std_explore'      : std_explore,
        'avg_exploit_no_rl'      : avg_exploit_no_rl,
        'std_exploit_no_rl'      : std_exploit_no_rl,
        'avg_exploit_rl_start'      : avg_exploit_rl_start,
        'std_exploit_rl_start'      : std_exploit_rl_start,
        'avg_exploit_rl'      : avg_exploit_rl,
        'std_exploit_rl'      : std_exploit_rl,
        'avg_knowledge'    : avg_knowledge,
        'std_knowledge'    : std_knowledge,
        'avg_population'   : avg_population,
        'std_population'   : std_population,
        'avg_numerosity'   : avg_numerosity,
        'std_numerosity'   : std_numerosity,
        'avg_reliable'     : avg_reliable,
        'std_reliable'     : std_reliable,
        'avg_exploit_no_rl_list' : avg_exploit_no_rl_list,
        'avg_exploit_rl_list' : avg_exploit_rl_list
    }

    print(result)

    return result

Custom filter functions for selecting the available Gym environments by maze type:

In [5]:
def filter_envs_typeIII(env):
    """Return True when env.id starts with one of the prefixes grouped as 'typeIII'."""
    return env.id.startswith(("Maze10-", "MazeE1", "MazeE2", "Woods10"))

def filter_envs_typeII(env):
    """Return True when env.id starts with one of the prefixes grouped as 'typeII'."""
    return env.id.startswith(("MazeF4", "Maze7", "MiyazakiB"))

def filter_envs_typeI(env):
    """Return True when env.id starts with one of the prefixes grouped as 'typeI'."""
    return env.id.startswith(("MazeB", "MazeD", "Littman", "MiyazakiA", "Cassandra"))

def filter_envs_na(env):
    """Return True when env.id starts with one of the prefixes grouped as 'na'."""
    return env.id.startswith((
        "MazeF1", "MazeF2", "MazeF3", "Woods1-",
        "Woods14", "Maze4", "Maze5", "MazeA",
    ))

# Snapshot of every environment spec currently registered in Gym.
all_envs = list(gym.envs.registry.all())

Benchmarking without the genetic algorithms :

In [6]:
# Keep the registered environments accepted by at least one of the maze filters.
maze_envs = [
    env for env in all_envs
    if any(
        accepts(env)
        for accepts in (filter_envs_na, filter_envs_typeI, filter_envs_typeII, filter_envs_typeIII)
    )
]

# One bench_on_maze(env, NUMBER_OF_ITERATIONS_TO_BENCH) call per maze, spread over worker processes.
results = parmap.map(
    bench_on_maze,
    maze_envs,
    NUMBER_OF_ITERATIONS_TO_BENCH,
    pm_pbar=True,
    pm_processes=NB_OF_PROCESSES
)

  0%|          | 0/24 [00:38<?, ?it/s]Process ForkPoolWorker-12:
Process ForkPoolWorker-11:
Process ForkPoolWorker-13:
Process ForkPoolWorker-15:
Process ForkPoolWorker-10:
Process ForkPoolWorker-2:
Traceback (most recent call last):
  File "/Users/romain/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
Process ForkPoolWorker-14:
  File "/Users/romain/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
Process ForkPoolWorker-5:
Process ForkPoolWorker-7:
  File "/Users/romain/anaconda3/lib/python3.7/multiprocessing/pool.py", line 121, in worker
    result = (True, func(*args, **kwds))
Traceback (most recent call last):
  File "/Users/romain/anaconda3/lib/python3.7/multiprocessing/pool.py", line 44, in mapstar
    return list(map(*args))
  File "/Users/romain/anaconda3/lib/python3.7/site-packages/parmap/parmap.py", line 105, in _func_star_single
    **func_item_args[3])
  File "<ipyt

  File "/Users/romain/Documents/jupyter/BACS/bacs/agents/Agent.py", line 42, in explore
    return self._evaluate(env, trials, self._run_trial_explore)
  File "/Users/romain/Documents/jupyter/BACS/bacs/agents/Agent.py", line 42, in explore
    return self._evaluate(env, trials, self._run_trial_explore)
  File "<ipython-input-4-e38a0edd72de>", line 21, in bench_on_maze
    population_explore, metrics_explore = agent_explore.explore(maze, NUMBER_OF_EXPLORE_TRIALS)
  File "<ipython-input-4-e38a0edd72de>", line 21, in bench_on_maze
    population_explore, metrics_explore = agent_explore.explore(maze, NUMBER_OF_EXPLORE_TRIALS)
  File "/Users/romain/Documents/jupyter/BACS/bacs/agents/Agent.py", line 42, in explore
    return self._evaluate(env, trials, self._run_trial_explore)
  File "/Users/romain/Documents/jupyter/BACS/bacs/agents/Agent.py", line 112, in _evaluate
    steps_in_trial, reward = func(env, steps, current_trial)
  File "/Users/romain/Documents/jupyter/BACS/bacs/agents/Agent.py"

  File "/Users/romain/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/romain/Documents/jupyter/BACS/bacs/agents/bacs/ClassifiersList.py", line 24, in <listcomp>
    matching = [cl for cl in self if cl.condition.does_match(situation)]
  File "/Users/romain/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/romain/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/romain/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/romain/Documents/jupyter/BACS/bacs/agents/bacs/ClassifiersList.py", line 57, in get_maximum_fitness
    anticipated_change_cls = [cl for cl in self
  File "/Users/romain/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*s

KeyboardInterrupt
  File "/Users/romain/anaconda3/lib/python3.7/_collections_abc.py", line 883, in __iter__
    v = self[i]
  File "/Users/romain/anaconda3/lib/python3.7/site-packages/parmap/parmap.py", line 105, in _func_star_single
    **func_item_args[3])
  File "/Users/romain/anaconda3/lib/python3.7/site-packages/parmap/parmap.py", line 105, in _func_star_single
    **func_item_args[3])
  File "/Users/romain/anaconda3/lib/python3.7/site-packages/parmap/parmap.py", line 105, in _func_star_single
    **func_item_args[3])
  File "/Users/romain/Documents/jupyter/BACS/bacs/TypedList.py", line 43, in __getitem__
    return self._items[i]
  File "/Users/romain/anaconda3/lib/python3.7/site-packages/parmap/parmap.py", line 105, in _func_star_single
    **func_item_args[3])
  File "/Users/romain/anaconda3/lib/python3.7/site-packages/parmap/parmap.py", line 105, in _func_star_single
    **func_item_args[3])
  File "/Users/romain/anaconda3/lib/python3.7/site-packages/parmap/parmap.py", line 10

  File "/Users/romain/anaconda3/lib/python3.7/multiprocessing/pool.py", line 110, in worker
    task = get()
Process ForkPoolWorker-26:
Process ForkPoolWorker-27:
Process ForkPoolWorker-25:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/Users/romain/anaconda3/lib/python3.7/multiprocessing/process.py", line 285, in _bootstrap
    util._close_stdin()
  File "/Users/romain/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/Users/romain/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/Users/romain/anaconda3/lib/python3.7/multiprocessing/util.py", line 380, in _close_stdin
    sys.stdin.close()
  File "/Users/romain/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/romain/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
    se

Formatting the previous results as one Markdown table row per environment:

In [None]:
# Build one pipe-delimited row per benchmarked maze, then print them all at once.
table_rows = []

for item in results:
    no_rl_runs = item['avg_exploit_no_rl_list']
    rl_runs = item['avg_exploit_rl_list']

    # Numeric columns, in output order. The 'avg_knowledge' / 'std_knowledge'
    # columns are deliberately left out of the table.
    numeric_cells = [
        item['avg_exploit_no_rl'],
        item['std_exploit_no_rl'],
        min(no_rl_runs),
        max(no_rl_runs),
        item['avg_exploit_rl'],
        item['std_exploit_rl'],
        min(rl_runs),
        max(rl_runs),
        item['avg_population'],
        item['std_population'],
        item['avg_reliable'],
        item['std_reliable'],
    ]

    cells = ['BACS-' + str(LENGTH_OF_BEHAVIORAL_SEQUENCES), item['maze']]
    cells.extend("{:.3f}".format(value) for value in numeric_cells)

    # Every cell, including the last one, is followed by a '|'
    table_rows.append('|' + '|'.join(cells) + '|' + '\n')

markdown_str = ''.join(table_rows)

print(markdown_str)

In [None]:
# Dump the raw value returned by parmap.map (the collected bench_on_maze results)
print(results)