
    This Source Code Form is subject to the terms of the Mozilla Public
    License, v. 2.0. If a copy of the MPL was not distributed with this
    file, You can obtain one at http://mozilla.org/MPL/2.0/.


In [None]:
# Logger
import logging
logging.basicConfig(level=logging.WARN)

# Enable automatic module reload
%load_ext autoreload
%autoreload 2

# Load BACS module
from bacs.agents.bacs import BACS, Configuration
from bacs.agents.bacs.utils.GymMazeWrapper import _maze_metrics, _does_pees_match_non_aliased_states

# Load environments
import gym
import my_mazes

# Run the benchmarks in parallel across several processes
import parmap

# For computing means and standard deviations
import statistics

# Agent - BACS - BENCHMARKING

## Main Parameters

In [None]:
# --- Problem dimensions (forwarded to Configuration) -------------------------
CLASSIFIER_LENGTH = 8
NUMBER_OF_POSSIBLE_ACTIONS = 8
# Forwarded to Configuration as `bs_max`; also used in the "BACS-<n>" label
# of the markdown summary.
LENGTH_OF_BEHAVIORAL_SEQUENCES = 0

# --- Exploration phase ---------------------------------------------------------
# Number of trials passed to `agent.explore(...)` for every training run.
NUMBER_OF_EXPLORE_TRIALS = 1000
# Forwarded to Configuration as `metrics_trial_frequency`.
METRICS_TRIAL_FREQUENCY_EXPLORE = 1
# Forwarded to Configuration as `beta` and `epsilon` respectively.
BETA_EXPLORE = 0.05
EPSILON = 0.8

# --- Optional mechanisms (forwarded as `do_ga` / `do_pee`) ---------------------
DO_GA = True
ENABLE_PEE = True

# --- Benchmark sizing ----------------------------------------------------------
# Number of independent training runs performed per maze.
NUMBER_OF_ITERATIONS_TO_BENCH = 30
# Number of worker processes handed to parmap.
NB_OF_PROCESSES = 24

## Full Details of Agent Parameters

In [None]:
# Configuration handed to every BACS agent created by the benchmark.
# Values that are swept between experiments come from the constants above;
# the remaining literals are fixed for this notebook.
cfg_explore = Configuration(
    # Problem dimensions
    classifier_length=CLASSIFIER_LENGTH,
    number_of_possible_actions=NUMBER_OF_POSSIBLE_ACTIONS,
    u_max=CLASSIFIER_LENGTH,
    bs_max=LENGTH_OF_BEHAVIORAL_SEQUENCES,
    # Optional mechanisms
    do_ga=DO_GA,
    do_pee=ENABLE_PEE,
    # Numeric settings
    beta=BETA_EXPLORE,
    gamma=0.95,
    epsilon=EPSILON,
    theta_i=0.1,
    theta_r=0.9,
    theta_exp=20,
    # Metrics collection
    user_metrics_collector_fcn=_maze_metrics,
    metrics_trial_frequency=METRICS_TRIAL_FREQUENCY_EXPLORE,
)

### Benchmarking - Maze

In [None]:
def _mean_and_std(values):
    """Return ``(mean, sample standard deviation)`` of ``values``.

    ``statistics.stdev`` requires at least two data points. With a single
    sample the deviation is reported as ``0.0`` so that a one-run benchmark
    does not fail after all the training work has been done. An empty
    sequence still raises ``statistics.StatisticsError`` (from ``mean``).
    """
    spread = statistics.stdev(values) if len(values) > 1 else 0.0
    return statistics.mean(values), spread


def bench_on_maze(env, n):
    """Train a fresh BACS agent ``n`` times on one maze and aggregate the results.

    Each run creates a new environment with ``gym.make(env.id)``, builds a new
    agent from ``cfg_explore`` and lets it explore for
    ``NUMBER_OF_EXPLORE_TRIALS`` trials. The last metrics record of each run
    and the outcome of ``_does_pees_match_non_aliased_states`` are collected.
    A textual progress bar is printed after every run.

    Args:
        env: Object exposing an ``id`` attribute accepted by ``gym.make``.
        n: Number of independent training runs to perform.

    Returns:
        dict: ``'maze'`` (the environment id) plus, for each of ``knowledge``,
        ``population``, ``numerosity``, ``reliable`` and
        ``pees_matching_non_aliased_states``, the keys ``avg_<name>``,
        ``std_<name>`` and ``<name>_list`` (the raw per-run values).

    Raises:
        statistics.StatisticsError: If ``n`` is 0, since no sample exists.
    """
    metric_names = ('knowledge', 'population', 'numerosity', 'reliable')
    pees_name = 'pees_matching_non_aliased_states'

    # One list of per-run samples per reported quantity, in reporting order.
    samples = {name: [] for name in metric_names}
    samples[pees_name] = []

    for i in range(n):
        # Fresh environment for every run.
        maze = gym.make(env.id)
        maze.reset()

        # Training of BACS - Exploration
        agent_explore = BACS(cfg_explore)
        population_explore, metrics_explore = agent_explore.explore(
            maze, NUMBER_OF_EXPLORE_TRIALS)

        # Only the last metrics record of the run is kept.
        final_metrics = metrics_explore[-1]
        for name in metric_names:
            samples[name].append(final_metrics[name])
        samples[pees_name].append(
            _does_pees_match_non_aliased_states(population_explore, maze))

        # Progress bar: the '#' run is padded to a constant width of n.
        print(env.id, '|', ('#' * (i + 1)).ljust(n), '|')

    result = {'maze': env.id}

    # Means and standard deviations first, then the raw lists, so the key
    # order of the printed dictionary stays as before.
    for name, values in samples.items():
        result['avg_' + name], result['std_' + name] = _mean_and_std(values)
    for name, values in samples.items():
        result[name + '_list'] = values

    print(result)

    return result

Helper predicates that select registered Gym environments by maze category:

In [None]:
def filter_envs_typeIII(env):
    """Return True if ``env.id`` starts with a prefix of the "type III" maze group."""
    return env.id.startswith(("Maze10-", "MazeE1", "MazeE2", "Woods10"))


def filter_envs_typeII(env):
    """Return True if ``env.id`` starts with a prefix of the "type II" maze group."""
    return env.id.startswith(("MazeF4", "Maze7", "MiyazakiB"))


def filter_envs_typeI(env):
    """Return True if ``env.id`` starts with a prefix of the "type I" maze group."""
    return env.id.startswith(
        ("MazeB", "MazeD", "Littman", "MiyazakiA", "Cassandra"))


def filter_envs_na(env):
    """Return True if ``env.id`` starts with a prefix of the "na" maze group."""
    # "Woods1-" keeps its trailing dash so that "Woods10..." is not matched here.
    return env.id.startswith(
        ("MazeF1", "MazeF2", "MazeF3", "Woods1-", "Woods14",
         "Maze4", "Maze5", "MazeA"))

# Snapshot of every environment spec currently registered in Gym.
all_envs = list(gym.envs.registry.all())

Benchmarking on every selected maze (whether the genetic algorithm is used is controlled by `DO_GA` above):

In [None]:
# Keep the environments accepted by at least one of the maze predicates
# (checked in the order: na, type I, type II, type III).
maze_envs = [
    env for env in all_envs
    if any(
        accepts(env)
        for accepts in (filter_envs_na, filter_envs_typeI,
                        filter_envs_typeII, filter_envs_typeIII)
    )
]

# One bench_on_maze(env, NUMBER_OF_ITERATIONS_TO_BENCH) call per maze,
# distributed over NB_OF_PROCESSES workers with a progress bar.
results = parmap.map(
    bench_on_maze,
    maze_envs,
    NUMBER_OF_ITERATIONS_TO_BENCH,
    pm_pbar=True,
    pm_processes=NB_OF_PROCESSES,
)

Formatting the previous results as one markdown table row per environment:

In [None]:
# One markdown table row per benchmarked maze: agent label, maze id, then
# "mean (standard deviation)" for the knowledge, population and reliable
# metrics, each with three decimals.
row_template = (
    '|BACS-{bs}|{maze}|'
    '{avg_knowledge:.3f} ({std_knowledge:.3f})|'
    '{avg_population:.3f} ({std_population:.3f})|'
    '{avg_reliable:.3f} ({std_reliable:.3f})|'
    '\n'
)

# Extra keys of each result dictionary are ignored by str.format.
markdown_str = ''.join(
    row_template.format(bs=LENGTH_OF_BEHAVIORAL_SEQUENCES, **entry)
    for entry in results
)

print(markdown_str)

In [None]:
# Dump the raw result dictionaries returned by bench_on_maze, one per maze,
# including the per-run value lists that the markdown summary leaves out.
print(results)