
    This Source Code Form is subject to the terms of the Mozilla Public
    License, v. 2.0. If a copy of the MPL was not distributed with this
    file, You can obtain one at http://mozilla.org/MPL/2.0/.


In [None]:
# Enable automatic module reload
%load_ext autoreload
%autoreload 2

# Load BACS module
from bacs.agents.bacs import BACS, Configuration

from bacs.metrics import _maze_metrics

# Load environments
import gym
import my_mazes

# Run the benchmarks in parallel
import parmap

# Compute means and standard deviations
import statistics

# Agent - BACS - BENCHMARKING

## Main Parameters

In [None]:
# Passed to every Configuration below as `classifier_length` and as `u_max`.
CLASSIFIER_LENGTH = 8
# Passed to every Configuration below as `number_of_possible_actions`.
NUMBER_OF_POSSIBLE_ACTIONS = 8

# NOTE(review): not referenced by any visible cell of this notebook —
# presumably the slip probability of stochastic mazes; confirm or remove.
PROB_SLIPPERY = 0.25

# Exploration phase: number of trials given to `explore()`, plus the
# `metrics_trial_frequency`, `beta` and `epsilon` of `cfg_explore`.
NUMBER_OF_EXPLORE_TRIALS = 1000
METRICS_TRIAL_FREQUENCY_EXPLORE = 25
BETA_EXPLORE = 0.05
EPSILON = 0.8

# First exploitation phase: trial count and `beta` of `cfg_exploit_no_rl`.
# NOTE(review): beta is 0 here, presumably to switch reinforcement updates
# off ("no RL") — confirm against the BACS implementation.
NUMBER_OF_EXPLOIT_TRIALS_NO_RL = 500
BETA_EXPLOIT_NO_RL = 0.00

# Second exploitation phase: trial count and `beta` of `cfg_exploit_rl_start`.
NUMBER_OF_EXPLOIT_TRIALS_RL_START = 100
BETA_EXPLOIT_RL_START = 0.05

# Third exploitation phase: trial count and `beta` of `cfg_exploit_rl`.
NUMBER_OF_EXPLOIT_TRIALS_RL = 500
BETA_EXPLOIT_RL = 0.05

# Passed to every Configuration below as `bs_max`.
LENGTH_OF_BEHAVIORAL_SEQUENCES = 1

# Number of times each selected maze is queued for benchmarking.
NUMBER_OF_ITERATIONS_TO_BENCH = 3

# Passed to `parmap.map` as `pm_processes`.
NB_OF_PROCESSES = 1

# `do_ga` of the exploration configuration only (exploit configs force False).
DO_GA = False
# `do_pee` of every Configuration below.
ENABLE_PEE = False
# `is_reliable` argument of every `zip_population()` call in `bench_on_maze`.
KEEP_ONLY_RELIABLE = False

## Full Details of Agent Parameters

In [None]:
# Keyword arguments that are identical for all four agent configurations.
_shared_settings = dict(
    classifier_length=CLASSIFIER_LENGTH,
    number_of_possible_actions=NUMBER_OF_POSSIBLE_ACTIONS,
    do_pee=ENABLE_PEE,
    gamma=0.95,
    theta_i=0.1,
    theta_r=0.9,
    u_max=CLASSIFIER_LENGTH,
    theta_exp=20,
    bs_max=LENGTH_OF_BEHAVIORAL_SEQUENCES,
)

# Keyword arguments common to the three exploitation configurations:
# no user metrics collector, metrics frequency of 1, no GA, epsilon of 0.
_exploit_settings = dict(
    _shared_settings,
    user_metrics_collector_fcn=None,
    metrics_trial_frequency=1,
    do_ga=False,
    epsilon=0.0,
)

# Exploration: the only configuration using `_maze_metrics`, `DO_GA` and a
# non-zero epsilon.
cfg_explore = Configuration(
    user_metrics_collector_fcn=_maze_metrics,
    metrics_trial_frequency=METRICS_TRIAL_FREQUENCY_EXPLORE,
    do_ga=DO_GA,
    beta=BETA_EXPLORE,
    epsilon=EPSILON,
    **_shared_settings
)

# The exploitation configurations differ from one another only by `beta`.
cfg_exploit_no_rl = Configuration(beta=BETA_EXPLOIT_NO_RL, **_exploit_settings)

cfg_exploit_rl_start = Configuration(beta=BETA_EXPLOIT_RL_START, **_exploit_settings)

cfg_exploit_rl = Configuration(beta=BETA_EXPLOIT_RL, **_exploit_settings)

### Benchmarking - Maze

Helper predicates that select the registered Gym environments belonging to each type of maze:

In [None]:
# Environment-id prefixes of each maze group; `str.startswith` accepts a
# tuple and returns True as soon as one prefix matches.
_TYPE_III_PREFIXES = ("Maze10-", "MazeE1", "MazeE2", "Woods10")
_TYPE_II_PREFIXES = ("MazeF4", "Maze7", "MiyazakiB")
_TYPE_I_PREFIXES = ("MazeB", "MazeD", "Littman", "MiyazakiA", "Cassandra")
_NA_PREFIXES = (
    "MazeF1", "MazeF2", "MazeF3", "Woods1-",
    "Woods14", "Maze4", "Maze5", "MazeA",
)


def filter_envs_typeIII(env):
    """Return True when ``env.id`` starts with a type III maze prefix."""
    return env.id.startswith(_TYPE_III_PREFIXES)


def filter_envs_typeII(env):
    """Return True when ``env.id`` starts with a type II maze prefix."""
    return env.id.startswith(_TYPE_II_PREFIXES)


def filter_envs_typeI(env):
    """Return True when ``env.id`` starts with a type I maze prefix."""
    return env.id.startswith(_TYPE_I_PREFIXES)


def filter_envs_na(env):
    """Return True when ``env.id`` starts with a prefix of the "na" group."""
    return env.id.startswith(_NA_PREFIXES)

# Snapshot of every environment spec currently registered in Gym.
# NOTE(review): `registry.all()` appears to exist only in older gym
# releases — confirm the pinned gym version before upgrading.
all_envs = list(gym.envs.registry.all())

Function that runs the full benchmark once on a single Gym environment and returns its metrics:

In [None]:
def bench_on_maze(env):
    """Run one explore-then-exploit benchmark on a single maze.

    The agent first explores the maze with ``cfg_explore``; the resulting
    population is then handed through three successive exploitation phases
    (``cfg_exploit_no_rl``, ``cfg_exploit_rl_start``, ``cfg_exploit_rl``),
    being compressed with ``zip_population`` before each of them.

    Args:
        env: Gym environment spec; only its ``id`` attribute is read.

    Returns:
        dict with the maze id, the last exploration metrics
        (``knowledge``, ``population``, ``numerosity``, ``reliable``), the
        same four metrics for the final compressed population
        (``*_compressed``) and the mean ``steps_in_trial`` of each
        exploitation phase (``avg_exploit_*``).
    """

    def mean_steps(trial_metrics, nb_trials):
        # The divisor is the configured trial count, not len(trial_metrics).
        return sum(entry['steps_in_trial'] for entry in trial_metrics) / nb_trials

    # Build the environment and reset it before handing it to the agents
    maze = gym.make(env.id)
    maze.reset()

    # Exploration
    explorer = BACS(cfg_explore)
    population, metrics_explore = explorer.explore(maze, NUMBER_OF_EXPLORE_TRIALS)

    # Exploitation: without RL, starting RL, then with RL. Each phase starts
    # from the population returned by the previous one.
    exploit_phases = (
        (cfg_exploit_no_rl, NUMBER_OF_EXPLOIT_TRIALS_NO_RL),
        (cfg_exploit_rl_start, NUMBER_OF_EXPLOIT_TRIALS_RL_START),
        (cfg_exploit_rl, NUMBER_OF_EXPLOIT_TRIALS_RL),
    )
    phase_averages = []
    for phase_cfg, nb_trials in exploit_phases:
        agent = BACS(phase_cfg, population)
        agent.zip_population(does_anticipate_change=True, is_reliable=KEEP_ONLY_RELIABLE)
        population, phase_metrics = agent.exploit(maze, nb_trials)
        phase_averages.append(mean_steps(phase_metrics, nb_trials))
    avg_no_rl, avg_rl_start, avg_rl = phase_averages

    # Metrics of the population held by the last exploitation agent
    compressed_metrics = _maze_metrics(agent.get_population(), maze)
    final_explore = metrics_explore[-1]

    return {
        'maze': env.id,

        'knowledge': final_explore['knowledge'],
        'population': final_explore['population'],
        'numerosity': final_explore['numerosity'],
        'reliable': final_explore['reliable'],

        'knowledge_compressed': compressed_metrics['knowledge'],
        'population_compressed': compressed_metrics['population'],
        'numerosity_compressed': compressed_metrics['numerosity'],
        'reliable_compressed': compressed_metrics['reliable'],

        'avg_exploit_no_rl': avg_no_rl,
        'avg_exploit_rl_start': avg_rl_start,
        'avg_exploit_rl': avg_rl,
    }

Benchmarking :

In [None]:
# Build the work list: `maze_envs` holds one entry per benchmark run (each
# selected environment repeated NUMBER_OF_ITERATIONS_TO_BENCH times), while
# `maze_envs_name` holds each selected environment id once.
maze_envs = []
maze_envs_name = []
for env in all_envs:
    # Alternative selection kept for quick experiments on one maze family:
    #if env.id.startswith("Woods100"):
    if filter_envs_typeIII(env) or filter_envs_typeII(env) or filter_envs_typeI(env) or filter_envs_na(env):
        maze_envs_name.append(env.id)
        for i in range(NUMBER_OF_ITERATIONS_TO_BENCH):
            maze_envs.append(env)
# NOTE: `env` and `i` stay bound at notebook scope once the loop has finished.

# Call bench_on_maze on every queued environment. The `pm_*` keywords are
# parmap options (progress bar, number of worker processes, chunk size).
parmap_results = parmap.map(bench_on_maze, maze_envs, pm_pbar=True, pm_processes=NB_OF_PROCESSES, pm_chunksize=4)

In [None]:
def compute_mean_and_stdev_for_one_env(env_name, parmap_results):
    """Aggregate the benchmark runs of one maze into means and deviations.

    Args:
        env_name: Environment id to summarise; compared with the ``'maze'``
            entry of every result.
        parmap_results: Iterable of per-run result dicts as returned by
            ``bench_on_maze``.

    Returns:
        dict containing ``'maze'`` (equal to ``env_name``) and, for every
        metric, its mean (``avg_*``), its sample standard deviation
        (``std_*``) and the list of raw per-run values (``*_list``).
        With a single run the standard deviation is reported as ``0.0``.

    Raises:
        statistics.StatisticsError: if no result matches ``env_name``.
    """
    runs = [res for res in parmap_results if res['maze'] == env_name]

    def column(key):
        # Values of one metric across all runs of this maze, in run order
        return [run[key] for run in runs]

    def spread(values):
        # statistics.stdev needs at least two samples; a single run carries
        # no dispersion information, so report 0.0 instead of raising.
        return statistics.stdev(values) if len(values) > 1 else 0.0

    # The label comes from the argument, not from a notebook-level variable.
    summary = {'maze': env_name}

    # Population metrics, first raw then compressed. Within each group all
    # avg/std pairs come first, followed by the raw value lists.
    population_metrics = ('knowledge', 'population', 'numerosity', 'reliable')
    for suffix in ('', '_compressed'):
        columns = [(name, column(name + suffix)) for name in population_metrics]
        for name, values in columns:
            summary['avg_' + name + suffix] = statistics.mean(values)
            summary['std_' + name + suffix] = spread(values)
        for name, values in columns:
            summary[name + '_list' + suffix] = values

    # Exploitation metrics: the per-run value is already named 'avg_*'
    for phase in ('exploit_no_rl', 'exploit_rl_start', 'exploit_rl'):
        values = column('avg_' + phase)
        summary['avg_' + phase] = statistics.mean(values)
        summary['std_' + phase] = spread(values)
        summary['avg_' + phase + '_list'] = values

    return summary

# One summary dict per benchmarked maze, in the order of `maze_envs_name`.
result = [ compute_mean_and_stdev_for_one_env(env_name, parmap_results) for env_name in maze_envs_name]

In [None]:
# Display the aggregated statistics of every benchmarked maze.
print(result)