
    This Source Code Form is subject to the terms of the Mozilla Public
    License, v. 2.0. If a copy of the MPL was not distributed with this
    file, You can obtain one at http://mozilla.org/MPL/2.0/.


In [1]:
# Enable automatic module reload
%load_ext autoreload
%autoreload 2

# Load EPEACS module
from epeacs.agents.epeacs import EPEACS, Configuration

from epeacs.metrics import \
    _maze_metrics, \
    _how_many_peps_match_non_aliased_states, \
    _mean_reliable_classifier_specificity, \
    _when_full_knowledge_is_achieved, \
    _state_of_population, \
    _enhanced_effect_error

# Load environments
import gym
import my_mazes

# Run the benchmarks in parallel across several processes
import parmap

# For computing means and standard deviations
import statistics

# Agent - EPEACS - BENCHMARKING

## Main Parameters

In [2]:
# Passed to the maze through set_random_attribute_length() in bench_on_maze
# and to _enhanced_effect_error().
RANDOM_ATTRIBUTE_LENGTH = 1
# Classifier length handed to the agent configuration: 8 base attributes plus
# the random ones (presumably the 8 cells around the agent -- TODO confirm).
CLASSIFIER_LENGTH = 8 + RANDOM_ATTRIBUTE_LENGTH
NUMBER_OF_POSSIBLE_ACTIONS = 8

# Number of trials given to agent.explore() for every single run.
NUMBER_OF_EXPLORE_TRIALS = 100
# Forwarded as metrics_trial_frequency to the configuration and also used by
# _state_of_population() when reading the collected metrics back.
METRICS_TRIAL_FREQUENCY_EXPLORE = 10
# Forwarded as beta_alp / beta_rl / beta_pep / epsilon to the configuration
# (presumably learning rates and the exploration probability -- TODO confirm).
BETA_ALP = 0.05
BETA_RL = 0.05
BETA_PEP = 0.01
EPSILON = 0.8

# Feature switches forwarded as do_ga / do_pep to the configuration.
DO_GA = True
ENABLE_PEP = True

# Probability of doing a random action, set on each maze in bench_on_maze.
SLIPPERY_PROB = 0.25

# How many times each selected maze is queued for benchmarking.
NUMBER_OF_ITERATIONS_TO_BENCH = 30

# Number of worker processes requested from parmap (pm_processes).
NB_OF_PROCESSES = 60

## Full Details of Agent Parameters

In [3]:
# Agent configuration used for every exploration run in this notebook.
# Values spelled in UPPER_CASE come from the "Main Parameters" cell; the
# remaining ones are literals that are only set here.
cfg_explore = Configuration(
    classifier_length=CLASSIFIER_LENGTH,
    number_of_possible_actions=NUMBER_OF_POSSIBLE_ACTIONS,
    # Callback from epeacs.metrics; presumably invoked every
    # metrics_trial_frequency trials to record maze metrics -- TODO confirm.
    user_metrics_collector_fcn=_maze_metrics,
    metrics_trial_frequency=METRICS_TRIAL_FREQUENCY_EXPLORE,
    do_pep=ENABLE_PEP,
    do_ga=DO_GA,
    beta_alp=BETA_ALP,
    beta_rl=BETA_RL,
    beta_pep=BETA_PEP,
    # NOTE(review): the literals below have no named constant; their meaning
    # is defined by epeacs.Configuration and cannot be confirmed from here.
    gamma=0.95,
    theta_i=0.1,
    theta_r=0.9,
    epsilon=EPSILON,
    u_max=CLASSIFIER_LENGTH,
    theta_exp=20,
    theta_ga=100,
    theta_as=20,
    mu=0.3,
    chi=0.8
)

### Benchmarking - Maze

Custom filter functions that select the environments registered in Gym according to the type of maze:

In [4]:
# Environment-id prefixes of each maze family. Matching is done with
# str.startswith, so a prefix such as "Woods10" also selects ids like
# "Woods100..." or "Woods101...".
_TYPE_III_PREFIXES = ("Maze10-", "MazeE1", "MazeE2", "Woods10")
_TYPE_II_PREFIXES = ("MazeF4", "Maze7", "MiyazakiB")
_TYPE_I_PREFIXES = ("MazeB", "MazeD", "Littman", "MiyazakiA", "Cassandra")
_NA_PREFIXES = ("MazeF1", "MazeF2", "MazeF3", "Woods14", "Maze4", "Maze5", "MazeA")


def filter_envs_typeIII(env):
    """Return True when ``env.id`` starts with one of the type III maze prefixes."""
    return env.id.startswith(_TYPE_III_PREFIXES)


def filter_envs_typeII(env):
    """Return True when ``env.id`` starts with one of the type II maze prefixes."""
    return env.id.startswith(_TYPE_II_PREFIXES)


def filter_envs_typeI(env):
    """Return True when ``env.id`` starts with one of the type I maze prefixes."""
    return env.id.startswith(_TYPE_I_PREFIXES)


def filter_envs_na(env):
    """Return True when ``env.id`` starts with one of the "na" maze prefixes.

    "na" presumably stands for non-aliased mazes -- TODO confirm.
    """
    return env.id.startswith(_NA_PREFIXES)

# Snapshot of every environment spec currently registered in Gym.
all_envs = list(gym.envs.registry.all())

Function that runs the benchmark on a single Gym environment and returns its metrics:

In [5]:
def bench_on_maze(env):
    """Train one EPEACS agent on a single maze and summarise the run.

    Parameters
    ----------
    env
        Object exposing an ``id`` attribute that ``gym.make`` accepts
        (the notebook passes entries of the Gym registry).

    Returns
    -------
    dict
        The maze id under ``'maze'``, the last collected values of
        ``'knowledge'``, ``'population'``, ``'numerosity'`` and ``'reliable'``,
        and the values computed by the ``epeacs.metrics`` helpers
        (reliable-classifier specificity, full-knowledge trials, state of the
        population at the last full-knowledge trial, old/new PEP error).
    """
    # NOTE(review): no random seed is set in this function; when it runs in
    # worker processes, confirm that every run draws independent random numbers.

    # Initialize environment
    maze = gym.make(env.id)

    # Set up probability to do a random action
    maze.env.set_prob_slippery(SLIPPERY_PROB)

    # Set up random attribute length
    maze.env.set_random_attribute_length(RANDOM_ATTRIBUTE_LENGTH)

    # Reset it, by putting an agent into random position; the returned
    # situation is not needed here.
    maze.reset()

    # Training of EPEACS - Exploration
    agent_explore = EPEACS(cfg_explore)
    population_explore, metrics_explore = agent_explore.explore(maze, NUMBER_OF_EXPLORE_TRIALS)

    # Get population metrics
    mean_reliable_classifier_specificity = _mean_reliable_classifier_specificity(population_explore, maze)
    first_trial, stable_trial, last_trial = _when_full_knowledge_is_achieved(metrics_explore)
    full_knowledge_trial_state_of_population = _state_of_population(
        metrics_explore, last_trial, METRICS_TRIAL_FREQUENCY_EXPLORE)
    old_pep_error, new_pep_error = _enhanced_effect_error(
        population_explore, maze, CLASSIFIER_LENGTH, RANDOM_ATTRIBUTE_LENGTH)

    # The headline figures are taken from the last metrics record of the run.
    final_metrics = metrics_explore[-1]

    return {
        'maze': env.id,
        'knowledge': final_metrics['knowledge'],
        'population': final_metrics['population'],
        'numerosity': final_metrics['numerosity'],
        'reliable': final_metrics['reliable'],
        'mean_reliable_classifier_specificity': mean_reliable_classifier_specificity,
        'full_knowledge_first_trial': first_trial,
        'full_knowledge_stable_trial': stable_trial,
        'full_knowledge_last_trial': last_trial,
        'full_knowledge_last_trial_state_of_population': full_knowledge_trial_state_of_population,
        'old_pep_error': old_pep_error,
        'new_pep_error': new_pep_error
    }

Benchmarking:

In [None]:
# maze_envs_name: id of every selected maze, once each (used for aggregation).
# maze_envs: the work list, holding each selected maze once per repetition.
maze_envs = []
maze_envs_name = []
for env in all_envs:
    if filter_envs_typeIII(env) or filter_envs_typeII(env) or filter_envs_typeI(env) or filter_envs_na(env):
        maze_envs_name.append(env.id)
        # Queue the same maze NUMBER_OF_ITERATIONS_TO_BENCH times so that
        # means and standard deviations can be computed per maze afterwards.
        maze_envs.extend([env] * NUMBER_OF_ITERATIONS_TO_BENCH)

# One bench_on_maze call per queued maze, spread over the worker processes.
parmap_results = parmap.map(bench_on_maze, maze_envs, pm_pbar=True, pm_processes=NB_OF_PROCESSES, pm_chunksize=1)

  6%|▋         | 44/690 [01:22<20:55,  1.94s/it] 

In [None]:
# Metrics that get an 'avg_*' / 'std_*' pair, as (key in a single-run result,
# suffix of the summary keys). The suffixes are irregular (some end in
# '_list') because the key names of the returned dictionary are kept as is.
_SUMMARISED_METRICS = (
    ('knowledge', 'knowledge'),
    ('population', 'population'),
    ('numerosity', 'numerosity'),
    ('reliable', 'reliable'),
    ('mean_reliable_classifier_specificity', 'mean_reliable_classifier_specificity'),
    ('full_knowledge_first_trial', 'full_knowledge_first_trial_list'),
    ('full_knowledge_stable_trial', 'full_knowledge_stable_trial_list'),
    ('full_knowledge_last_trial', 'full_knowledge_last_trial_list'),
    ('old_pep_error', 'old_pep_error_list'),
    ('new_pep_error', 'new_pep_error_list'),
)

# Metrics whose raw per-run values are returned under '<key>_list'.
# 'full_knowledge_last_trial_state_of_population' is collected but never
# averaged.
_COLLECTED_METRICS = (
    'knowledge',
    'population',
    'numerosity',
    'reliable',
    'mean_reliable_classifier_specificity',
    'full_knowledge_first_trial',
    'full_knowledge_stable_trial',
    'full_knowledge_last_trial',
    'full_knowledge_last_trial_state_of_population',
    'old_pep_error',
    'new_pep_error',
)


def _sample_stdev(values):
    """Sample standard deviation of ``values``, or NaN for fewer than two values."""
    # statistics.stdev raises StatisticsError on a single data point; a lone
    # run has no defined spread, which NaN reports without aborting.
    return statistics.stdev(values) if len(values) > 1 else float('nan')


def compute_mean_and_stdev_for_one_env(env_name, parmap_results):
    """Aggregate all benchmark runs of one maze into means and deviations.

    Parameters
    ----------
    env_name
        Maze id to aggregate; compared with the ``'maze'`` entry of each result.
    parmap_results
        Iterable of single-run dictionaries holding a ``'maze'`` entry and
        every key listed in ``_COLLECTED_METRICS``.

    Returns
    -------
    dict
        ``'maze'``, then an ``'avg_*'`` / ``'std_*'`` pair for every summarised
        metric, then the raw per-run values under ``'*_list'`` keys. ``'std_*'``
        is NaN when only one run matches ``env_name``.

    Raises
    ------
    statistics.StatisticsError
        If no result in ``parmap_results`` belongs to ``env_name``.
    """
    runs = [res for res in parmap_results if res['maze'] == env_name]
    if not runs:
        raise statistics.StatisticsError(
            'no benchmark result found for maze {!r}'.format(env_name))

    # Per-metric list of values, in the order the runs were given.
    samples = {key: [run[key] for run in runs] for key in _COLLECTED_METRICS}

    dic = {'maze': env_name}

    # Compute the means and standard deviations
    for key, suffix in _SUMMARISED_METRICS:
        dic['avg_' + suffix] = statistics.mean(samples[key])
        dic['std_' + suffix] = _sample_stdev(samples[key])

    for key in _COLLECTED_METRICS:
        dic[key + '_list'] = samples[key]

    return dic

# One summary dictionary per benchmarked maze, in the order the mazes were selected.
result = [
    compute_mean_and_stdev_for_one_env(maze_name, parmap_results)
    for maze_name in maze_envs_name
]

In [None]:
# Dump the per-maze summaries as the plain-text repr of the list of dictionaries.
print(result)