In [1]:
# Logger
import logging
logging.basicConfig(level=logging.WARN)

# Enable automatic module reload
%load_ext autoreload
%autoreload 2

# Load PyALCS ACS2 module
from lcs.agents.acs2 import ACS2, Configuration
from bacs.agents.bacs.utils.GymMazeWrapper import _maze_metrics, parse_metrics_to_df, plot_performance

# Load environments
import gym
import my_mazes

# Run the benchmarks in parallel
import parmap

# For computing standard deviations
import statistics

# Agent - ACS2 - BENCHMARKING

## Main Parameters

In [2]:
# --- Problem dimensions -------------------------------------------------
CLASSIFIER_LENGTH = 8
NUMBER_OF_POSSIBLE_ACTIONS = 8

# --- Exploration phase --------------------------------------------------
NUMBER_OF_EXPLORE_TRIALS = 1000
BETA_EXPLORE = 0.05
EPSILON = 0.8
DO_GA_EXPLORE = False
DO_SUBSUMPTION_EXPLORE = True
DO_ACTION_PLANNING_EXPLORE = False

# --- Exploitation phase -------------------------------------------------
NUMBER_OF_EXPLOIT_TRIALS = 500
BETA_EXPLOIT = 0.0
DO_SUBSUMPTION_EXPLOIT = True
DO_ACTION_PLANNING_EXPLOIT = False

# --- Benchmark ----------------------------------------------------------
# Number of independent explore+exploit runs performed per maze.
NUMBER_OF_ITERATIONS_TO_BENCH = 30

## Full Details of Agent Parameters

In [3]:
# Keyword arguments that are identical for the exploration and the
# exploitation configurations; only the phase-specific switches differ.
_shared_cfg_kwargs = dict(
    classifier_length=CLASSIFIER_LENGTH,
    number_of_possible_actions=NUMBER_OF_POSSIBLE_ACTIONS,
    user_metrics_collector_fcn=_maze_metrics,
    metrics_trial_frequency=1,
    action_planning_frequency=50,
    gamma=0.95,
    theta_i=0.1,
    theta_r=0.9,
    u_max=CLASSIFIER_LENGTH,
    theta_exp=20,
    theta_ga=100,
    theta_as=20,
    mu=0.3,
    chi=0.8,
)

# Exploration phase: driven by the *_EXPLORE constants and EPSILON.
cfg_explore = Configuration(
    **_shared_cfg_kwargs,
    do_ga=DO_GA_EXPLORE,
    do_subsumption=DO_SUBSUMPTION_EXPLORE,
    do_action_planning=DO_ACTION_PLANNING_EXPLORE,
    beta=BETA_EXPLORE,
    epsilon=EPSILON,
)

# Exploitation phase: the GA is always off and epsilon is fixed to 0.0.
cfg_exploit = Configuration(
    **_shared_cfg_kwargs,
    do_ga=False,
    do_subsumption=DO_SUBSUMPTION_EXPLOIT,
    do_action_planning=DO_ACTION_PLANNING_EXPLOIT,
    beta=BETA_EXPLOIT,
    epsilon=0.0,
)

### Benchmarking - Maze

In [4]:
def _mean_steps(metrics):
    """Return the mean of ``steps_in_trial`` over the collected metric records."""
    # Divide by the number of records actually collected rather than by the
    # configured trial count, so the mean stays correct if
    # ``metrics_trial_frequency`` is ever set to something other than 1.
    return sum(trial['steps_in_trial'] for trial in metrics) / len(metrics)


def _mean_and_pstdev(values):
    """Return ``(mean, population standard deviation)`` of a non-empty list."""
    return float(sum(values)) / len(values), statistics.pstdev(values)


def bench_on_maze(env, n):
    """Benchmark ACS2 on one maze by repeating an explore/exploit run ``n`` times.

    Each repetition creates a fresh environment with ``gym.make(env.id)``,
    trains a new agent with ``cfg_explore`` for ``NUMBER_OF_EXPLORE_TRIALS``
    trials, then runs a second agent built from ``cfg_exploit`` and the
    explored population for ``NUMBER_OF_EXPLOIT_TRIALS`` trials.

    Relies on the module-level names ``cfg_explore``, ``cfg_exploit``,
    ``NUMBER_OF_EXPLORE_TRIALS`` and ``NUMBER_OF_EXPLOIT_TRIALS``.

    Parameters
    ----------
    env : object with an ``id`` attribute
        Environment spec; ``env.id`` is passed to ``gym.make`` and used as the
        label in the progress output and in the result.
    n : int
        Number of independent repetitions; must be at least 1.

    Returns
    -------
    dict
        ``'maze'`` (the environment id), ``'avg_*'`` / ``'std_*'`` pairs (mean
        and population standard deviation across the ``n`` repetitions) for
        ``explore``, ``exploit``, ``knowledge``, ``population``, ``numerosity``
        and ``reliable``, plus ``'avg_exploit_list'`` holding the per-repetition
        exploitation averages.

    Raises
    ------
    ValueError
        If ``n`` is smaller than 1 (no repetition means no statistics).
    """
    if n < 1:
        raise ValueError("n must be at least 1, got {!r}".format(n))

    avg_explore_list = []
    avg_exploit_list = []
    knowledge_list = []
    population_list = []
    numerosity_list = []
    reliable_list = []

    for i in range(n):

        # Initialize environment
        maze = gym.make(env.id)
        try:
            # Reset it, by putting an agent into random position
            maze.reset()

            # Training of ACS2 - Exploration
            agent_explore = ACS2(cfg_explore)
            population_explore, metrics_explore = agent_explore.explore(maze, NUMBER_OF_EXPLORE_TRIALS)

            # Using ACS2 - Exploitation (the returned population is not needed)
            agent_exploit = ACS2(cfg_exploit, population_explore)
            _, metrics_exploit = agent_exploit.exploit(maze, NUMBER_OF_EXPLOIT_TRIALS)
        finally:
            # Release the environment even if a run fails, so repeated
            # iterations do not accumulate open environments.
            maze.close()

        # Average number of steps per trial in each phase
        avg_explore_list.append(_mean_steps(metrics_explore))
        avg_exploit_list.append(_mean_steps(metrics_exploit))

        # Remaining metrics are read from the last exploitation record
        final_metrics = metrics_exploit[-1]
        knowledge_list.append(final_metrics['knowledge'])
        population_list.append(final_metrics['population'])
        numerosity_list.append(final_metrics['numerosity'])
        reliable_list.append(final_metrics['reliable'])

        print(env.id,'|','#' * (i+1) ,' ' * (n-(i+1)), '|')

    # Compute the means and standard deviations
    avg_explore, std_explore = _mean_and_pstdev(avg_explore_list)
    avg_exploit, std_exploit = _mean_and_pstdev(avg_exploit_list)
    avg_knowledge, std_knowledge = _mean_and_pstdev(knowledge_list)
    avg_population, std_population = _mean_and_pstdev(population_list)
    avg_numerosity, std_numerosity = _mean_and_pstdev(numerosity_list)
    avg_reliable, std_reliable = _mean_and_pstdev(reliable_list)

    result = {
        'maze'             : env.id,
        'avg_explore'      : avg_explore,
        'std_explore'      : std_explore,
        'avg_exploit'      : avg_exploit,
        'std_exploit'      : std_exploit,
        'avg_knowledge'    : avg_knowledge,
        'std_knowledge'    : std_knowledge,
        'avg_population'   : avg_population,
        'std_population'   : std_population,
        'avg_numerosity'   : avg_numerosity,
        'std_numerosity'   : std_numerosity,
        'avg_reliable'     : avg_reliable,
        'std_reliable'     : std_reliable,
        'avg_exploit_list' : avg_exploit_list
    }

    print(result)

    return result

Custom functions for selecting the environments registered in Gym according to the aliasing type of the maze:

In [5]:
def filter_envs_typeIII(env):
    """Return True when ``env.id`` starts with one of the Type III maze prefixes."""
    return env.id.startswith(("Maze10-", "MazeE", "Woods10"))

def filter_envs_typeII(env):
    """Return True when ``env.id`` starts with one of the Type II maze prefixes."""
    return env.id.startswith(("MazeF4", "Maze7", "Sutton", "MiyazakiB", "Lab1"))

def filter_envs_typeI(env):
    """Return True when ``env.id`` starts with one of the Type I maze prefixes."""
    return env.id.startswith(("MazeB", "MazeD", "Littman", "MiyazakiA", "Cassandra"))

def filter_envs_na(env):
    """Return True when ``env.id`` starts with one of the non-aliased maze prefixes."""
    return env.id.startswith((
        "MazeF1", "MazeF2", "MazeF3",
        "Woods1-", "Woods14",
        "Maze4", "Maze5", "MazeA",
    ))

# Snapshot of every environment spec currently registered in Gym.
all_envs = list(gym.envs.registry.all())

Benchmarking on mazes without the genetic algorithm:

In [6]:
# Keep every registered environment accepted by at least one maze filter
# (checked in the order: not aliased, Type I, Type II, Type III).
maze_filters = (filter_envs_na, filter_envs_typeI, filter_envs_typeII, filter_envs_typeIII)
maze_envs = [env for env in all_envs if any(accepts(env) for accepts in maze_filters)]

# One bench_on_maze(env, NUMBER_OF_ITERATIONS_TO_BENCH) call per maze,
# spread over 4 worker processes with a progress bar.
results = parmap.map(
    bench_on_maze,
    maze_envs,
    NUMBER_OF_ITERATIONS_TO_BENCH,
    pm_pbar=True,
    pm_processes=4,
)

  0%|          | 0/26 [00:00<?, ?it/s]

Littman57-v0 | #                               |
Cassandra4x4-v0 | #                               |
Littman57-v0 | ##                              |
MazeA-v0 | #                               |
MazeD-v0 | #                               |
Cassandra4x4-v0 | ##                              |
Littman57-v0 | ###                             |
Cassandra4x4-v0 | ###                             |
Littman57-v0 | ####                            |
MazeA-v0 | ##                              |
MazeD-v0 | ##                              |
Littman57-v0 | #####                           |
Cassandra4x4-v0 | ####                            |
Littman57-v0 | ######                          |
MazeD-v0 | ###                             |
Cassandra4x4-v0 | #####                           |
Littman57-v0 | #######                         |
MazeA-v0 | ###                             |
Littman57-v0 | ########                        |
Cassandra4x4-v0 | ######                          |
Littman57-v0 | #########  

Littman89-v0 | ####################            |
MazeF4-v0 | #                               |
MazeA-v0 | #############################   |
MazeF4-v0 | ##                              |
Littman89-v0 | #####################           |
MazeA-v0 | ##############################  |
{'maze': 'MazeA-v0', 'avg_explore': 61.26516666666666, 'std_explore': 1.448228413921261, 'avg_exploit': 6.033333333333333, 'std_exploit': 0.17950549357115014, 'avg_knowledge': 100.0, 'std_knowledge': 0.0, 'avg_population': 191.43333333333334, 'std_population': 30.85307476123283, 'avg_numerosity': 191.43333333333334, 'std_numerosity': 30.85307476123283, 'avg_reliable': 191.43333333333334, 'std_reliable': 30.85307476123283, 'avg_exploit_list': [6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 7.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0]}
MazeF4-v0 | ###                             |
Littman89-v0 | ######################          |
MazeF4-v0 | ####           

  8%|▊         | 2/26 [1:11:14<14:14:52, 2137.17s/it]

MazeF4-v0 | ##############                  |
MazeB-v0 | #####                           |
MazeF4-v0 | ###############                 |
Maze4-v0 | #                               |
MazeF4-v0 | ################                |
MazeB-v0 | ######                          |
MazeF4-v0 | #################               |
Maze4-v0 | ##                              |
MazeF4-v0 | ##################              |
MazeB-v0 | #######                         |
MazeF4-v0 | ###################             |
Maze4-v0 | ###                             |
MazeF4-v0 | ####################            |
MazeB-v0 | ########                        |
Lab1-v0 | #####                           |
MazeF4-v0 | #####################           |
Maze4-v0 | ####                            |
MazeF4-v0 | ######################          |
MazeB-v0 | #########                       |
MazeF4-v0 | #######################         |
Maze4-v0 | #####                           |
MazeF4-v0 | ########################        |


 15%|█▌        | 4/26 [1:30:14<10:11:14, 1667.03s/it]

Maze7-v0 | #                               |
MazeB-v0 | #############                   |
Maze4-v0 | #########                       |
Maze7-v0 | ##                              |
Maze7-v0 | ###                             |
MazeB-v0 | ##############                  |
Maze7-v0 | ####                            |
Maze4-v0 | ##########                      |
Maze7-v0 | #####                           |
Maze7-v0 | ######                          |
MazeB-v0 | ###############                 |
Maze7-v0 | #######                         |
Maze4-v0 | ###########                     |
Maze7-v0 | ########                        |
Lab1-v0 | #######                         |
MazeB-v0 | ################                |
Maze7-v0 | #########                       |
Maze7-v0 | ##########                      |
Maze7-v0 | ###########                     |
Maze4-v0 | ############                    |
MazeB-v0 | #################               |
Maze7-v0 | ############                    |
Maze7-v0 | 

 23%|██▎       | 6/26 [2:12:40<8:36:16, 1548.85s/it] 

Maze4-v0 | #########################       |
Maze10-v0 | ##########                      |
Maze4-v0 | ##########################      |
Maze10-v0 | ###########                     |
MazeE1-v0 | #                               |
Maze10-v0 | ############                    |
Maze4-v0 | ###########################     |
Maze10-v0 | #############                   |
Maze4-v0 | ############################    |
Maze10-v0 | ##############                  |
Lab1-v0 | ############                    |
Maze10-v0 | ###############                 |
MazeE1-v0 | ##                              |
Maze4-v0 | #############################   |
Maze10-v0 | ################                |
Maze10-v0 | #################               |
Maze4-v0 | ##############################  |
{'maze': 'Maze4-v0', 'avg_explore': 31.68576666666667, 'std_explore': 1.369474441366306, 'avg_exploit': 3.517199999999998, 'std_exploit': 0.051418803305146345, 'avg_knowledge': 100.0, 'std_knowledge': 0.0, 'avg_population': 27

 31%|███       | 8/26 [2:45:22<6:53:33, 1378.52s/it]

MazeF1-v0 | #                               |
MazeF1-v0 | ##                              |
MazeF1-v0 | ###                             |
MazeF1-v0 | ####                            |
MazeE1-v0 | #######                         |
MazeF1-v0 | #####                           |
MazeF1-v0 | ######                          |
MazeF1-v0 | #######                         |
MazeF1-v0 | ########                        |
Maze5-v0 | #####                           |
Lab1-v0 | ###############                 |
MazeF1-v0 | #########                       |
MazeF1-v0 | ##########                      |
MazeF1-v0 | ###########                     |
MazeF1-v0 | ############                    |
MazeF1-v0 | #############                   |
MazeF1-v0 | ##############                  |
MazeF1-v0 | ###############                 |
MazeF1-v0 | ################                |
MazeF1-v0 | #################               |
MazeF1-v0 | ##################              |
MazeF1-v0 | ###################      

 38%|███▊      | 10/26 [3:00:48<4:54:21, 1103.87s/it]

Maze5-v0 | #########                       |
MiyazakiA-v0 | #                               |
Lab1-v0 | #################               |
MazeE1-v0 | ###########                     |
Maze5-v0 | ##########                      |
MiyazakiA-v0 | ##                              |
MazeE1-v0 | ############                    |
MiyazakiA-v0 | ###                             |
Maze5-v0 | ###########                     |
MiyazakiA-v0 | ####                            |
Lab1-v0 | ##################              |
Maze5-v0 | ############                    |
MazeE1-v0 | #############                   |
MiyazakiA-v0 | #####                           |
MiyazakiA-v0 | ######                          |
Maze5-v0 | #############                   |
MazeE1-v0 | ##############                  |
MiyazakiA-v0 | #######                         |
Maze5-v0 | ##############                  |
MiyazakiA-v0 | ########                        |
Lab1-v0 | ###################             |
MazeE1-v0 | ##########

 46%|████▌     | 12/26 [4:31:53<6:11:33, 1592.37s/it]

MazeF3-v0 | #                               |
MazeF3-v0 | ##                              |
MazeE1-v0 | ##############################  |
{'maze': 'MazeE1-v0', 'avg_explore': 20.264799999999997, 'std_explore': 0.6372878679738586, 'avg_exploit': 35.663666666666664, 'std_exploit': 8.65912225087252, 'avg_knowledge': 53.43055555555554, 'std_knowledge': 0.2064731770460892, 'avg_population': 498.73333333333335, 'std_population': 50.85727305137606, 'avg_numerosity': 498.73333333333335, 'std_numerosity': 50.85727305137606, 'avg_reliable': 299.7, 'std_reliable': 38.26673925660943, 'avg_exploit_list': [39.116, 31.844, 42.634, 43.544, 46.49, 23.752, 46.672, 50.328, 25.302, 32.098, 35.54, 27.556, 45.952, 28.468, 43.902, 33.548, 28.61, 36.01, 31.698, 11.06, 26.372, 26.256, 39.838, 41.02, 29.328, 45.236, 36.708, 41.966, 37.7, 41.362]}
MazeF3-v0 | ###                             |
MazeF3-v0 | ####                            |
MiyazakiB-v0 | ###                             |
MazeF3-v0 | #####         

 54%|█████▍    | 14/26 [5:03:15<4:39:23, 1396.98s/it]

Woods1-v0 | #                               |
Woods1-v0 | ##                              |
MiyazakiB-v0 | ############                    |
Woods1-v0 | ###                             |
Woods1-v0 | ####                            |
Woods1-v0 | #####                           |
Woods1-v0 | ######                          |
Woods1-v0 | #######                         |
Woods1-v0 | ########                        |
Woods1-v0 | #########                       |
Woods1-v0 | ##########                      |
MiyazakiB-v0 | #############                   |
Woods1-v0 | ###########                     |
Woods1-v0 | ############                    |
Woods1-v0 | #############                   |
MazeE2-v0 | ####                            |
Woods1-v0 | ##############                  |
Woods1-v0 | ###############                 |
Sutton-v0 | ##                              |
Woods1-v0 | ################                |
Woods1-v0 | #################               |
Woods1-v0 | ################

 62%|██████▏   | 16/26 [5:50:13<3:53:26, 1400.62s/it]

Woods100-v0 | #                               |
Woods100-v0 | ##                              |
Woods100-v0 | ###                             |
Woods100-v0 | ####                            |
MiyazakiB-v0 | ##########################      |
Woods100-v0 | #####                           |
Woods100-v0 | ######                          |
Woods100-v0 | #######                         |
MazeE2-v0 | #########                       |
Woods100-v0 | ########                        |
Woods100-v0 | #########                       |
Woods100-v0 | ##########                      |
Woods100-v0 | ###########                     |
Woods100-v0 | ############                    |
Woods100-v0 | #############                   |
Woods100-v0 | ##############                  |
Woods100-v0 | ###############                 |
Woods100-v0 | ################                |
Woods100-v0 | #################               |
Woods100-v0 | ##################              |
Woods100-v0 | ###################        

 69%|██████▉   | 18/26 [6:03:45<2:26:57, 1102.24s/it]

Woods101-v0 | #############                   |
Woods101demi-v0 | #                               |
Woods101-v0 | ##############                  |
Woods101demi-v0 | ##                              |
Woods101-v0 | ###############                 |
Woods101-v0 | ################                |
Woods101demi-v0 | ###                             |
Woods101-v0 | #################               |
MazeE2-v0 | ###########                     |
Woods101demi-v0 | ####                            |
Woods101-v0 | ##################              |
Woods101-v0 | ###################             |
Woods101demi-v0 | #####                           |
Woods101-v0 | ####################            |
Woods101demi-v0 | ######                          |
Woods101-v0 | #####################           |
Woods101demi-v0 | #######                         |
Woods101-v0 | ######################          |
Sutton-v0 | #######                         |
Woods101-v0 | #######################         |
Woods101demi-v0 

 77%|███████▋  | 20/26 [6:17:03<1:29:07, 891.28s/it] 

Woods101demi-v0 | ##############                  |
Woods101demi-v0 | ###############                 |
Woods101demi-v0 | ################                |
Woods101demi-v0 | #################               |
Woods101demi-v0 | ##################              |
Woods101demi-v0 | ###################             |
Woods101demi-v0 | ####################            |
Sutton-v0 | ########                        |
MazeE2-v0 | #############                   |
Woods101demi-v0 | #####################           |
Woods101demi-v0 | ######################          |
Woods101demi-v0 | #######################         |
Woods101demi-v0 | ########################        |
Woods101demi-v0 | #########################       |
Woods101demi-v0 | ##########################      |
Woods101demi-v0 | ###########################     |
Woods101demi-v0 | ############################    |
Woods101demi-v0 | #############################   |
MazeE2-v0 | ##############                  |
Woods101demi-v0 | ############

 85%|████████▍ | 22/26 [7:19:59<1:19:21, 1190.33s/it]

Sutton-v0 | #############                   |
MazeE2-v0 | ####################            |
MazeE2-v0 | #####################           |
Sutton-v0 | ##############                  |
MazeE2-v0 | ######################          |
Sutton-v0 | ###############                 |
MazeE2-v0 | #######################         |
MazeE2-v0 | ########################        |
Sutton-v0 | ################                |
MazeE2-v0 | #########################       |
Sutton-v0 | #################               |
MazeE2-v0 | ##########################      |
Sutton-v0 | ##################              |
MazeE2-v0 | ###########################     |
Sutton-v0 | ###################             |
MazeE2-v0 | ############################    |
MazeE2-v0 | #############################   |
Sutton-v0 | ####################            |
MazeE2-v0 | ##############################  |
{'maze': 'MazeE2-v0', 'avg_explore': 28.162599999999998, 'std_explore': 1.1221137672565409, 'avg_exploit': 59.72439999999999, 

 92%|█████████▏| 24/26 [8:53:12<55:44, 1672.09s/it]  

Sutton-v0 | #####################           |
Sutton-v0 | ######################          |
Sutton-v0 | #######################         |
Sutton-v0 | ########################        |
Sutton-v0 | #########################       |
Sutton-v0 | ##########################      |
Sutton-v0 | ###########################     |
Sutton-v0 | ############################    |
Sutton-v0 | #############################   |
Sutton-v0 | ##############################  |
{'maze': 'Sutton-v0', 'avg_explore': 80.69413333333334, 'std_explore': 3.1543095465657065, 'avg_exploit': 84.60733333333334, 'std_exploit': 6.843720709932249, 'avg_knowledge': 48.87989203778677, 'std_knowledge': 0.7747746339761025, 'avg_population': 721.9666666666667, 'std_population': 58.26118395257305, 'avg_numerosity': 721.9666666666667, 'std_numerosity': 58.26118395257305, 'avg_reliable': 419.76666666666665, 'std_reliable': 40.77146333187248, 'avg_exploit_list': [88.148, 77.664, 81.504, 87.178, 87.836, 91.186, 92.938, 85.88, 87.46

100%|██████████| 26/26 [10:34:29<00:00, 1464.22s/it]


Formatting the previous results as one markdown table row per environment:

In [7]:
# Every numeric cell is rendered with two decimals.
two_decimals = "{:.2f}".format

# Build one '|'-delimited table row per benchmarked maze, then join them.
table_rows = []
for item in results:
    exploit_runs = item['avg_exploit_list']
    cells = [
        item['maze'],
        two_decimals(item['avg_explore']),
        two_decimals(item['std_explore']),
        two_decimals(item['avg_exploit']),
        two_decimals(item['std_exploit']),
        two_decimals(min(exploit_runs)),   # best exploitation run
        two_decimals(max(exploit_runs)),   # worst exploitation run
        '0',                               # placeholder column, not computed here
        two_decimals(item['avg_knowledge']),
        two_decimals(item['std_knowledge']),
        two_decimals(item['avg_population']),
        two_decimals(item['std_population']),
        two_decimals(item['avg_reliable']),
        two_decimals(item['std_reliable']),
    ]
    table_rows.append('|' + '|'.join(cells) + '|' + '\n')

markdown_str = ''.join(table_rows)
print(markdown_str)

|Cassandra4x4-v0|13.72|0.66|4.94|9.28|2.36|54.86|0|51.03|0.92|247.20|29.37|145.67|19.23|
|Lab1-v0|99.50|0.22|100.00|0.00|100.00|100.00|0|63.88|0.95|500.83|37.27|337.47|31.44|
|Littman57-v0|22.06|1.69|43.83|24.26|3.53|65.14|0|72.52|2.95|100.07|8.00|92.43|8.02|
|Littman89-v0|32.98|4.84|25.48|26.98|4.54|67.82|0|71.52|0.32|198.53|24.71|168.27|22.73|
|MazeA-v0|61.27|1.45|6.03|0.18|6.00|7.00|0|100.00|0.00|191.43|30.85|191.43|30.85|
|MazeB-v0|46.66|2.55|10.30|16.66|7.00|100.00|0|80.46|0.24|281.97|31.42|239.93|26.48|
|MazeD-v0|34.64|1.61|4.00|0.00|4.00|4.00|0|82.08|2.06|245.03|37.94|209.37|33.41|
|MazeF4-v0|56.35|2.35|53.31|6.68|42.03|74.98|0|88.13|2.83|100.90|7.44|89.47|8.23|
|Maze4-v0|31.69|1.37|3.52|0.05|3.42|3.60|0|100.00|0.00|276.30|30.72|276.30|30.72|
|Maze5-v0|47.24|1.66|4.64|0.10|4.37|4.91|0|100.00|0.00|373.20|37.52|373.20|37.52|
|Maze7-v0|49.65|2.08|47.34|7.75|24.77|59.06|0|83.02|2.93|93.23|7.84|86.63|7.98|
|Maze10-v0|51.81|1.51|74.34|9.17|53.62|95.84|0|65.46|2.37|119.67|12.82|96.63|1

### Last results
 
The parameters used are:
 
- no use of action planning
- use of subsumption in the anticipatory learning process
- use of subsumption in the genetic algorithm, if the latter is used
- $\gamma$ = 0.95
- $\theta_i$ = 0.1
- $\theta_r$ = 0.9
- $u_{max}$ = 8 (length of the classifier)
- $\theta_{exp}$ = 20
- $\beta$ = 0.05
- $\epsilon$ = 0.8
 
If genetic algorithms are used:

- $\theta_{ga}$ = 100
- $\theta_{as}$ = 20
- $\mu$ = 0.3
- $\chi$ = 0.8

The mazes in the following table have been sorted by aliasing type and then by complexity top-down.

For 1000 trials in Exploration and 500 trials in Exploitation for all environments, computed 30 times for calculating all averages:
 
 
| | Exploration Avg | Exploration Std | Exploitation Avg | Exploitation Std | Best Exploitation | Worst Exploitation | Successful tries | Knowledge Avg | Knowledge Std | Population Avg | Population Std | Reliable Avg | Reliable Std |
|------------|:-----:|:-----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|
|**Type III**     ||||||||||||||
|MazeE2-v0|28.16|1.12|59.72|3.82|52.56|70.67|0|37.87|1.98|668.07|56.80|378.20|44.29|
|Woods101demi-v0|35.57|1.88|68.34|3.23|62.73|74.77|0|52.38|0.00|109.50|10.12|72.93|9.58|
|Maze10-v0|51.81|1.51|74.34|9.17|53.62|95.84|0|65.46|2.37|119.67|12.82|96.63|11.88|
|Woods102-v0|38.08|1.27|57.76|8.80|44.89|69.17|0|51.26|0.22|152.23|17.40|116.03|17.44|
|Woods100-v0|16.16|0.48|34.31|2.12|28.64|39.24|0|60.00|0.00|18.07|0.36|14.07|0.36|
|Woods101-v0|31.34|1.32|41.79|2.55|37.01|47.74|0|63.09|0.66|96.43|10.10|83.00|9.71|
|MazeE1-v0|20.26|0.64|35.66|8.66|11.06|50.33|0|53.43|0.21|498.73|50.86|299.70|38.27|
|**Type II**      ||||||||||||||
|MazeF4-v0|56.35|2.35|53.31|6.68|42.03|74.98|0|88.13|2.83|100.90|7.44|89.47|8.23|
|Maze7-v0|49.65|2.08|47.34|7.75|24.77|59.06|0|83.02|2.93|93.23|7.84|86.63|7.98|
|Sutton-v0|80.69|3.15|84.61|6.84|64.87|93.54|0|48.88|0.77|721.97|58.26|419.77|40.77|
|Lab1-v0|99.50|0.22|100.00|0.00|100.00|100.00|0|63.88|0.95|500.83|37.27|337.47|31.44|
|MiyazakiB-v0|41.97|1.71|6.97|7.39|3.52|25.55|0|78.60|0.13|340.57|29.77|279.90|28.81|
|**Type I**       ||||||||||||||
|MazeB-v0|46.66|2.55|10.30|16.66|7.00|100.00|0|80.46|0.24|281.97|31.42|239.93|26.48|
|Littman89-v0|32.98|4.84|25.48|26.98|4.54|67.82|0|71.52|0.32|198.53|24.71|168.27|22.73|
|MiyazakiA-v0|27.12|1.62|8.42|11.53|3.34|43.81|0|70.71|0.17|332.27|31.20|250.43|24.45|
|MazeD-v0|34.64|1.61|4.00|0.00|4.00|4.00|0|82.08|2.06|245.03|37.94|209.37|33.41|
|Cassandra4x4-v0|13.72|0.66|4.94|9.28|2.36|54.86|0|51.03|0.92|247.20|29.37|145.67|19.23|
|Littman57-v0|22.06|1.69|43.83|24.26|3.53|65.14|0|72.52|2.95|100.07|8.00|92.43|8.02|
|**Not Aliased**||||||||||||||
|Maze4-v0|31.69|1.37|3.52|0.05|3.42|3.60|0|100.00|0.00|276.30|30.72|276.30|30.72|
|Maze5-v0|47.24|1.66|4.64|0.10|4.37|4.91|0|100.00|0.00|373.20|37.52|373.20|37.52|
|MazeA-v0|61.27|1.45|6.03|0.18|6.00|7.00|0|100.00|0.00|191.43|30.85|191.43|30.85|
|MazeF1-v0|10.05|0.28|1.80|0.04|1.70|1.87|0|100.00|0.00|79.50|6.37|79.50|6.37|
|MazeF2-v0|17.64|0.56|2.51|0.04|2.42|2.59|0|100.00|0.00|80.93|9.28|80.93|9.28|
|MazeF3-v0|26.71|0.70|3.38|0.04|3.29|3.47|0|100.00|0.00|93.43|10.14|93.43|10.14|
|Woods1-v0|9.01|0.25|1.62|0.03|1.55|1.67|0|100.00|0.00|121.57|23.39|121.57|23.39|
|Woods14-v0|68.74|1.59|9.49|0.17|9.19|9.85|0|100.00|0.00|121.50|15.39|121.50|15.39|


### Complexity and Optimal Performance by Maze

These measures come from the paper by Zatuchna and Bagnall:

> Zhanna V. Zatuchna and Anthony Bagnall. 2009. Learning Mazes with Aliasing States: An LCS Algorithm with Associative Perception. Adaptive Behavior - Animals, Animats, Software Agents, Robots, Adaptive Systems 17, 1 (February 2009), 28-57. DOI=http://dx.doi.org/10.1177/1059712308099230

$\phi$ is the average distance to the reward in the maze. Its calculation depends mainly on the type of maze. $\phi$' is reported here if the maze is aliased, and the original $\phi$ otherwise.

$\psi$ measures the complexity of the maze; it depends on the average distance to the reward and on the average number of steps taken by a trained Q-learning agent.

I also added $d$, the maximal distance to food that I consider when counting the number of successful tries.

The question marks (**?**) highlight some discrepancies present in the original paper.

The asterisks (**\***) indicate pieces of information that were not provided.

For further details, please see the original paper.

#### For Type III Aliased Mazes

|       |MazeE2      |Woods101demi|Maze10      |Woods102    |Woods100    |Woods101    |MazeE1       |
|-------|:----------:|:----------:|:----------:|:----------:|:----------:|:----------:|:-----------:|
|$d$    |3           |4           |8           |4           |3           |4           |4            |
|$\phi$'|2.33        |3.1         |5.17        |3.31        |2.33        |2.9         |3.07         |
|$\psi$ |251.2       |251 **?**   |171         |167         |166         |149         |167 **?**    |

#### For Type II Aliased Mazes

|       |MazeF4      |Maze7       |Sutton      |Lab1        |MiyazakiB   |
|-------|:----------:|:----------:|:----------:|:----------:|:----------:|
|$d$    |7           |7           | 10         | **\***     |5           |
|$\phi$'|4.5 **?**   |4.33        | **\***     | **\***     |3.33        |
|$\psi$ |47 **?**    |82 **?**    | **?**      | **?**      |1.03        |

#### For Type I Aliased Mazes

|       |MazeB       |Littman89   |MiyazakiA   |MazeD       |Cassandra4x4|Littman57   |
|-------|:----------:|:----------:|:----------:|:----------:|:----------:|:----------:|
|$d$    |5           |7           |5           |4           |3           |8           |
|$\phi$'|3.5         |3.77        |3.05        |2.75        |2.27        |3.71        |
|$\psi$ |1.26        |61 **?**    |69 **?**    |1.03        |1           |154 **?**   |

#### For Non Aliased Mazes

|       |Maze4       |Maze5       |MazeA       |MazeF1      |MazeF2      |MazeF3      |Woods1      |Woods14     |
|-------|:----------:|:----------:|:----------:|:----------:|:----------:|:----------:|:----------:|:----------:|
|$d$    |5           |8           |6           |3           |4           |5           |2           |18          |
|$\phi$ |3.5         |4.61        |4.23        |1.8         |2.5         |3.38        |1.63        |9.5         |
|$\psi$ | **\***     | **\***     | **\***     | **\***     | **\***     | **\***     | **\***     | **\***     |