In [1]:
# Logger
import logging
logging.basicConfig(level=logging.WARN)

# Enable automatic module reload
%load_ext autoreload
%autoreload 2

# Load BACS module
from bacs.agents.bacs import BACS, Configuration
from bacs.agents.bacs.utils.GymMazeWrapper import _maze_metrics, parse_metrics_to_df, plot_performance

# Load environments
import gym
import my_mazes

# Allow to parallelize all benchmarks to do
import parmap

# For calculation standard deviation
import statistics

# Agent - BACS - BENCHMARKING

## Main Parameters

In [2]:
# --- Agent dimensions -------------------------------------------------------
# CLASSIFIER_LENGTH is passed to the agent configuration both as
# `classifier_length` and as `u_max`.
CLASSIFIER_LENGTH = 8
NUMBER_OF_POSSIBLE_ACTIONS = 8

# --- Exploration phase ------------------------------------------------------
# Number of trials run by `explore()` for each benchmark iteration.
NUMBER_OF_EXPLORE_TRIALS = 5000
DO_ACTION_PLANNING_EXPLORE = False
DO_GA_EXPLORE = False
DO_SUBSUMPTION_EXPLORE = True
BETA_EXPLORE = 0.05
EPSILON = 0.8

# --- Exploitation phase -----------------------------------------------------
# Number of trials run by `exploit()` for each benchmark iteration.
NUMBER_OF_EXPLOIT_TRIALS = 500
DO_ACTION_PLANNING_EXPLOIT = False
DO_SUBSUMPTION_EXPLOIT = True
BETA_EXPLOIT = 0.0

# --- Behavioral sequences ---------------------------------------------------
# Forwarded to the agent configuration as `bs_max`.
LENGTH_OF_BEHAVIORAL_SEQUENCES = 1

# --- Benchmark harness ------------------------------------------------------
# Independent explore/exploit runs per maze; results are averaged over them.
NUMBER_OF_ITERATIONS_TO_BENCH = 30

# Worker processes handed to parmap (one maze per worker).
NB_OF_PROCESSES = 12

## Full Details of Agent Parameters

In [3]:
# Keyword arguments that are identical for the exploration and the
# exploitation configurations. Keeping them in one place guarantees the two
# phases cannot drift apart by accident.
_shared_cfg_kwargs = dict(
    classifier_length=CLASSIFIER_LENGTH,
    number_of_possible_actions=NUMBER_OF_POSSIBLE_ACTIONS,
    user_metrics_collector_fcn=_maze_metrics,
    metrics_trial_frequency=1,
    action_planning_frequency=50,
    gamma=0.95,
    theta_i=0.1,
    theta_r=0.9,
    u_max=CLASSIFIER_LENGTH,
    theta_exp=20,
    theta_ga=100,
    theta_as=20,
    mu=0.3,
    chi=0.8,
    bs_max=LENGTH_OF_BEHAVIORAL_SEQUENCES
)

# Exploration: every phase-specific switch comes from the "Main Parameters"
# constants.
cfg_explore = Configuration(
    do_ga=DO_GA_EXPLORE,
    do_subsumption=DO_SUBSUMPTION_EXPLORE,
    do_action_planning=DO_ACTION_PLANNING_EXPLORE,
    beta=BETA_EXPLORE,
    epsilon=EPSILON,
    **_shared_cfg_kwargs
)

# Exploitation: the GA is always off and epsilon is fixed to 0.0; the
# remaining switches come from the "Main Parameters" constants.
cfg_exploit = Configuration(
    do_ga=False,
    do_subsumption=DO_SUBSUMPTION_EXPLOIT,
    do_action_planning=DO_ACTION_PLANNING_EXPLOIT,
    beta=BETA_EXPLOIT,
    epsilon=0.0,
    **_shared_cfg_kwargs
)

### Benchmarking - Maze

In [4]:
def _mean_steps(metrics):
    """Return the mean of ``steps_in_trial`` over the given metric records.

    The sum is divided by the number of records actually present rather than
    by the configured number of trials, so the average stays correct even if
    metrics are not collected on every trial.
    """
    return sum(trial['steps_in_trial'] for trial in metrics) / len(metrics)


def _mean_and_pstdev(values):
    """Return ``(mean, population standard deviation)`` of ``values``."""
    return float(sum(values)) / len(values), statistics.pstdev(values)


def bench_on_maze(env, n):
    """Benchmark BACS on one maze over ``n`` independent explore/exploit runs.

    Each run creates a fresh environment from ``env.id``, trains a new agent
    with ``cfg_explore`` for ``NUMBER_OF_EXPLORE_TRIALS`` trials, then hands
    the resulting population to a second agent configured with
    ``cfg_exploit`` and runs it for ``NUMBER_OF_EXPLOIT_TRIALS`` trials.

    Parameters
    ----------
    env
        Object exposing an ``id`` attribute accepted by ``gym.make``.
    n : int
        Number of independent runs to aggregate. Must be at least 1; with
        ``n == 0`` the averages raise ``ZeroDivisionError``.

    Returns
    -------
    dict
        ``'maze'`` (the environment id), the ``avg_*`` / ``std_*`` pairs for
        ``explore``, ``exploit``, ``knowledge``, ``population``,
        ``numerosity`` and ``reliable`` (mean and population standard
        deviation over the ``n`` runs), and ``'avg_exploit_list'`` (the
        per-run exploitation averages).
    """
    avg_explore_list = []
    avg_exploit_list = []
    knowledge_list = []
    population_list = []
    numerosity_list = []
    reliable_list = []

    for i in range(n):

        # Initialize environment
        maze = gym.make(env.id)

        # Reset it once before handing it to the agent; the returned
        # observation is not needed here.
        maze.reset()

        # Training of BACS - Exploration
        agent_explore = BACS(cfg_explore)
        population_explore, metrics_explore = agent_explore.explore(maze, NUMBER_OF_EXPLORE_TRIALS)

        # Using BACS - Exploitation
        agent_exploit = BACS(cfg_exploit, population_explore)
        agent_exploit.clean_population()
        population_exploit, metrics_exploit = agent_exploit.exploit(maze, NUMBER_OF_EXPLOIT_TRIALS)

        # To inspect a single run visually, build a frame with
        # parse_metrics_to_df(metrics_explore, metrics_exploit) and pass it to
        # plot_performance(agent_exploit, maze, metrics_df, cfg_exploit, env.id).

        # Average number of steps per trial in each phase
        avg_explore_list.append(_mean_steps(metrics_explore))
        avg_exploit_list.append(_mean_steps(metrics_exploit))

        # Other metrics are read from the last exploitation record
        final_metrics = metrics_exploit[-1]
        knowledge_list.append(final_metrics['knowledge'])
        population_list.append(final_metrics['population'])
        numerosity_list.append(final_metrics['numerosity'])
        reliable_list.append(final_metrics['reliable'])

        # Textual progress bar: one '#' per finished run
        print(env.id,'|','#' * (i+1) ,' ' * (n-(i+1)), '|')

    # Compute the means and standard deviations over the n runs
    avg_explore, std_explore = _mean_and_pstdev(avg_explore_list)
    avg_exploit, std_exploit = _mean_and_pstdev(avg_exploit_list)
    avg_knowledge, std_knowledge = _mean_and_pstdev(knowledge_list)
    avg_population, std_population = _mean_and_pstdev(population_list)
    avg_numerosity, std_numerosity = _mean_and_pstdev(numerosity_list)
    avg_reliable, std_reliable = _mean_and_pstdev(reliable_list)

    result = {
        'maze'             : env.id,
        'avg_explore'      : avg_explore,
        'std_explore'      : std_explore,
        'avg_exploit'      : avg_exploit,
        'std_exploit'      : std_exploit,
        'avg_knowledge'    : avg_knowledge,
        'std_knowledge'    : std_knowledge,
        'avg_population'   : avg_population,
        'std_population'   : std_population,
        'avg_numerosity'   : avg_numerosity,
        'std_numerosity'   : std_numerosity,
        'avg_reliable'     : avg_reliable,
        'std_reliable'     : std_reliable,
        'avg_exploit_list' : avg_exploit_list
    }

    print(result)

    return result

Custom filter functions for selecting the Gym environments that belong to each type of maze:

In [5]:
def filter_envs_typeIII(env):
    """Return True when ``env.id`` starts with a type III aliased maze name."""
    return env.id.startswith(("Maze10-", "MazeE1", "MazeE2", "Woods10"))


def filter_envs_typeII(env):
    """Return True when ``env.id`` starts with a type II aliased maze name."""
    return env.id.startswith(("MazeF4", "Maze7", "MiyazakiB"))


def filter_envs_typeI(env):
    """Return True when ``env.id`` starts with a type I aliased maze name."""
    return env.id.startswith(
        ("MazeB", "MazeD", "Littman", "MiyazakiA", "Cassandra")
    )


def filter_envs_na(env):
    """Return True when ``env.id`` starts with a non-aliased maze name."""
    # "Woods1-" keeps the trailing dash so that Woods100/101/102 do not match.
    return env.id.startswith(
        ("MazeF1", "MazeF2", "MazeF3", "Woods1-",
         "Woods14", "Maze4", "Maze5", "MazeA")
    )

# Snapshot of every environment spec currently registered in Gym.
all_envs = list(gym.envs.registry.all())

Benchmarking without the genetic algorithm:

In [6]:
# Every maze that belongs to one of the four benchmark categories.
benchmark_envs = [
    env for env in all_envs
    if filter_envs_na(env) or filter_envs_typeI(env)
    or filter_envs_typeII(env) or filter_envs_typeIII(env)
]

# Restrict this run to the mazes whose id starts with the given prefix.
# Set MAZE_ID_PREFIX to None to benchmark every maze in `benchmark_envs`.
# (Previously the full list was built and then silently overwritten.)
MAZE_ID_PREFIX = "Woods102"
maze_envs = [
    env for env in benchmark_envs
    if MAZE_ID_PREFIX is None or env.id.startswith(MAZE_ID_PREFIX)
]

# One maze per worker process; each worker runs all iterations for its maze.
results = parmap.map(bench_on_maze, maze_envs, NUMBER_OF_ITERATIONS_TO_BENCH, pm_pbar=True, pm_processes=NB_OF_PROCESSES)

  0%|          | 0/1 [00:00<?, ?it/s]

Woods102-v0 | #                               |
Woods102-v0 | ##                              |
Woods102-v0 | ###                             |
Woods102-v0 | ####                            |
Woods102-v0 | #####                           |
Woods102-v0 | ######                          |
Woods102-v0 | #######                         |
Woods102-v0 | ########                        |
Woods102-v0 | #########                       |
Woods102-v0 | ##########                      |
Woods102-v0 | ###########                     |
Woods102-v0 | ############                    |
Woods102-v0 | #############                   |
Woods102-v0 | ##############                  |
Woods102-v0 | ###############                 |
Woods102-v0 | ################                |
Woods102-v0 | #################               |
Woods102-v0 | ##################              |
Woods102-v0 | ###################             |
Woods102-v0 | ####################            |
Woods102-v0 | #####################     

100%|██████████| 1/1 [3:27:38<00:00, 12458.06s/it]


Formatting the previous results as one markdown table row per environment:

In [7]:
# Build one markdown table row per benchmarked maze. The column order matches
# the result tables further down in this notebook.
markdown_rows = []

for item in results:
    two_decimals = "{:.2f}".format
    cells = [
        item['maze'],
        two_decimals(item['avg_explore']),
        two_decimals(item['std_explore']),
        two_decimals(item['avg_exploit']),
        two_decimals(item['std_exploit']),
        # Best / worst run = lowest / highest per-run exploitation average
        two_decimals(min(item['avg_exploit_list'])),
        two_decimals(max(item['avg_exploit_list'])),
        # "Successful tries" column is not computed and is always written as 0
        '0',
        two_decimals(item['avg_knowledge']),
        two_decimals(item['std_knowledge']),
        two_decimals(item['avg_population']),
        two_decimals(item['std_population']),
        two_decimals(item['avg_reliable']),
        two_decimals(item['std_reliable']),
    ]
    markdown_rows.append('|' + '|'.join(cells) + '|' + '\n')

markdown_str = ''.join(markdown_rows)

print(markdown_str)

|Woods102-v0|30.87|0.65|6.57|6.40|3.37|25.78|0|95.12|0.00|219.90|2.82|134.77|0.80|



### Last results
 
The parameters used are:
 
- no use of action planning
- use of subsumption in the anticipatory learning process
- use of subsumption in the genetic algorithm, if the latter is used
- $\gamma$ = 0.95
- $\theta_i$ = 0.1
- $\theta_r$ = 0.9
- $u_{max}$ = 8 (length of the classifier)
- $\theta_{exp}$ = 20
- $\beta$ = 0.05
- $\epsilon$ = 0.8
- $bs_{max} = 1$
 
If genetic algorithms are used:

- $\theta_{ga}$ = 100
- $\theta_{as}$ = 20
- $\mu$ = 0.3
- $\chi$ = 0.8
 
The mazes in the following table have been sorted by aliasing type and then by complexity top-down.

#### Experiments with all environments without GA

For 1000 trials in Exploration and 500 trials in Exploitation for all environments, computed 30 times for calculating all averages, having $bs_{max} = 1$:
 
| | Exploration Avg | Exploration Std | Exploitation Avg | Exploitation Std | Best Exploitation | Worst Exploitation | Successful tries | Knowledge Avg | Knowledge Std | Population Avg | Population Std | Reliable Avg | Reliable Std |
|------------|:-----:|:-----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|
|**Type III**     ||||||||||||||
|MazeE2-v0|29.46|1.61|12.42|8.31|3.80|34.56|0|48.75|3.32|858.03|46.34|124.47|13.13|
|Woods101demi-v0|27.17|1.00|3.99|2.63|3.01|13.10|0|90.48|0.00|141.07|8.16|73.17|1.39|
|Maze10-v0|54.45|1.61|65.79|14.66|37.52|88.83|0|90.21|1.04|94.87|3.04|45.93|1.71|
|Woods102-v0|28.67|1.82|3.81|0.20|3.43|4.30|0|94.76|0.84|219.47|3.93|124.27|4.80|
|Woods100-v0|12.34|0.40|2.33|0.05|2.23|2.41|0|100.00|0.00|14.00|0.00|10.00|0.00|
|Woods101-v0|25.18|1.03|3.04|0.08|2.92|3.27|0|92.59|0.00|60.70|3.22|46.17|1.53|
|MazeE1-v0|23.87|1.30|4.55|2.33|3.14|12.02|0|67.78|3.45|1036.60|38.03|228.57|19.87|
|**Type II**      ||||||||||||||
|MazeF4-v0|30.92|0.97|4.51|0.07|4.33|4.65|0|100.00|0.00|40.97|2.86|31.47|0.56|
|Maze7-v0|28.32|1.20|4.33|0.08|4.17|4.46|0|100.00|0.00|40.57|1.73|33.00|0.00|
|MiyazakiB-v0|39.49|3.58|3.76|0.20|3.44|4.32|0|99.88|0.32|397.77|19.24|265.70|9.19|
|**Type I**       ||||||||||||||
|MazeB-v0|33.09|1.76|5.06|3.59|3.82|19.83|0|100.00|0.00|231.17|12.75|175.23|7.77|
|Littman89-v0|32.00|1.92|4.45|0.23|4.10|5.02|0|100.00|0.00|175.77|8.56|135.57|2.91|
|MiyazakiA-v0|30.88|2.55|3.60|0.35|3.05|4.85|0|97.31|1.94|537.27|19.88|288.27|11.65|
|MazeD-v0|27.02|2.03|3.02|0.24|2.77|3.73|0|100.00|0.00|196.13|8.34|174.43|6.30|
|Cassandra4x4-v0|20.24|1.92|2.85|0.22|2.42|3.42|0|83.70|1.21|294.07|14.41|111.87|5.11|
|Littman57-v0|22.98|1.29|9.44|15.58|3.98|64.56|0|90.24|0.00|54.10|1.42|31.17|0.64|
|**Not Aliased**||||||||||||||
|Maze4-v0|32.10|1.48|3.49|0.06|3.39|3.61|0|100.00|0.00|172.63|5.64|172.63|5.64|
|Maze5-v0|47.17|3.07|4.62|0.09|4.44|4.83|0|100.00|0.00|230.03|12.93|229.70|12.74|
|MazeA-v0|47.82|1.58|4.26|0.18|4.09|5.17|0|100.00|0.00|106.37|3.31|106.37|3.31|
|MazeF1-v0|11.51|0.36|1.80|0.04|1.70|1.86|0|100.00|0.00|14.00|0.00|14.00|0.00|
|MazeF2-v0|19.64|0.77|2.50|0.04|2.41|2.59|0|100.00|0.00|18.07|0.25|18.07|0.25|
|MazeF3-v0|29.86|1.08|3.36|0.06|3.22|3.44|0|100.00|0.00|28.70|1.10|28.70|1.10|
|Woods1-v0|9.53|0.46|1.63|0.02|1.58|1.67|0|100.00|0.00|54.50|1.12|54.50|1.12|
|Woods14-v0|70.47|1.55|9.54|0.23|9.00|9.95|0|100.00|0.00|39.03|2.24|39.03|2.24|

For 1000 trials in Exploration and 500 trials in Exploitation for all environments, computed 30 times for calculating all averages, having $bs_{max} = 2$:
 
| | Exploration Avg | Exploration Std | Exploitation Avg | Exploitation Std | Best Exploitation | Worst Exploitation | Successful tries | Knowledge Avg | Knowledge Std | Population Avg | Population Std | Reliable Avg | Reliable Std |
|------------|:-----:|:-----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|
|**Type III**     ||||||||||||||
|MazeE2-v0|31.91|1.83|20.10|15.71|3.87|58.87|0|39.95|3.25|1238.13|62.65|96.13|7.09|
|Woods101demi-v0|27.09|0.86|3.79|2.55|3.01|14.62|0|90.48|0.00|141.80|7.60|73.37|1.56|
|Maze10-v0|41.18|2.87|29.70|24.95|5.49|67.91|0|94.04|0.85|119.13|4.88|51.23|1.20|
|Woods102-v0|28.49|2.23|3.78|0.23|3.32|4.33|0|94.92|0.64|266.87|7.46|148.90|8.99|
|Woods100-v0|12.35|0.35|2.33|0.05|2.20|2.41|0|100.00|0.00|14.30|1.13|10.00|0.00|
|Woods101-v0|24.94|1.31|3.02|0.07|2.91|3.21|0|92.59|0.00|60.57|2.97|45.77|1.05|
|MazeE1-v0|25.64|1.44|3.67|1.17|3.07|7.59|0|64.07|3.60|1342.20|54.36|222.87|23.20|
|**Type II**      ||||||||||||||
|MazeF4-v0|31.36|1.16|4.50|0.10|4.28|4.71|0|100.00|0.00|41.53|3.33|31.53|0.76|
|Maze7-v0|28.54|1.23|4.34|0.08|4.15|4.51|0|100.00|0.00|40.17|1.59|33.00|0.00|
|MiyazakiB-v0|40.85|4.26|3.88|0.30|3.47|4.84|0|99.67|0.55|435.00|16.40|278.87|6.87|
|**Type I**       ||||||||||||||
|MazeB-v0|33.47|1.62|5.75|5.05|3.77|22.64|0|100.00|0.00|229.93|9.74|176.30|6.61|
|Littman89-v0|31.05|2.40|4.39|0.27|4.00|4.96|0|100.00|0.00|176.50|6.23|134.57|2.45|
|MiyazakiA-v0|32.98|2.45|3.51|0.20|3.18|4.00|0|95.60|2.26|663.63|39.43|313.17|18.89|
|MazeD-v0|27.43|2.21|3.00|0.23|2.68|3.61|0|99.97|0.19|199.07|8.33|175.30|5.40|
|Cassandra4x4-v0|23.21|3.07|2.80|0.26|2.48|3.59|0|86.09|1.59|433.50|23.36|134.57|8.11|
|Littman57-v0|22.93|1.32|11.01|17.21|4.06|65.67|0|90.33|0.44|53.97|1.40|31.00|0.26|
|**Not Aliased**||||||||||||||
|Maze4-v0|32.86|1.62|3.49|0.06|3.39|3.61|0|100.00|0.00|171.30|6.13|171.30|6.13|
|Maze5-v0|46.25|1.98|4.61|0.09|4.41|4.86|0|100.00|0.00|228.50|10.09|228.47|10.08|
|MazeA-v0|48.72|1.82|4.22|0.05|4.12|4.33|0|100.00|0.00|106.67|3.97|106.67|3.97|
|MazeF1-v0|11.46|0.31|1.79|0.04|1.73|1.85|0|100.00|0.00|14.07|0.25|14.07|0.25|
|MazeF2-v0|19.68|0.68|2.49|0.04|2.42|2.57|0|100.00|0.00|18.27|0.51|18.27|0.51|
|MazeF3-v0|29.70|0.88|3.39|0.06|3.27|3.59|0|100.00|0.00|28.37|0.60|28.37|0.60|
|Woods1-v0|9.42|0.49|1.62|0.02|1.60|1.68|0|100.00|0.00|54.37|1.20|54.37|1.20|
|Woods14-v0|70.18|1.54|9.51|0.25|8.87|9.83|0|100.00|0.00|39.33|2.20|39.33|2.20|


#### Experiments with all environments without GA and with more trials in exploration mode

For 5000 trials in Exploration and 500 trials in Exploitation for all environments, computed 30 times for calculating all averages, having $bs_{max} = 1$:
 
| | Exploration Avg | Exploration Std | Exploitation Avg | Exploitation Std | Best Exploitation | Worst Exploitation | Successful tries | Knowledge Avg | Knowledge Std | Population Avg | Population Std | Reliable Avg | Reliable Std |
|------------|:-----:|:-----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|
|**Type III**     ||||||||||||||
|MazeE2-v0|28.03|0.74|9.03|8.60|4.23|45.53|0|71.84|0.60|927.93|31.37|295.40|11.84|
|Woods101demi-v0|26.87|0.53|3.10|0.04|3.04|3.19|0|90.48|0.00|129.77|6.06|74.30|1.32|
|Maze10-v0|55.74|1.59|64.64|14.08|50.93|91.28|0|90.64|1.30|98.17|3.45|46.00|1.46|
|Woods102-v0|30.86|0.66|5.90|5.48|3.37|24.49|0|95.12|0.00|220.77|2.59|135.10|0.87|
|Woods100-v0|12.30|0.13|2.35|0.06|2.23|2.45|0|100.00|0.00|14.00|0.00|10.00|0.00|
|Woods101-v0|23.75|0.47|3.05|0.10|2.91|3.26|0|92.59|0.00|59.87|3.08|46.00|1.32|
|MazeE1-v0|24.70|0.94|5.73|3.87|3.38|15.88|0|99.88|0.19|1002.20|24.13|544.67|7.37|
|**Type II**      ||||||||||||||
|MazeF4-v0|28.08|0.66|4.49|0.09|4.29|4.64|0|100.00|0.00|39.43|2.25|31.40|0.49|
|Maze7-v0|26.40|0.81|4.34|0.08|4.18|4.51|0|100.00|0.00|39.77|1.28|33.07|0.36|
|MiyazakiB-v0|36.74|1.24|4.09|0.21|3.69|4.63|0|100.00|0.00|384.93|8.43|265.37|5.11|
|**Type I**       ||||||||||||||
|MazeB-v0|32.82|1.25|5.17|3.96|3.70|21.15|0|100.00|0.00|223.17|8.97|175.73|5.08|
|Littman89-v0|30.90|1.23|4.23|0.13|3.97|4.53|0|100.00|0.00|172.30|4.93|135.47|2.46|
|MiyazakiA-v0|31.54|1.12|4.14|2.33|3.35|16.63|0|100.00|0.00|501.40|10.69|319.53|6.21|
|MazeD-v0|24.88|0.86|2.88|0.08|2.71|3.01|0|100.00|0.00|193.03|6.72|175.83|6.22|
|Cassandra4x4-v0|22.32|1.11|3.17|0.21|2.56|3.54|0|85.19|0.00|299.50|9.42|119.80|4.18|
|Littman57-v0|24.24|0.65|11.87|19.60|3.97|74.23|0|90.33|0.44|54.17|1.44|31.03|0.41|
|**Not Aliased**||||||||||||||
|Maze4-v0|28.56|1.86|3.48|0.05|3.38|3.58|0|100.00|0.00|172.70|7.17|172.70|7.17|
|Maze5-v0|39.85|2.00|4.61|0.08|4.45|4.75|0|100.00|0.00|224.17|8.12|224.17|8.12|
|MazeA-v0|39.68|4.04|4.23|0.06|4.15|4.39|0|100.00|0.00|107.10|4.69|107.10|4.69|
|MazeF1-v0|10.73|0.22|1.80|0.03|1.71|1.85|0|100.00|0.00|14.07|0.25|14.07|0.25|
|MazeF2-v0|16.30|1.00|2.51|0.04|2.42|2.62|0|100.00|0.00|18.13|0.34|18.13|0.34|
|MazeF3-v0|23.48|0.61|3.37|0.06|3.23|3.51|0|100.00|0.00|28.47|0.62|28.47|0.62|
|Woods1-v0|9.59|0.48|1.63|0.02|1.58|1.67|0|100.00|0.00|54.23|1.23|54.23|1.23|
|Woods14-v0|61.04|2.49|9.50|0.18|8.94|9.84|0|100.00|0.00|39.93|2.25|39.93|2.25|


For 5000 trials in Exploration and 500 trials in Exploitation for all environments, computed 30 times for calculating all averages, having $bs_{max} = 2$:
 
| | Exploration Avg | Exploration Std | Exploitation Avg | Exploitation Std | Best Exploitation | Worst Exploitation | Successful tries | Knowledge Avg | Knowledge Std | Population Avg | Population Std | Reliable Avg | Reliable Std |
|------------|:-----:|:-----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|
|**Type III**     ||||||||||||||
|MazeE2-v0|31.58|1.06|9.46|8.64|3.65|42.55|0|71.80|1.42|1596.73|51.11|295.73|11.98|
|Woods101demi-v0|26.94|0.60|3.08|0.04|3.02|3.16|0|90.48|0.00|130.23|7.68|74.33|1.32|
|Maze10-v0|40.19|2.07|33.32|26.05|6.54|75.26|0|94.33|1.00|123.07|4.55|51.80|1.45|
|Woods102-v0|32.23|0.85|6.10|5.58|3.75|27.92|0|95.12|0.00|285.70|2.75|182.67|0.98|
|Woods100-v0|12.24|0.16|2.33|0.04|2.26|2.41|0|100.00|0.00|14.00|0.00|10.00|0.00|
|Woods101-v0|23.95|0.58|3.10|0.12|2.80|3.30|0|92.59|0.00|60.10|2.53|46.00|1.18|
|MazeE1-v0|28.69|1.54|3.87|0.25|3.39|4.42|0|99.57|0.36|1769.03|59.38|850.23|29.91|
|**Type II**      ||||||||||||||
|MazeF4-v0|28.13|0.60|4.51|0.08|4.37|4.67|0|100.00|0.00|40.77|3.07|31.63|0.80|
|Maze7-v0|26.45|0.78|4.34|0.11|4.07|4.57|0|100.00|0.00|39.83|1.65|33.03|0.18|
|MiyazakiB-v0|38.58|1.31|5.53|4.98|3.86|27.58|0|99.98|0.13|439.27|9.66|289.93|7.19|
|**Type I**       ||||||||||||||
|MazeB-v0|32.94|1.12|5.11|3.97|3.71|20.83|0|100.00|0.00|224.87|8.18|175.60|5.55|
|Littman89-v0|31.39|1.14|4.20|0.15|3.90|4.43|0|100.00|0.00|171.70|4.71|135.37|2.24|
|MiyazakiA-v0|34.76|1.42|4.28|2.05|3.24|15.20|0|100.00|0.00|687.63|19.03|405.43|7.06|
|MazeD-v0|25.21|0.91|2.91|0.11|2.72|3.17|0|100.00|0.00|192.63|5.09|175.93|4.84|
|Cassandra4x4-v0|29.17|1.68|3.55|0.41|2.78|4.26|0|87.65|0.00|560.50|21.80|182.90|3.97|
|Littman57-v0|24.43|0.81|9.22|13.28|3.96|58.41|0|90.24|0.00|54.17|1.16|31.23|0.62|
|**Not Aliased**||||||||||||||
|Maze4-v0|28.81|1.83|3.49|0.07|3.35|3.64|0|100.00|0.00|172.57|5.81|172.57|5.81|
|Maze5-v0|38.97|1.95|4.60|0.06|4.48|4.72|0|100.00|0.00|227.33|8.29|227.33|8.29|
|MazeA-v0|38.88|3.14|4.24|0.07|4.12|4.39|0|100.00|0.00|107.43|5.21|107.43|5.21|
|MazeF1-v0|10.73|0.24|1.81|0.04|1.72|1.89|0|100.00|0.00|14.07|0.25|14.07|0.25|
|MazeF2-v0|16.42|0.65|2.50|0.04|2.44|2.63|0|100.00|0.00|18.07|0.25|18.07|0.25|
|MazeF3-v0|23.69|1.01|3.37|0.06|3.23|3.50|0|100.00|0.00|28.53|0.76|28.53|0.76|
|Woods1-v0|9.51|0.41|1.62|0.02|1.58|1.67|0|100.00|0.00|54.50|1.23|54.50|1.23|
|Woods14-v0|60.02|2.00|9.55|0.24|9.13|10.16|0|100.00|0.00|38.47|2.17|38.47|2.17|


### Complexity and Optimal Performance by Maze

These measures come from the paper by Zatuchna and Bagnall:

> Zhanna V. Zatuchna and Anthony Bagnall. 2009. Learning Mazes with Aliasing States: An LCS Algorithm with Associative Perception. Adaptive Behavior - Animals, Animats, Software Agents, Robots, Adaptive Systems 17, 1 (February 2009), 28-57. DOI=http://dx.doi.org/10.1177/1059712308099230

$\phi$ is the average distance to the reward in the maze; how it is calculated depends mainly on the type of maze. The value reported here is $\phi$' if the maze is aliased, and the original $\phi$ otherwise.

$\psi$ measures the complexity of the maze; it depends on the average distance to the reward and on the average number of steps taken by a trained Q-learning agent.

The question marks (**?**) highlight some discrepancies present in the original paper.

The asterisks (**\***) indicate information that was not provided.

For further details, please see the original paper.

#### For Type III Aliased Mazes

|       |MazeE2      |Woods101demi|Maze10      |Woods102    |Woods100    |Woods101    |MazeE1       |
|-------|:----------:|:----------:|:----------:|:----------:|:----------:|:----------:|:-----------:|
|$\phi$'|2.33        |3.1         |5.17        |3.31        |2.33        |2.9         |3.07         |
|$\psi$ |251.2       |251 **?**   |171         |167         |166         |149         |167 **?**    |

#### For Type II Aliased Mazes

|       |MazeF4      |Maze7       |MiyazakiB   |
|-------|:----------:|:----------:|:----------:|
|$\phi$'|4.5 **?**   |4.33        |3.33        |
|$\psi$ |47 **?**    |82 **?**    |1.03        |

#### For Type I Aliased Mazes

|       |MazeB       |Littman89   |MiyazakiA   |MazeD       |Cassandra4x4|Littman57   |
|-------|:----------:|:----------:|:----------:|:----------:|:----------:|:----------:|
|$\phi$'|3.5         |3.77        |3.05        |2.75        |2.27        |3.71        |
|$\psi$ |1.26        |61 **?**    |69 **?**    |1.03        |1           |154 **?**   |

#### For Non Aliased Mazes

|       |Maze4       |Maze5       |MazeA       |MazeF1      |MazeF2      |MazeF3      |Woods1      |Woods14     |
|-------|:----------:|:----------:|:----------:|:----------:|:----------:|:----------:|:----------:|:----------:|
|$\phi$ |3.5         |4.61        |4.23        |1.8         |2.5         |3.38        |1.63        |9.5         |
|$\psi$ | **\***     | **\***     | **\***     | **\***     | **\***     | **\***     | **\***     | **\***     |