In [None]:
# Logger
import logging
logging.basicConfig(level=logging.WARN)

# Enable automatic module reload
%load_ext autoreload
%autoreload 2

# Load BACS module
from bacs.agents.bacs import BACS, Configuration
from bacs.agents.bacs.utils.GymMazeWrapper import _maze_metrics, parse_metrics_to_df, plot_performance

# Load environments
import gym
import my_mazes

# Allow to parallelize all benchmarks to do
import parmap

# For calculation standard deviation
import statistics

# Agent - BACS - BENCHMARKING

## Main Parameters

In [None]:
# Passed as classifier_length and as u_max to both agent configurations.
CLASSIFIER_LENGTH = 8
# Passed as number_of_possible_actions to both agent configurations.
NUMBER_OF_POSSIBLE_ACTIONS = 8

# --- Exploration phase (consumed by cfg_explore and agent.explore) ---
# Number of trials handed to agent.explore() in every benchmark run.
NUMBER_OF_EXPLORE_TRIALS = 5000
DO_ACTION_PLANNING_EXPLORE = False
DO_GA_EXPLORE = False
DO_SUBSUMPTION_EXPLORE = True
BETA_EXPLORE = 0.05
# Only the exploration configuration uses this value; cfg_exploit sets epsilon=0.0.
EPSILON = 0.8

# --- Exploitation phase (consumed by cfg_exploit and agent.exploit) ---
# Number of trials handed to agent.exploit() in every benchmark run.
NUMBER_OF_EXPLOIT_TRIALS = 500
DO_ACTION_PLANNING_EXPLOIT = False
DO_SUBSUMPTION_EXPLOIT = True
BETA_EXPLOIT = 0.00

# Passed as bs_max to both agent configurations.
LENGTH_OF_BEHAVIORAL_SEQUENCES = 1

# Number of independent explore/exploit runs per maze (the `n` of bench_on_maze).
NUMBER_OF_ITERATIONS_TO_BENCH = 30

## Full Details of Agent Parameters

In [None]:
# Settings that are identical for the exploration and the exploitation agent.
_shared_settings = dict(
    classifier_length=CLASSIFIER_LENGTH,
    number_of_possible_actions=NUMBER_OF_POSSIBLE_ACTIONS,
    user_metrics_collector_fcn=_maze_metrics,
    metrics_trial_frequency=1,
    action_planning_frequency=50,
    gamma=0.95,
    theta_i=0.1,
    theta_r=0.9,
    u_max=CLASSIFIER_LENGTH,
    theta_exp=20,
    theta_ga=100,
    theta_as=20,
    mu=0.3,
    chi=0.8,
    bs_max=LENGTH_OF_BEHAVIORAL_SEQUENCES,
)

# Exploration agent: tunable GA / subsumption / action planning, beta and epsilon.
cfg_explore = Configuration(
    do_ga=DO_GA_EXPLORE,
    do_subsumption=DO_SUBSUMPTION_EXPLORE,
    do_action_planning=DO_ACTION_PLANNING_EXPLORE,
    beta=BETA_EXPLORE,
    epsilon=EPSILON,
    **_shared_settings
)

# Exploitation agent: the GA is always off and epsilon is fixed to 0.0.
cfg_exploit = Configuration(
    do_ga=False,
    do_subsumption=DO_SUBSUMPTION_EXPLOIT,
    do_action_planning=DO_ACTION_PLANNING_EXPLOIT,
    beta=BETA_EXPLOIT,
    epsilon=0.0,
    **_shared_settings
)

### Benchmarking - Maze

In [None]:
def _mean_steps(metrics):
    """Return the mean of 'steps_in_trial' over the recorded trial metrics."""
    if not metrics:
        raise ValueError("no trial metrics were recorded, cannot average the steps")
    # Divide by the number of recorded entries rather than by the number of
    # requested trials: both only coincide when metrics_trial_frequency is 1.
    return sum(trial['steps_in_trial'] for trial in metrics) / len(metrics)


def _mean_and_pstdev(values):
    """Return the (arithmetic mean, population standard deviation) of `values`."""
    return float(sum(values)) / len(values), statistics.pstdev(values)


def bench_on_maze(env, n):
    """Benchmark BACS on one maze over `n` independent explore/exploit runs.

    Every run creates a fresh environment from ``env.id``, trains a new agent
    with ``cfg_explore`` for ``NUMBER_OF_EXPLORE_TRIALS`` trials, then hands the
    learned population to a second agent built with ``cfg_exploit`` and runs it
    for ``NUMBER_OF_EXPLOIT_TRIALS`` trials.

    Parameters
    ----------
    env : object exposing an ``id`` attribute (only ``env.id`` is read);
        presumably a Gym registry entry -- TODO confirm against the caller.
    n : int
        Number of independent runs; must be at least 1.

    Returns
    -------
    dict
        The maze id, the mean and population standard deviation (over the
        ``n`` runs) of the average steps in exploration and exploitation and of
        the last recorded 'knowledge', 'population', 'numerosity' and
        'reliable' metrics, plus the per-run exploitation averages under
        'avg_exploit_list'.

    Raises
    ------
    ValueError
        If ``n`` is lower than 1 or if a phase recorded no metrics.
    """
    if n < 1:
        raise ValueError("n must be at least 1, got {!r}".format(n))

    avg_explore_list = []
    avg_exploit_list = []
    knowledge_list = []
    population_list = []
    numerosity_list = []
    reliable_list = []

    for i in range(n):

        # Initialize environment
        maze = gym.make(env.id)

        # Reset it, by putting an agent into random position
        maze.reset()

        # Training of BACS - Exploration
        agent_explore = BACS(cfg_explore)
        population_explore, metrics_explore = agent_explore.explore(maze, NUMBER_OF_EXPLORE_TRIALS)

        # Using BACS - Exploitation
        agent_exploit = BACS(cfg_exploit, population_explore)
        agent_exploit.clean_population()
        _, metrics_exploit = agent_exploit.exploit(maze, NUMBER_OF_EXPLOIT_TRIALS)

        # Plotting performance (kept disabled for batch benchmarking)
        #metrics_df = parse_metrics_to_df(metrics_explore, metrics_exploit)
        #plot_performance(agent_exploit, maze, metrics_df, cfg_exploit, env.id)

        # Average number of steps per recorded trial in each phase
        avg_explore_list.append(_mean_steps(metrics_explore))
        avg_exploit_list.append(_mean_steps(metrics_exploit))

        # Remaining metrics are taken from the last recorded exploitation trial
        final_metrics = metrics_exploit[-1]
        knowledge_list.append(final_metrics['knowledge'])
        population_list.append(final_metrics['population'])
        numerosity_list.append(final_metrics['numerosity'])
        reliable_list.append(final_metrics['reliable'])

        # Textual progress bar, one line per finished run
        print(env.id,'|','#' * (i+1) ,' ' * (n-(i+1)), '|')

    # Compute the means and standard deviations
    avg_explore, std_explore = _mean_and_pstdev(avg_explore_list)
    avg_exploit, std_exploit = _mean_and_pstdev(avg_exploit_list)
    avg_knowledge, std_knowledge = _mean_and_pstdev(knowledge_list)
    avg_population, std_population = _mean_and_pstdev(population_list)
    avg_numerosity, std_numerosity = _mean_and_pstdev(numerosity_list)
    avg_reliable, std_reliable = _mean_and_pstdev(reliable_list)

    result = {
        'maze'             : env.id,
        'avg_explore'      : avg_explore,
        'std_explore'      : std_explore,
        'avg_exploit'      : avg_exploit,
        'std_exploit'      : std_exploit,
        'avg_knowledge'    : avg_knowledge,
        'std_knowledge'    : std_knowledge,
        'avg_population'   : avg_population,
        'std_population'   : std_population,
        'avg_numerosity'   : avg_numerosity,
        'std_numerosity'   : std_numerosity,
        'avg_reliable'     : avg_reliable,
        'std_reliable'     : std_reliable,
        'avg_exploit_list' : avg_exploit_list
    }

    print(result)

    return result

Custom functions for getting available environments in Gym depending on the type of mazes :

In [None]:
# Environment-id prefixes of each maze family.
_TYPE_III_PREFIXES = ("Maze10-", "MazeE1", "MazeE2", "Woods10")
_TYPE_II_PREFIXES = ("MazeF4", "Maze7", "Sutton", "MiyazakiB", "Lab1")
_TYPE_I_PREFIXES = ("MazeB", "MazeD", "Littman", "MiyazakiA", "Cassandra")
_NOT_ALIASED_PREFIXES = (
    "MazeF1", "MazeF2", "MazeF3", "Woods1-",
    "Woods14", "Maze4", "Maze5", "MazeA",
)


def filter_envs_typeIII(env):
    """Return True when env.id starts with a type-III maze prefix."""
    return env.id.startswith(_TYPE_III_PREFIXES)


def filter_envs_typeII(env):
    """Return True when env.id starts with a type-II maze prefix."""
    return env.id.startswith(_TYPE_II_PREFIXES)


def filter_envs_typeI(env):
    """Return True when env.id starts with a type-I maze prefix."""
    return env.id.startswith(_TYPE_I_PREFIXES)


def filter_envs_na(env):
    """Return True when env.id starts with a non-aliased maze prefix."""
    return env.id.startswith(_NOT_ALIASED_PREFIXES)

# Snapshot of every environment currently present in the Gym registry.
all_envs = list(gym.envs.registry.all())

Benchmarking without the genetic algorithms :

In [None]:
# To benchmark every maze family instead of the type-III mazes only, select with:
#   filter_envs_na(env) or filter_envs_typeI(env) or filter_envs_typeII(env) or filter_envs_typeIII(env)
maze_envs = list(filter(filter_envs_typeIII, all_envs))

# bench_on_maze is mapped over the selected mazes by parmap, with
# NUMBER_OF_ITERATIONS_TO_BENCH forwarded as its `n` argument.
results = parmap.map(
    bench_on_maze,
    maze_envs,
    NUMBER_OF_ITERATIONS_TO_BENCH,
    pm_pbar=True,
    pm_processes=8,
)

Parsing the previous result to get a markdown string for each environment :

In [None]:
def _two_decimals(value):
    """Format a number with exactly two decimals."""
    return "{:.2f}".format(value)


# One markdown table row per benchmarked maze.
table_rows = []
for entry in results:
    exploit_runs = entry['avg_exploit_list']
    cells = [
        entry['maze'],
        _two_decimals(entry['avg_explore']),
        _two_decimals(entry['std_explore']),
        _two_decimals(entry['avg_exploit']),
        _two_decimals(entry['std_exploit']),
        _two_decimals(min(exploit_runs)),   # best exploitation run
        _two_decimals(max(exploit_runs)),   # worst exploitation run
        '0',                                # placeholder column, always emitted as 0
        _two_decimals(entry['avg_knowledge']),
        _two_decimals(entry['std_knowledge']),
        _two_decimals(entry['avg_population']),
        _two_decimals(entry['std_population']),
        _two_decimals(entry['avg_reliable']),
        _two_decimals(entry['std_reliable']),
    ]
    table_rows.append('|' + '|'.join(cells) + '|' + '\n')

markdown_str = ''.join(table_rows)

print(markdown_str)

### Last results
 
The parameters used are:
 
- no use of action planning
- use of subsumption in the anticipatory learning process
- use of subsumption in the genetic algorithm, if the latter is used
- $\gamma$ = 0.95
- $\theta_i$ = 0.1
- $\theta_r$ = 0.9
- $u_{max}$ = 8 (length of the classifier)
- $\theta_{exp}$ = 20
- $\beta$ = 0.05
- $\epsilon$ = 0.8
- $bs_{max} = 1$
 
If genetic algorithms are used:

- $\theta_{ga}$ = 100
- $\theta_{as}$ = 20
- $\mu$ = 0.3
- $\chi$ = 0.8
 
The mazes in the following table have been sorted by aliasing type and then by complexity top-down.

#### Experiments with all environments without GA

For 1000 trials in Exploration and 500 trials in Exploitation for all environments, computed 30 times for calculating all averages, having $bs_{max} = 1$:
 
| | Exploration Avg | Exploration Std | Exploitation Avg | Exploitation Std | Best Exploitation | Worst Exploitation | Successful tries | Knowledge Avg | Knowledge Std | Population Avg | Population Std | Reliable Avg | Reliable Std |
|------------|:-----:|:-----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|
|**Type III**     ||||||||||||||
|MazeE2-v0|31.77|2.00|28.13|21.12|4.30|69.73|0|57.23|2.57|1444.53|101.01|204.53|14.28|
|Woods101demi-v0|29.02|1.60|12.64|7.21|3.09|24.20|0|90.63|0.59|292.20|36.30|80.50|2.32|
|Maze10-v0|47.41|3.18|62.00|12.43|47.51|90.21|0|89.01|3.39|167.67|13.32|48.43|6.35|
|Woods102-v0|30.32|1.98|7.37|5.10|3.37|19.17|0|92.52|1.60|329.33|14.62|124.67|4.66|
|Woods100-v0|10.05|0.28|2.33|0.05|2.22|2.41|0|100.00|0.00|23.97|2.18|10.03|0.18|
|Woods101-v0|38.62|1.94|3.87|0.39|3.55|4.69|0|88.52|2.59|63.43|5.76|43.30|1.39|
|MazeE1-v0|26.91|1.49|4.88|2.69|3.18|15.52|0|73.00|3.35|1505.10|86.52|310.73|20.87|
|**Type II**      ||||||||||||||
|MazeF4-v0|29.77|2.03|4.49|0.10|4.26|4.70|0|100.00|0.00|56.50|9.81|34.03|1.85|
|Maze7-v0|28.05|2.69|5.74|5.28|4.09|27.57|0|98.10|2.91|44.90|3.94|34.00|1.24|
|Sutton-v0|74.03|4.87|47.84|17.00|8.01|72.02|0|81.13|1.15|2044.13|135.82|432.00|18.79|
|Lab1-v0|98.93|0.49|99.85|0.16|99.50|100.00|0|80.06|2.20|691.07|61.65|283.87|9.91|
|MiyazakiB-v0|39.64|5.89|4.43|1.75|3.52|13.19|0|94.90|1.73|590.63|72.77|266.90|16.09|
|**Type I**       ||||||||||||||
|MazeB-v0|39.44|3.25|5.40|0.49|5.00|6.00|0|97.75|2.15|274.23|19.50|181.30|10.13|
|Littman89-v0|25.00|2.48|4.24|0.23|3.82|4.72|0|94.72|0.88|220.20|11.58|134.93|4.46|
|MiyazakiA-v0|25.70|2.66|3.72|1.45|3.13|11.49|0|90.60|3.55|941.43|84.76|303.00|25.42|
|MazeD-v0|37.49|3.44|4.00|0.00|4.00|4.00|0|92.05|2.84|245.83|18.20|175.27|8.06|
|Cassandra4x4-v0|16.79|2.60|3.01|0.31|2.58|3.72|0|81.28|1.99|716.93|59.33|145.87|10.43|
|Littman57-v0|23.49|1.80|15.46|22.56|3.80|72.75|0|90.33|0.44|64.37|3.55|31.87|1.06|
|**Not Aliased**||||||||||||||
|Maze4-v0|32.70|2.18|3.50|0.07|3.39|3.67|0|100.00|0.00|172.50|4.81|172.50|4.81|
|Maze5-v0|46.77|2.50|4.64|0.10|4.41|4.88|0|100.00|0.00|228.83|8.86|228.80|8.84|
|MazeA-v0|63.28|2.24|6.00|0.00|6.00|6.00|0|100.00|0.00|106.27|3.94|106.27|3.94|
|MazeF1-v0|11.87|0.40|1.81|0.03|1.76|1.88|0|100.00|0.00|14.07|0.25|14.07|0.25|
|MazeF2-v0|20.07|0.75|2.49|0.04|2.39|2.57|0|100.00|0.00|18.17|0.37|18.17|0.37|
|MazeF3-v0|29.62|1.03|3.37|0.06|3.18|3.48|0|100.00|0.00|28.80|1.19|28.80|1.19|
|Woods1-v0|9.42|0.56|1.63|0.02|1.57|1.66|0|100.00|0.00|54.13|1.15|54.13|1.15|
|Woods14-v0|70.65|1.56|9.55|0.25|9.00|10.28|0|100.00|0.00|39.30|2.35|39.30|2.35|


For 1000 trials in Exploration and 500 trials in Exploitation for all environments, computed 30 times for calculating all averages, having $bs_{max} = 2$:
 
| | Exploration Avg | Exploration Std | Exploitation Avg | Exploitation Std | Best Exploitation | Worst Exploitation | Successful tries | Knowledge Avg | Knowledge Std | Population Avg | Population Std | Reliable Avg | Reliable Std |
|------------|:-----:|:-----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|
|**Type III**     ||||||||||||||
|MazeE2-v0|34.03|1.90|27.40|22.32|3.79|72.69|0|52.63|3.16|1946.00|149.78|181.23|15.14|
|Woods101demi-v0|29.99|1.08|13.13|7.52|3.07|24.62|0|90.71|0.71|376.60|28.23|81.67|2.49|
|Maze10-v0|43.32|4.38|48.07|27.00|5.83|92.87|0|93.26|2.20|242.63|25.33|50.03|3.07|
|Woods102-v0|30.98|1.97|6.56|3.53|3.43|13.11|0|95.08|0.50|397.70|30.48|153.10|7.15|
|Woods100-v0|11.60|0.27|2.33|0.05|2.24|2.46|0|100.00|0.00|32.47|2.87|10.07|0.25|
|Woods101-v0|38.89|2.36|3.96|0.42|3.62|4.89|0|88.27|2.72|64.63|6.58|43.03|1.43|
|MazeE1-v0|27.83|1.58|3.76|0.96|3.11|8.71|0|68.75|3.83|1914.57|123.32|307.47|33.61|
|**Type II**      ||||||||||||||
|MazeF4-v0|30.32|2.43|5.63|6.28|4.23|39.43|0|100.00|0.00|70.17|24.18|34.43|1.82|
|Maze7-v0|27.12|1.69|4.35|0.10|4.14|4.65|0|99.05|2.27|49.73|7.48|34.33|1.56|
|Sutton-v0|76.14|6.00|52.15|14.11|13.72|83.02|0|80.62|1.73|2896.23|206.35|463.97|28.36|
|Lab1-v0|98.92|0.67|99.84|0.20|99.23|100.00|0|80.45|2.68|880.70|93.23|309.50|19.29|
|MiyazakiB-v0|40.95|6.54|3.96|0.38|3.49|4.97|0|95.07|1.66|644.27|91.62|272.20|20.23|
|**Type I**       ||||||||||||||
|MazeB-v0|40.07|3.50|5.30|0.46|5.00|6.00|0|97.39|3.08|284.00|17.88|177.97|13.35|
|Littman89-v0|26.06|3.07|4.30|0.29|3.77|5.12|0|95.02|0.59|231.13|22.73|135.73|4.62|
|MiyazakiA-v0|28.38|4.02|3.88|1.27|3.12|9.86|0|89.47|3.59|1114.73|130.85|312.90|29.52|
|MazeD-v0|36.30|2.81|4.00|0.00|4.00|4.00|0|91.77|4.03|247.17|17.56|174.53|8.20|
|Cassandra4x4-v0|19.31|4.26|2.96|0.35|2.47|3.89|0|84.32|2.32|951.67|138.24|171.07|14.61|
|Littman57-v0|24.04|1.97|16.71|22.59|3.77|60.61|0|90.24|0.00|63.60|3.65|31.93|1.06|
|**Not Aliased**||||||||||||||
|Maze4-v0|31.83|1.70|3.49|0.04|3.40|3.58|0|100.00|0.00|172.97|7.18|172.97|7.18|
|Maze5-v0|45.91|3.62|4.65|0.06|4.49|4.77|0|100.00|0.00|229.90|10.89|229.83|10.94|
|MazeA-v0|63.20|1.99|6.10|0.30|6.00|7.00|0|100.00|0.00|108.03|4.87|108.03|4.87|
|MazeF1-v0|11.83|0.41|1.80|0.03|1.76|1.86|0|100.00|0.00|14.00|0.00|14.00|0.00|
|MazeF2-v0|19.92|0.67|2.50|0.04|2.40|2.57|0|100.00|0.00|18.27|0.44|18.27|0.44|
|MazeF3-v0|29.94|1.19|3.35|0.06|3.20|3.47|0|100.00|0.00|28.87|1.20|28.87|1.20|
|Woods1-v0|9.73|0.58|1.63|0.02|1.58|1.67|0|100.00|0.00|54.60|1.50|54.60|1.50|
|Woods14-v0|70.43|1.42|9.56|0.25|9.10|10.03|0|100.00|0.00|39.43|2.36|39.43|2.36|

{'maze': 'MazeA-v0', 'avg_explore': 63.204566666666665, 'std_explore': 1.9944947678269036, 'avg_exploit': 6.1, 'std_exploit': 0.3, 'avg_knowledge': 100.0, 'std_knowledge': 0.0, 'avg_population': 108.03333333333333, 'std_population': 4.874992877487674, 'avg_numerosity': 108.03333333333333, 'std_numerosity': 4.874992877487674, 'avg_reliable': 108.03333333333333, 'std_reliable': 4.874992877487674, 'avg_exploit_list': [6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 7.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 7.0, 6.0, 7.0, 6.0, 6.0, 6.0]}

#### Experiments with type-III aliased mazes with more trials in exploration without GA

For 5000 trials in Exploration and 500 trials in Exploitation for type-III aliased environments, computed 30 times for calculating all averages, having $bs_{max} = 1$
 
| | Exploration Avg | Exploration Std | Exploitation Avg | Exploitation Std | Best Exploitation | Worst Exploitation | Successful tries | Knowledge Avg | Knowledge Std | Population Avg | Population Std | Reliable Avg | Reliable Std |
|------------|:-----:|:-----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|
|**Type III**     ||||||||||||||
|MazeE2-v0|29.37|1.58|16.82|22.32|4.45|67.60|0|72.17|1.17|2305.57|105.28|422.57|26.19|
|Woods101demi-v0|27.43|0.64|14.49|5.30|3.01|23.68|0|90.56|0.43|324.10|19.44|83.97|2.64|
|Maze10-v0|45.13|1.62|61.56|12.52|49.88|92.67|0|92.91|1.85|218.37|20.67|53.83|6.81|
|Woods102-v0|27.75|0.53|5.84|4.73|3.28|24.75|0|93.33|1.12|343.47|17.45|135.57|1.63|
|Woods100-v0|9.91|0.13|2.32|0.05|2.22|2.42|0|100.00|0.00|23.97|1.99|10.03|0.18|
|Woods101-v0|36.77|2.14|4.15|0.46|3.50|4.75|0|90.62|2.08|85.77|19.27|43.53|1.41|
|MazeE1-v0|26.81|1.11|5.04|3.59|3.28|19.35|0|93.88|1.25|2594.33|153.27|595.27|24.65|

For 5000 trials in Exploration and 500 trials in Exploitation for type-III aliased environments, computed 30 times for calculating all averages, having $bs_{max} = 2$
 
| | Exploration Avg | Exploration Std | Exploitation Avg | Exploitation Std | Best Exploitation | Worst Exploitation | Successful tries | Knowledge Avg | Knowledge Std | Population Avg | Population Std | Reliable Avg | Reliable Std |
|------------|:-----:|:-----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|
|**Type III**     ||||||||||||||
|MazeE2-v0|33.92|3.12|21.46|22.55|4.65|64.97|0|74.56|1.96|3533.47|206.84|475.23|30.90|
|Woods101demi-v0|27.42|0.70|15.93|6.36|3.11|25.01|0|90.56|0.43|435.03|25.61|89.40|2.86|
|Maze10-v0|42.22|2.44|57.32|14.23|9.20|89.12|0|95.46|0.91|366.37|28.54|65.17|3.70|
|Woods102-v0|28.49|1.06|7.45|7.84|3.58|41.71|0|95.16|0.22|457.57|26.52|184.60|2.48|
|Woods100-v0|11.35|0.16|2.33|0.05|2.25|2.47|0|100.00|0.00|31.90|2.89|10.03|0.18|
|Woods101-v0|37.43|2.88|3.98|0.39|3.50|4.71|0|90.00|2.56|82.97|18.97|43.67|1.42|
|MazeE1-v0|28.13|1.52|4.14|1.48|3.33|9.79|0|93.54|1.11|4048.50|219.68|869.87|54.00|

#### Particular experiments with MazeE2 and Maze10 without GA

For 10000 trials in Exploration and 500 trials in Exploitation for MazeE2 environment, computed 30 times for calculating all averages, having $bs_{max} = 1$
 
| | Exploration Avg | Exploration Std | Exploitation Avg | Exploitation Std | Best Exploitation | Worst Exploitation | Successful tries | Knowledge Avg | Knowledge Std | Population Avg | Population Std | Reliable Avg | Reliable Std |
|------------|:-----:|:-----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|
|**Type III**     ||||||||||||||
|MazeE2-v0|28.36|1.28|16.38|18.64|3.78|65.50|0|75.44|1.57|2948.10|93.86|483.57|19.94|

Average number of steps in exploitation for each of the 30 runs:

(27.812, 24.302, 6.002, 28.094, 6.144, 54.656, 5.244, 5.406, 25.27, 52.564, 5.284, 6.03, 5.3, 4.906, 4.872, 5.412, 45.198, 6.022, 4.784, 5.174, 6.19, 65.502, 6.308, 54.572, 5.608, 5.426, 3.782, 4.94, 5.388, 5.176)



For 10000 trials in Exploration and 500 trials in Exploitation for Maze10 environment, computed 30 times for calculating all averages, having $bs_{max} = 2$
 
| | Exploration Avg | Exploration Std | Exploitation Avg | Exploitation Std | Best Exploitation | Worst Exploitation | Successful tries | Knowledge Avg | Knowledge Std | Population Avg | Population Std | Reliable Avg | Reliable Std |
|------------|:-----:|:-----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|
|**Type III**     ||||||||||||||
|Maze10-v0|41.19|2.34|53.48|18.79|7.58|89.03|0|95.89|0.76|457.77|32.45|74.30|4.04|

Average number of steps in exploitation for each of the 30 runs:

(46.608, 9.518, 53.008, 46.236, 53.35, 53.078, 7.576, 76.954, 57.848, 89.028, 55.536, 52.224, 25.32, 60.534, 59.174, 22.034, 61.114, 41.318, 46.916, 56.712, 51.61, 73.332, 67.266, 59.646, 88.24, 45.022, 56.422, 56.726, 54.882, 77.198)

### Complexity and Optimal Performance by Maze

These measures come from the paper by Zatuchna and Bagnall:

> Zhanna V. Zatuchna and Anthony Bagnall. 2009. Learning Mazes with Aliasing States: An LCS Algorithm with Associative Perception. Adaptive Behavior - Animals, Animats, Software Agents, Robots, Adaptive Systems 17, 1 (February 2009), 28-57. DOI=http://dx.doi.org/10.1177/1059712308099230

$\phi$ is the average distance to the reward in the maze. Its calculation depends mainly on the type of maze. $\phi$' is reported here if the maze is aliased, and the original $\phi$ otherwise.

$\psi$ is about the complexity of the maze that depends on the average distance to reward and on the average steps taken by trained Q-learning.

The question marks (**?**) highlight some discrepancies present in the original paper.

The asterisks (**\***) indicate values that were not provided in the original paper.

For further details, please see the original paper.

#### For Type III Aliased Mazes

|       |MazeE2      |Woods101demi|Maze10      |Woods102    |Woods100    |Woods101    |MazeE1       |
|-------|:----------:|:----------:|:----------:|:----------:|:----------:|:----------:|:-----------:|
|$\phi$'|2.33        |3.1         |5.17        |3.31        |2.33        |2.9         |3.07         |
|$\psi$ |251.2       |251 **?**   |171         |167         |166         |149         |167 **?**    |

#### For Type II Aliased Mazes

|       |MazeF4      |Maze7       |Sutton      |Lab1        |MiyazakiB   |
|-------|:----------:|:----------:|:----------:|:----------:|:----------:|
|$\phi$'|4.5 **?**   |4.33        | **\***     | **\***     |3.33        |
|$\psi$ |47 **?**    |82 **?**    | **?**      | **?**      |1.03        |

#### For Type I Aliased Mazes

|       |MazeB       |Littman89   |MiyazakiA   |MazeD       |Cassandra4x4|Littman57   |
|-------|:----------:|:----------:|:----------:|:----------:|:----------:|:----------:|
|$\phi$'|3.5         |3.77        |3.05        |2.75        |2.27        |3.71        |
|$\psi$ |1.26        |61 **?**    |69 **?**    |1.03        |1           |154 **?**   |

#### For Non Aliased Mazes

|       |Maze4       |Maze5       |MazeA       |MazeF1      |MazeF2      |MazeF3      |Woods1      |Woods14     |
|-------|:----------:|:----------:|:----------:|:----------:|:----------:|:----------:|:----------:|:----------:|
|$\phi$ |3.5         |4.61        |4.23        |1.8         |2.5         |3.38        |1.63        |9.5         |
|$\psi$ | **\***     | **\***     | **\***     | **\***     | **\***     | **\***     | **\***     | **\***     |