In [None]:
# Logger
import logging
logging.basicConfig(level=logging.WARN)

# Enable automatic module reload
%load_ext autoreload
%autoreload 2

# Load PyALCS ACS2 module
from lcs.agents.acs2 import ACS2, Configuration
from bacs.agents.bacs.utils.GymMazeWrapper import _maze_metrics, parse_metrics_to_df, plot_performance

# Load environments
import gym
import my_mazes

# Allow to parallelize all benchmarks to do
import parmap

# For calculation standard deviation
import statistics

# Agent - ACS2 - BENCHMARKING

## Main Parameters

In [None]:
# Passed to both Configuration objects below; CLASSIFIER_LENGTH is also reused
# there as `u_max`.
CLASSIFIER_LENGTH = 8
NUMBER_OF_POSSIBLE_ACTIONS = 8

# Exploration phase: trial count given to `explore()` and the switches/values
# used to build `cfg_explore` (do_action_planning, do_ga, do_subsumption,
# beta, epsilon).
NUMBER_OF_EXPLORE_TRIALS = 2000
DO_ACTION_PLANNING_EXPLORE = False
DO_GA_EXPLORE = False
DO_SUBSUMPTION_EXPLORE = True
BETA_EXPLORE = 0.025
EPSILON = 0.3

# Exploitation phase: trial count given to `exploit()` and the switches/values
# used to build `cfg_exploit`. That configuration hard-codes do_ga=False and
# epsilon=0.0 instead of reading constants from here.
NUMBER_OF_EXPLOIT_TRIALS = 500
DO_ACTION_PLANNING_EXPLOIT = False
DO_SUBSUMPTION_EXPLOIT = True
BETA_EXPLOIT = 0.00

# Number of explore+exploit repetitions per maze (the `n` argument of
# `bench_on_maze`), over which the averages and standard deviations are taken.
NUMBER_OF_ITERATIONS_TO_BENCH = 30

## Full Details of Agent Parameters

In [None]:
# Configuration of the agent used during the exploration phase.
cfg_explore = Configuration(
    # Problem dimensions
    classifier_length=CLASSIFIER_LENGTH,
    number_of_possible_actions=NUMBER_OF_POSSIBLE_ACTIONS,
    u_max=CLASSIFIER_LENGTH,
    # Metrics collection (one record per trial)
    user_metrics_collector_fcn=_maze_metrics,
    metrics_trial_frequency=1,
    # Optional mechanisms
    do_action_planning=DO_ACTION_PLANNING_EXPLORE,
    action_planning_frequency=50,
    do_subsumption=DO_SUBSUMPTION_EXPLORE,
    do_ga=DO_GA_EXPLORE,
    # Learning parameters
    beta=BETA_EXPLORE,
    gamma=0.95,
    epsilon=EPSILON,
    theta_i=0.1,
    theta_r=0.9,
    theta_exp=20,
    # Genetic algorithm parameters (only relevant when do_ga is enabled)
    theta_ga=100,
    theta_as=20,
    mu=0.3,
    chi=0.8,
)

# Configuration of the agent used during the exploitation phase: same values as
# above except for the exploit-specific switches, beta, epsilon=0.0 and GA off.
cfg_exploit = Configuration(
    # Problem dimensions
    classifier_length=CLASSIFIER_LENGTH,
    number_of_possible_actions=NUMBER_OF_POSSIBLE_ACTIONS,
    u_max=CLASSIFIER_LENGTH,
    # Metrics collection (one record per trial)
    user_metrics_collector_fcn=_maze_metrics,
    metrics_trial_frequency=1,
    # Optional mechanisms
    do_action_planning=DO_ACTION_PLANNING_EXPLOIT,
    action_planning_frequency=50,
    do_subsumption=DO_SUBSUMPTION_EXPLOIT,
    do_ga=False,
    # Learning parameters
    beta=BETA_EXPLOIT,
    gamma=0.95,
    epsilon=0.0,
    theta_i=0.1,
    theta_r=0.9,
    theta_exp=20,
    # Genetic algorithm parameters (unused here since do_ga is False)
    theta_ga=100,
    theta_as=20,
    mu=0.3,
    chi=0.8,
)

### Benchmarking - Maze

In [None]:
def _mean_steps_per_trial(metrics):
    """Return the mean of ``steps_in_trial`` over the recorded trial metrics."""
    # Divide by the number of recorded entries rather than by the requested
    # number of trials, so the mean stays correct whatever the value of
    # `metrics_trial_frequency` is.
    return sum(trial['steps_in_trial'] for trial in metrics) / len(metrics)


def bench_on_maze(env,n):
    """Benchmark ACS2 on one maze by repeating an explore/exploit run `n` times.

    Each repetition builds a fresh environment with ``gym.make(env.id)``, trains
    a new agent with ``cfg_explore`` for ``NUMBER_OF_EXPLORE_TRIALS`` trials,
    then runs a second agent built from ``cfg_exploit`` and the learned
    population for ``NUMBER_OF_EXPLOIT_TRIALS`` trials.

    Parameters
    ----------
    env
        Object exposing an ``id`` attribute usable with ``gym.make``
        (an entry of the Gym registry in this notebook).
    n : int
        Number of independent repetitions; must be at least 1.

    Returns
    -------
    dict
        ``maze`` (the environment id), ``avg_explore`` / ``std_explore`` and
        ``avg_exploit`` / ``std_exploit`` (mean and population standard
        deviation, over the `n` repetitions, of the per-repetition mean
        ``steps_in_trial``), plus ``knowledge``, ``population``, ``numerosity``
        and ``reliable`` taken from the last exploitation metric of the LAST
        repetition only (they are not averaged).

    Raises
    ------
    ValueError
        If `n` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be at least 1, got {!r}".format(n))

    avg_explore_list = []
    avg_exploit_list = []

    for i in range(n):

        # Initialize environment
        maze = gym.make(env.id)
        try:
            # Reset it, by putting an agent into random position
            maze.reset()

            # Training of ACS2 - Exploration
            agent_explore = ACS2(cfg_explore)
            population_explore, metrics_explore = agent_explore.explore(maze, NUMBER_OF_EXPLORE_TRIALS)

            # Using ACS2 - Exploitation
            agent_exploit = ACS2(cfg_exploit, population_explore)
            _, metrics_exploit = agent_exploit.exploit(maze, NUMBER_OF_EXPLOIT_TRIALS)
        finally:
            # One environment is created per repetition: release it even when
            # a run fails so that long benchmarks do not accumulate them.
            maze.close()

        # Plotting performance (disabled during benchmarking)
        #metrics_df = parse_metrics_to_df(metrics_explore, metrics_exploit)
        #plot_performance(agent_exploit, maze, metrics_df, cfg_exploit, env.id)

        # Average number of steps per trial in each phase of this repetition
        avg_explore_list.append(_mean_steps_per_trial(metrics_explore))
        avg_exploit_list.append(_mean_steps_per_trial(metrics_exploit))

        # Textual progress bar for this maze
        print(env.id,'|','#' * (i+1) ,' ' * (n-(i+1)), '|')

    # Compute the mean and standard deviation over the repetitions
    avg_explore = float(sum(avg_explore_list)) / n
    std_explore = statistics.pstdev(avg_explore_list)
    avg_exploit = float(sum(avg_exploit_list)) / n
    std_exploit = statistics.pstdev(avg_exploit_list)

    # The population-related figures describe the final state of the last
    # repetition only.
    final_metrics = metrics_exploit[-1]
    result = {
        'maze' : env.id,
        'avg_explore' : avg_explore,
        'std_explore' : std_explore,
        'avg_exploit' : avg_exploit,
        'std_exploit' : std_exploit,
        'knowledge'   : final_metrics['knowledge'],
        'population'  : final_metrics['population'],
        'numerosity'  : final_metrics['numerosity'],
        'reliable'    : final_metrics['reliable']
    }

    print(result)

    return result

Custom filter functions that select the environments registered in Gym according to the type of maze:

In [None]:
def filter_envs_typeIII(env):
    """Return True when `env.id` starts with one of the Type III maze prefixes."""
    return env.id.startswith(("Maze10-", "MazeE", "Woods10"))

def filter_envs_typeII(env):
    """Return True when `env.id` starts with one of the Type II maze prefixes."""
    return env.id.startswith(("MazeF4", "Maze7", "Sutton", "MiyazakiB", "Lab1"))

def filter_envs_typeI(env):
    """Return True when `env.id` starts with one of the Type I maze prefixes."""
    return env.id.startswith(("MazeB", "MazeD", "Littman", "MiyazakiA", "Cassandra"))

def filter_envs_na(env):
    """Return True when `env.id` starts with one of the non-aliased maze prefixes."""
    return env.id.startswith((
        "MazeF1", "MazeF2", "MazeF3", "Woods1-",
        "Woods14", "Maze4", "Maze5", "MazeA",
    ))

# Snapshot of every environment spec currently registered in Gym.
all_envs = list(gym.envs.registry.all())

Benchmarking without the genetic algorithm on all mazes:

In [None]:
# Restore the configured GA setting first: the GA benchmark cell further down
# sets `cfg_explore.do_ga = True` in place, so without this line re-running this
# cell afterwards would silently benchmark WITH the genetic algorithm.
cfg_explore.do_ga = DO_GA_EXPLORE

# Keep every registered environment matched by one of the maze-type filters.
maze_envs = [env for env in all_envs if filter_envs_na(env) or filter_envs_typeI(env) or filter_envs_typeII(env) or filter_envs_typeIII(env)]

# One worker task per maze; each task repeats the explore/exploit run
# NUMBER_OF_ITERATIONS_TO_BENCH times and returns its result dictionary.
parmap.map(bench_on_maze, maze_envs, NUMBER_OF_ITERATIONS_TO_BENCH, pm_pbar=True, pm_processes=16)

Benchmarking with the genetic algorithm on all mazes:

In [None]:
# Switch the genetic algorithm on for the exploration agent. This mutates the
# shared `cfg_explore` object in place.
cfg_explore.do_ga = True

# Keep every registered environment matched by at least one maze-type filter.
maze_envs = [
    env
    for env in all_envs
    if any(
        matches(env)
        for matches in (filter_envs_na, filter_envs_typeI, filter_envs_typeII, filter_envs_typeIII)
    )
]

# One worker task per maze, each repeated NUMBER_OF_ITERATIONS_TO_BENCH times.
parmap.map(
    bench_on_maze,
    maze_envs,
    NUMBER_OF_ITERATIONS_TO_BENCH,
    pm_pbar=True,
    pm_processes=16,
)

### Last results
 
The parameters used are:
 
- no use of action planning
- use of subsumption in the anticipatory learning process
- use of subsumption in the genetic algorithm, if the latter is used
- $\gamma$ = 0.95
- $\theta_i$ = 0.1
- $\theta_r$ = 0.9
- $u_{max}$ = 8 (length of the classifier)
- $\theta_{exp}$ = 20
- $\beta$ = 0.025
- $\epsilon$ = 0.3
 
If genetic algorithms are used:

- $\theta_{ga}$ = 100
- $\theta_{as}$ = 20
- $\mu$ = 0.3
- $\chi$ = 0.8
 
The results below were obtained with 2000 trials in exploration and 500 trials in exploitation for every environment, each experiment being repeated 30 times to compute the averages and standard deviations.
 
 
| | Exploration Avg W/out GA | Exploration Std W/out GA | Exploitation Avg W/out GA | Exploitation Std W/out GA | Exploration Avg W/ GA | Exploration Std W/ GA | Exploitation Avg W/ GA | Exploitation Std W/ GA |
|------------|:-----:|:-----:|:----:|:----:|:----:|:----:|:----:|:----:|
|**Type III**     |||||||||
|MazeE2      | 25.17 | 1.332 | 60.33 | 4.575 | 31.22 | 3.782 | 73.74 | 4.211 |
|Woods101demi| 26.50 | 1.141 | 66.52 | 2.375 | 33.75 | 3.289 | 72.05 | 7.434 |
|Maze10      | 55.02 | 1.832 | 76.56 | 9.228 | 79.00 | 2.248 | 94.56 | 1.085 |
|Woods102    | 21.93 | 1.401 | 54.18 | 8.206 | 42.10 | 4.218 | 85.45 | 10.37 |
|Woods100    | 8.594 | 0.331 | 34.83 | 2.002 | 10.93 | 1.620 | 34.63 | 1.922 |
|Woods101    | 13.03 | 0.760 | 41.27 | 2.081 | 24.99 | 3.489 | 57.61 | 22.14 |
|MazeE1      | 11.63 | 1.585 | 27.47 | 9.264 | 17.38 | 3.555 | 57.56 | 12.90 |
|**Type II**      |||||||||
|MazeF4      | 51.39 | 12.77 | 42.49 | 17.94 | 46.86 | 10.29 | 53.90 | 24.13 |
|Maze7       | 54.26 | 9.730 | 49.51 | 12.44 | 43.99 | 11.46 | 50.72 | 23.69 |
|Sutton      | 57.33 | 19.85 | 63.96 | 16.45 | 81.13 | 9.729 | 90.43 | 11.75 |
|Lab1        | 99.71 | 0.531 | 100.0 | 0.000 | 99.62 | 0.648 | 100.0 | 0.000 |
|MiyazakiB   | 10.57 | 2.333 | 4.417 | 1.724 | 41.69 | 11.24 | 68.37 | 36.96 |
|**Type I**       |||||||||
|MazeB       | 17.79 | 3.767 | 19.13 | 31.72 | 35.38 | 6.805 | 93.80 | 23.20 |
|Littman89   | 9.144 | 2.154 | 7.343 | 7.404 | 16.14 | 4.584 | 28.71 | 21.20 |
|MiyazakiA   | 7.433 | 1.025 | 3.967 | 1.682 | 11.32 | 5.081 | 36.36 | 28.61 |
|MazeD       | 9.900 | 2.025 | 4.700 | 1.038 | 19.53 | 5.741 | 52.90 | 47.11 |
|Cassandra4x4| 4.906 | 0.594 | 4.330 | 7.247 | 7.126 | 2.803 | 26.82 | 31.40 |
|Littman57   | 7.791 | 3.017 | 7.367 | 13.16 | 34.40 | 17.43 | 75.20 | 8.331 |
|**Non Aliased**|||||||||
|Maze4       | 9.098 | 1.672 | 4.247 | 0.387 | 14.18 | 6.714 | 23.97 | 26.55 |
|Maze5       | 13.73 | 1.707 | 5.982 | 1.533 | 27.05 | 11.14 | 49.78 | 30.55 |
|MazeA       | 17.90 | 7.791 | 7.667 | 1.043 | 59.44 | 9.784 | 78.77 | 38.50 |
|MazeF1      | 3.271 | 0.404 | 2.137 | 0.292 | 6.903 | 3.890 | 20.35 | 31.97 |
|MazeF2      | 5.118 | 1.979 | 2.737 | 0.205 | 9.850 | 4.802 | 22.91 | 28.98 |
|MazeF3      | 6.302 | 0.436 | 3.575 | 0.259 | 21.16 | 9.684 | 31.85 | 34.34 |
|Woods1      | 3.403 | 0.511 | 2.190 | 0.301 | 3.876 | 1.500 | 10.02 | 20.67 |
|Woods14     | 35.05 | 10.28 | 10.95 | 5.687 | 65.24 | 11.65 | 70.19 | 23.65 |


### Complexity and Optimal Performance by Maze

These two measures come from the paper by Zatuchna and Bagnall:

> Zhanna V. Zatuchna and Anthony Bagnall. 2009. Learning Mazes with Aliasing States: An LCS Algorithm with Associative Perception. Adaptive Behavior - Animals, Animats, Software Agents, Robots, Adaptive Systems 17, 1 (February 2009), 28-57. DOI=http://dx.doi.org/10.1177/1059712308099230

$\phi$ is the average distance to the reward in the maze. Its calculation depends mainly on the type of maze. The tables below report $\phi$' if the maze is aliased, and the original $\phi$ otherwise.

$\psi$ is about the complexity of the maze that depends on the average distance to reward and on the average steps taken by trained Q-learning.

The question marks (**?**) highlight some discrepancies present in the original paper.

The asterisks (**\***) indicate values that were not provided.

For further details, please see the original paper.

#### For Type III Aliased Mazes

|       |MazeE2      |Woods101demi|Maze10      |Woods102    |Woods100    |Woods101    |MazeE1       |
|-------|:----------:|:----------:|:----------:|:----------:|:----------:|:----------:|:-----------:|
|$\phi$'|2.33        |3.1         |5.17        |3.31        |2.33        |2.9         |3.07         |
|$\psi$ |251.2       |251 **?**   |171         |167         |166         |149         |167 **?**    |

#### For Type II Aliased Mazes

|       |MazeF4      |Maze7       |Sutton      |Lab1        |MiyazakiB   |
|-------|:----------:|:----------:|:----------:|:----------:|:----------:|
|$\phi$'|4.5 **?**   |4.33        |Sutton      |Lab1        |3.33        |
|$\psi$ |47 **?**    |82 **?**    | **?**      | **?**      |1.03        |

#### For Type I Aliased Mazes

|       |MazeB       |Littman89   |MiyazakiA   |MazeD       |Cassandra4x4|Littman57   |
|-------|:----------:|:----------:|:----------:|:----------:|:----------:|:----------:|
|$\phi$'|3.5         |3.77        |3.05        |2.75        |2.27        |3.71        |
|$\psi$ |1.26        |61 **?**    |69 **?**    |1.03        |1           |154 **?**   |

#### For Non Aliased Mazes

|       |Maze4       |Maze5       |MazeA       |MazeF1      |MazeF2      |MazeF3      |Woods1      |Woods14     |
|-------|:----------:|:----------:|:----------:|:----------:|:----------:|:----------:|:----------:|:----------:|
|$\phi$ |3.5         |4.61        |4.23        |1.8         |2.5         |3.38        |1.63        |9.5         |
|$\psi$ | **\***     | **\***     | **\***     | **\***     | **\***     | **\***     | **\***     | **\***     |