In [None]:
# Logger
import logging
logging.basicConfig(level=logging.WARN)

# Enable automatic module reload
%load_ext autoreload
%autoreload 2

# Load PyALCS MACS module
from lcs.agents.macs import MACS, Configuration
from lcs.agents.macs.utils.GymMazeWrapper import _maze_metrics, parse_metrics_to_df, plot_performance

# Load environments
import gym
import gym_maze

# Used to run the benchmarks in parallel
import parmap

# Agent - MACS - BENCHMARKING

## Main Parameters

In [None]:
# Passed as `classifier_length` and reused as `u_max` in both Configurations.
CLASSIFIER_LENGTH = 8
# Passed as `number_of_possible_actions` in both Configurations.
NUMBER_OF_POSSIBLE_ACTIONS = 8

# --- Exploration phase (consumed by `cfg_explore` and `agent.explore`) ---
# Number of trials requested from `explore(...)` in each benchmark run.
NUMBER_OF_EXPLORE_TRIALS = 1000
# Feature switches forwarded as `do_action_planning`, `do_ga` and
# `do_subsumption` respectively.
DO_ACTION_PLANNING_EXPLORE = False
DO_GA_EXPLORE = False
DO_SUBSUMPTION_EXPLORE = True
# Forwarded as `beta` (presumably the learning rate -- confirm against the
# MACS Configuration documentation).
BETA_EXPLORE = 0.025
# Forwarded as `prob_x` for exploration only; the exploitation config uses 0.0.
PROBABILITY_X = 0.3

# --- Exploitation phase (consumed by `cfg_exploit` and `agent.exploit`) ---
# Number of trials requested from `exploit(...)` in each benchmark run.
NUMBER_OF_EXPLOIT_TRIALS = 500
DO_ACTION_PLANNING_EXPLOIT = False
DO_SUBSUMPTION_EXPLOIT = True
# `beta` is zero during exploitation.
BETA_EXPLOIT = 0.00

## Full Details of Agent Parameters

In [None]:
# Settings that are identical for the exploration and the exploitation agent.
# Keeping them in one place guarantees both phases stay in sync.
_shared_settings = dict(
    classifier_length=CLASSIFIER_LENGTH,
    number_of_possible_actions=NUMBER_OF_POSSIBLE_ACTIONS,
    user_metrics_collector_fcn=_maze_metrics,
    metrics_trial_frequency=1,
    action_planning_frequency=50,
    gamma=0.95,
    theta_i=0.1,
    theta_r=0.9,
    u_max=CLASSIFIER_LENGTH,
    theta_exp=20,
    theta_ga=100,
    theta_as=20,
    mu=0.3,
    chi=0.8,
)

# Exploration: every phase-specific switch comes from the parameter cell.
cfg_explore = Configuration(
    do_ga=DO_GA_EXPLORE,
    do_subsumption=DO_SUBSUMPTION_EXPLORE,
    do_action_planning=DO_ACTION_PLANNING_EXPLORE,
    beta=BETA_EXPLORE,
    prob_x=PROBABILITY_X,
    **_shared_settings
)

# Exploitation: the GA is always off and prob_x is fixed at 0.0.
cfg_exploit = Configuration(
    do_ga=False,
    do_subsumption=DO_SUBSUMPTION_EXPLOIT,
    do_action_planning=DO_ACTION_PLANNING_EXPLOIT,
    beta=BETA_EXPLOIT,
    prob_x=0.0,
    **_shared_settings
)

### Benchmarking - Maze

In [None]:
def _mean_steps(metrics):
    """Return the mean of 'steps_in_trial' over the given metric records.

    The divisor is the number of records actually present, so the result stays
    a true per-record mean even if metrics are not collected on every trial.
    Returns 0.0 when no record was collected.
    """
    steps = [record['steps_in_trial'] for record in metrics]
    return sum(steps) / len(steps) if steps else 0.0


def bench_on_maze(env, n_runs=30):
    """Benchmark MACS on one maze environment over several independent runs.

    Every run creates a fresh environment with ``gym.make(env.id)``, trains a
    new agent built from ``cfg_explore`` for ``NUMBER_OF_EXPLORE_TRIALS``
    trials, then passes the resulting population to a second agent built from
    ``cfg_exploit`` and runs it for ``NUMBER_OF_EXPLOIT_TRIALS`` trials. The
    mean number of steps per trial is recorded for both phases, and those
    per-run means are finally averaged over all runs.

    Parameters
    ----------
    env
        Object exposing an ``id`` attribute that ``gym.make`` accepts
        (here: an entry of the gym registry).
    n_runs : int, optional
        Number of independent explore/exploit runs to average over.
        Defaults to 30, the value previously hard-coded.

    Returns
    -------
    dict
        ``{'maze': env.id, 'avg_explore': float, 'avg_exploit': float}``.

    Raises
    ------
    ValueError
        If ``n_runs`` is smaller than 1.
    """
    if n_runs < 1:
        raise ValueError("n_runs must be at least 1")

    avg_explore_list = []
    avg_exploit_list = []

    for run in range(n_runs):

        # Initialize environment
        maze = gym.make(env.id)

        # Reset it before training (original note: puts the agent into a
        # random position). The returned observation is not needed here.
        maze.reset()

        # Training of MACS - Exploration
        agent_explore = MACS(cfg_explore)
        population_explore, metrics_explore = agent_explore.explore(maze, NUMBER_OF_EXPLORE_TRIALS)

        # Using MACS - Exploitation, starting from the explored population
        agent_exploit = MACS(cfg_exploit, population_explore)
        agent_exploit.clean_population()
        population_exploit, metrics_exploit = agent_exploit.exploit(maze, NUMBER_OF_EXPLOIT_TRIALS)

        # Plotting performance (disabled)
        #metrics_df = parse_metrics_to_df(metrics_explore, metrics_exploit)
        #plot_performance(agent_exploit, maze, metrics_df, cfg_exploit, env.id)

        # Per-run mean number of steps in each phase
        avg_explore_list.append(_mean_steps(metrics_explore))
        avg_exploit_list.append(_mean_steps(metrics_exploit))

        # Textual progress bar: one '#' per finished run
        done = run + 1
        print(env.id, '|', '#' * done, ' ' * (n_runs - done), '|')

    avg_explore = float(sum(avg_explore_list)) / n_runs
    avg_exploit = float(sum(avg_exploit_list)) / n_runs

    print(
        "Statistical Average number of steps to solve ", env.id, " is ", avg_explore,
        " for a total of ", NUMBER_OF_EXPLORE_TRIALS, " trials in EXPLORATION and is ", avg_exploit,
        " for a total of ", NUMBER_OF_EXPLOIT_TRIALS, " trials in EXPLOITATION"
    )

    return {'maze' : env.id,'avg_explore' : avg_explore,'avg_exploit' : avg_exploit}

Benchmarking without the genetic algorithm:

In [None]:
# Custom function for obtaining available environments
# Id prefixes of the maze-style environments that should be benchmarked
_MAZE_PREFIXES = ("Maze", "Woods", "Littman", "Miyazaki")


def filter_envs(env):
    """Tell whether the environment id starts with one of the maze prefixes."""
    return env.id.startswith(_MAZE_PREFIXES)


all_envs = list(gym.envs.registry.all())
maze_envs = list(filter(filter_envs, all_envs))

# Benchmark every selected maze in parallel (one task per environment)
parmap.map(bench_on_maze, maze_envs, pm_pbar=True)
# Sequential alternative:
#y = [bench_on_maze(x) for x in maze_envs]

Benchmarking with the genetic algorithm:

In [None]:
#cfg_explore.do_ga = True

# Custom function for obtaining available environments
#filter_envs = lambda env: env.id.startswith("Maze") or env.id.startswith("Woods") \
#    or env.id.startswith("Littman") or env.id.startswith("Miyazaki")

#all_envs = [env for env in gym.envs.registry.all()]
#maze_envs = [env for env in all_envs if filter_envs(env)]

#parmap.map(bench_on_maze, maze_envs, pm_pbar=True)
#y = [bench_on_maze(x) for x in maze_envs]

 ### Last results
 
 In all cases: no action planning, subsumption enabled, $\gamma$ = 0.95, $\theta_i$ = 0.1, $\theta_r$ = 0.9, $u_{max}$ = 8 (the length of the classifier), $\theta_{exp}$ = 20.
 
 In exploration, $\beta$ = 0.025, $prob_x$ = 0.3.
 
 If the genetic algorithm is used, $\theta_{ga}$ = 100, $\theta_{as}$ = 20, $\mu$ = 0.3 and $\chi$ = 0.8.
 
 Every environment was run for 1000 trials in Exploration and 500 trials in Exploitation; each reported average is taken over 30 independent runs.
 
 
| env.id      | Exploration Avg w/o GA | Exploitation Avg w/o GA | Exploration Avg w/ GA | Exploitation Avg w/ GA |
| ----------- |:------------------------:|:-------------------------:|:---------------------:|:--------------------:|
| MazeE1      | 0 | 0 | 0 | 0 |
| MazeE2      | 0 | 0 | 0 | 0 |
| MazeF1      | 40.09 | 1.846 | 0 | 0 |
| MazeF2      | 76.38 | 2.521 | 0 | 0 |
| MazeF3      | 77.95 | 3.387 | 0 | 0 |
| MazeF4      | 79.59 | 61.39 | 0 | 0 |
| Woods1      | 0 | 0 | 0 | 0 |
| Woods14     | 92.06 | 15.04 | 0 | 0 |
| Woods100    | 0 | 0 | 0 | 0 |
| Woods101    | 0 | 0 | 0 | 0 |
| Woods101demi| 0 | 0 | 0 | 0 |
| Woods102    | 0 | 0 | 0 | 0 |
| Littman57   | 58.69 | 70.95 | 0 | 0 |
| Maze4       | 71.93 | 6.997 | 0 | 0 |
| Maze5       | 0 | 0 | 0 | 0 |
| Maze6       | 0 | 0 | 0 | 0 |
| Maze7       | 77.26 | 53.57 | 0 | 0 |
| Maze10      | 0 | 0 | 0 | 0 |
| MiyazakiA   | 0 | 0 | 0 | 0 |
