In [1]:
# Logger
import logging
logging.basicConfig(level=logging.WARN)

# Enable automatic module reload
%load_ext autoreload
%autoreload 2

# Load PyALCS module
from lcs.agents.acs2 import ACS2, Configuration, ClassifiersList

# Load environments
import gym
import my_mazes

# Run the benchmarks in parallel across several processes
import parmap

# For computing the standard deviation
import statistics

In [2]:
# Utils section

from lcs.metrics import population_metrics
from lcs.strategies.subsumption import does_subsume

def _maze_knowledge(population, environment) -> float:
    transitions = environment.env.get_all_possible_transitions()
    # Take into consideration only reliable classifiers
    reliable_classifiers = [c for c in population if c.is_reliable()]
    # Count how many transitions are anticipated correctly
    nr_correct = 0
    # For all possible destinations from each path cell
    for start, action, end in transitions:
        p0 = environment.env.maze.perception(*start)
        p1 = environment.env.maze.perception(*end)
        if any([True for cl in reliable_classifiers
                if cl.predicts_successfully(p0, action, p1)]):
            nr_correct += 1
    return nr_correct / len(transitions) * 100.0

def _maze_metrics(pop, env):
    """Collect the per-trial metrics reported for a maze run.

    The returned dictionary holds the 'knowledge' value computed by
    ``_maze_knowledge`` and is then updated with everything returned by
    ``population_metrics(pop, env)``.
    """
    collected = dict(knowledge=_maze_knowledge(pop, env))
    # Basic population metrics are merged on top of the knowledge entry
    collected.update(population_metrics(pop, env))
    return collected

def clean_population(agent, does_anticipate_change: bool = True):
    """Compact ``agent.population`` in place.

    First every classifier for which ``does_subsume(other, cl, theta_exp)``
    holds for some ``other`` of the population is dropped. Then, when
    ``does_anticipate_change`` is true, only the classifiers whose
    ``does_anticipate_change()`` method returns a truthy value are kept.

    The agent's ``population`` attribute is replaced by a new
    ``ClassifiersList``; nothing is returned.
    """
    theta_exp_of = agent.cfg  # read ``theta_exp`` lazily, as each test is made
    survivors = [
        candidate
        for candidate in agent.population
        if not any(does_subsume(rival, candidate, theta_exp_of.theta_exp)
                   for rival in agent.population)
    ]
    agent.population = ClassifiersList(*survivors)

    if not does_anticipate_change:
        return

    changing = [cl for cl in agent.population if cl.does_anticipate_change()]
    agent.population = ClassifiersList(*changing)

# Agent - ACS2 - BENCHMARKING

## Main Parameters

In [3]:
# --- Classifier / environment shape -----------------------------------------
CLASSIFIER_LENGTH          = 8   # used as classifier_length and u_max of every Configuration
NUMBER_OF_POSSIBLE_ACTIONS = 8   # used as number_of_possible_actions of every Configuration

# --- Exploration phase -------------------------------------------------------
NUMBER_OF_EXPLORE_TRIALS        = 1000   # trials given to explore()
METRICS_TRIAL_FREQUENCY_EXPLORE = 20     # metrics_trial_frequency of the exploration config
BETA_EXPLORE                    = 0.05   # beta of the exploration config
EPSILON                         = 0.8    # epsilon of the exploration config

# --- Exploitation phase 1: beta set to zero ("no RL") ------------------------
NUMBER_OF_EXPLOIT_TRIALS_NO_RL = 500
BETA_EXPLOIT_NO_RL             = 0.00

# --- Exploitation phase 2: beta switched back on ("RL start") ----------------
NUMBER_OF_EXPLOIT_TRIALS_RL_START = 100
BETA_EXPLOIT_RL_START             = 0.05

# --- Exploitation phase 3: beta still on ("RL") ------------------------------
NUMBER_OF_EXPLOIT_TRIALS_RL = 500
BETA_EXPLOIT_RL             = 0.05

# --- Benchmark harness -------------------------------------------------------
NUMBER_OF_ITERATIONS_TO_BENCH = 30   # independent runs per maze

NB_OF_PROCESSES = 24                 # worker processes handed to parmap

## Full Details of Agent Parameters

In [4]:
def _make_configuration(beta, epsilon, metrics_trial_frequency):
    """Build an ACS2 ``Configuration`` for one phase of the benchmark.

    All phases share the same settings except for the three values exposed
    as parameters here; keeping the shared part in one place prevents the
    four configurations from silently drifting apart.

    Parameters
    ----------
    beta
        Value passed as ``beta``.
    epsilon
        Value passed as ``epsilon``.
    metrics_trial_frequency
        Value passed as ``metrics_trial_frequency``.
    """
    return Configuration(
        classifier_length=CLASSIFIER_LENGTH,
        number_of_possible_actions=NUMBER_OF_POSSIBLE_ACTIONS,
        user_metrics_collector_fcn=_maze_metrics,
        metrics_trial_frequency=metrics_trial_frequency,
        do_ga=False,
        do_subsumption=True,
        do_action_planning=False,
        beta=beta,
        gamma=0.95,
        theta_i=0.1,
        theta_r=0.9,
        epsilon=epsilon,
        u_max=CLASSIFIER_LENGTH,
        theta_exp=20
    )


# Exploration: the only phase with a non-zero epsilon and with metrics
# collected every METRICS_TRIAL_FREQUENCY_EXPLORE trials.
cfg_explore = _make_configuration(
    beta=BETA_EXPLORE,
    epsilon=EPSILON,
    metrics_trial_frequency=METRICS_TRIAL_FREQUENCY_EXPLORE,
)

# Exploitation phases: epsilon is 0.0 and metrics are collected every trial;
# they differ from one another only by their beta constant.
cfg_exploit_no_rl = _make_configuration(
    beta=BETA_EXPLOIT_NO_RL,
    epsilon=0.0,
    metrics_trial_frequency=1,
)

cfg_exploit_rl_start = _make_configuration(
    beta=BETA_EXPLOIT_RL_START,
    epsilon=0.0,
    metrics_trial_frequency=1,
)

cfg_exploit_rl = _make_configuration(
    beta=BETA_EXPLOIT_RL,
    epsilon=0.0,
    metrics_trial_frequency=1,
)

### Benchmarking - Maze

In [5]:
def _run_exploit_phase(cfg, population, maze, nb_trials):
    """Seed a new ACS2 agent with `population`, compact it, then exploit `maze`.

    Returns whatever ``ACS2.exploit`` returns; the caller unpacks it as
    ``(population, metrics)``.
    """
    agent = ACS2(cfg, population)
    clean_population(agent, True)
    return agent.exploit(maze, nb_trials)


def _average_steps(trial_metrics, nb_records):
    """Sum the 'steps_in_trial' entries of `trial_metrics`, divided by `nb_records`."""
    return sum(record['steps_in_trial'] for record in trial_metrics) / nb_records


def _mean_and_pstdev(values):
    """Return ``(mean, population standard deviation)`` of `values`."""
    return float(sum(values)) / len(values), statistics.pstdev(values)


def acs2_bench_on_maze(env, n):
    """Benchmark ACS2 on one maze over `n` independent runs.

    Each run creates the environment with ``gym.make(env.id)``, trains an
    agent in exploration mode, then chains three exploitation phases
    (``cfg_exploit_no_rl``, ``cfg_exploit_rl_start``, ``cfg_exploit_rl``),
    each one starting from the population returned by the previous phase
    after it went through ``clean_population``.

    Parameters
    ----------
    env
        Object exposing an ``id`` attribute accepted by ``gym.make``.
    n
        Number of runs. Must be at least 1: with no sample the averages
        cannot be computed and a ``ZeroDivisionError`` is raised.

    Returns
    -------
    dict
        ``'maze'`` (the environment id), an ``avg_*`` / ``std_*`` pair
        (mean and population standard deviation over the runs) for the
        average steps of each phase and for the final 'knowledge',
        'population', 'numerosity' and 'reliable' metrics, plus the raw
        per-run step averages of the no-RL and RL exploitation phases.
        The dictionary is also printed.
    """
    # One list of per-run samples per reported quantity. The insertion order
    # drives the order of the avg_/std_ keys in the result dictionary.
    samples = {
        name: []
        for name in ('explore', 'exploit_no_rl', 'exploit_rl_start', 'exploit_rl',
                     'knowledge', 'population', 'numerosity', 'reliable')
    }

    for i in range(n):

        # Initialize environment
        maze = gym.make(env.id)

        # Reset it, by putting an agent into random position
        maze.reset()

        # Training of ACS2 - Exploration
        population, metrics_explore = ACS2(cfg_explore).explore(
            maze, NUMBER_OF_EXPLORE_TRIALS)

        # Using ACS2 - Exploitation - No RL module
        population, metrics_exploit_no_rl = _run_exploit_phase(
            cfg_exploit_no_rl, population, maze, NUMBER_OF_EXPLOIT_TRIALS_NO_RL)

        # Using ACS2 - Exploitation - Starting using RL module
        population, metrics_exploit_rl_start = _run_exploit_phase(
            cfg_exploit_rl_start, population, maze, NUMBER_OF_EXPLOIT_TRIALS_RL_START)

        # Using ACS2 - Exploitation - Using RL module
        population, metrics_exploit_rl = _run_exploit_phase(
            cfg_exploit_rl, population, maze, NUMBER_OF_EXPLOIT_TRIALS_RL)

        # Average number of steps per recorded trial in exploration mode.
        # NOTE(review): the divisor assumes exactly one metrics record every
        # METRICS_TRIAL_FREQUENCY_EXPLORE trials - confirm, otherwise divide
        # by len(metrics_explore) instead.
        samples['explore'].append(_average_steps(
            metrics_explore,
            NUMBER_OF_EXPLORE_TRIALS / METRICS_TRIAL_FREQUENCY_EXPLORE))

        # Average number of steps per trial in all exploitation modes
        # (their configurations record metrics on every trial).
        samples['exploit_no_rl'].append(_average_steps(
            metrics_exploit_no_rl, NUMBER_OF_EXPLOIT_TRIALS_NO_RL))
        samples['exploit_rl_start'].append(_average_steps(
            metrics_exploit_rl_start, NUMBER_OF_EXPLOIT_TRIALS_RL_START))
        samples['exploit_rl'].append(_average_steps(
            metrics_exploit_rl, NUMBER_OF_EXPLOIT_TRIALS_RL))

        # Other metrics are read from the very last exploitation record
        final_record = metrics_exploit_rl[-1]
        for name in ('knowledge', 'population', 'numerosity', 'reliable'):
            samples[name].append(final_record[name])

        # Textual progress bar: one '#' per finished run
        if i == n-1:
            print(env.id,'|','#' * (i+1), '|')
        else:
            print(env.id,'|','#' * (i+1) ,' ' * (n-i-2), '|')

    # Compute the means and standard deviations
    result = {'maze': env.id}
    for name, values in samples.items():
        result['avg_' + name], result['std_' + name] = _mean_and_pstdev(values)
    result['avg_exploit_no_rl_list'] = samples['exploit_no_rl']
    result['avg_exploit_rl_list'] = samples['exploit_rl']

    print(result)

    return result

Custom filter functions for selecting the available Gym environments by maze type:

In [6]:
def filter_envs_typeIII(env):
    """Return True when ``env.id`` starts with one of the type III maze prefixes.

    Note that the "Woods10" prefix also matches ids such as "Woods100-v0",
    "Woods101-v0", "Woods101demi-v0" and "Woods102-v0".
    """
    return env.id.startswith(("Maze10-", "MazeE1", "MazeE2", "Woods10"))


def filter_envs_typeII(env):
    """Return True when ``env.id`` starts with one of the type II maze prefixes."""
    return env.id.startswith(("MazeF4", "Maze7", "MiyazakiB"))


def filter_envs_typeI(env):
    """Return True when ``env.id`` starts with one of the type I maze prefixes."""
    return env.id.startswith(
        ("MazeB", "MazeD", "Littman", "MiyazakiA", "Cassandra"))


def filter_envs_na(env):
    """Return True when ``env.id`` starts with one of the remaining ("na") maze prefixes.

    "Woods1-" keeps its trailing dash so that it does not also match the
    "Woods10x" and "Woods14" ids; "Woods14" is listed separately.
    """
    return env.id.startswith(
        ("MazeF1", "MazeF2", "MazeF3", "Woods1-",
         "Woods14", "Maze4", "Maze5", "MazeA"))

# Snapshot of every environment spec currently registered in Gym
all_envs = list(gym.envs.registry.all())

Benchmarking without the genetic algorithm:

In [7]:
# Keep the registered environments accepted by at least one maze filter
# (filters are tried in the order listed and stop at the first match).
maze_envs = [
    env for env in all_envs
    if any(accepts(env) for accepts in (filter_envs_na,
                                        filter_envs_typeI,
                                        filter_envs_typeII,
                                        filter_envs_typeIII))
]

# One benchmark task per maze, spread over the worker processes
results = parmap.map(
    acs2_bench_on_maze,
    maze_envs,
    NUMBER_OF_ITERATIONS_TO_BENCH,
    pm_pbar=True,
    pm_processes=NB_OF_PROCESSES,
)

  0%|          | 0/24 [00:00<?, ?it/s]

Woods100-v0 | #                              |
MazeF1-v0 | #                              |
MazeF2-v0 | #                              |
Woods100-v0 | ##                             |
Woods1-v0 | #                              |
MazeF1-v0 | ##                             |
MazeF3-v0 | #                              |
Woods100-v0 | ###                            |
MazeF1-v0 | ###                            |
Woods100-v0 | ####                           |
MazeF2-v0 | ##                             |
Woods1-v0 | ##                             |
MazeF1-v0 | ####                           |
Woods101-v0 | #                              |
Littman57-v0 | #                              |
Cassandra4x4-v0 | #                              |
Woods100-v0 | #####                          |
MazeF1-v0 | #####                          |
MazeF3-v0 | ##                             |
Woods100-v0 | ######                         |
Woods14-v0 | #                              |
Maze7-v0 | #                   

  4%|▍         | 1/24 [04:02<1:32:46, 242.02s/it]

Littman57-v0 | ########                       |
Woods101-v0 | #######                        |
Woods101demi-v0 | ####                           |
MazeF2-v0 | ################               |
Maze10-v0 | ###                            |
MazeF1-v0 | ############################## |
{'maze': 'MazeF1-v0', 'avg_explore': 10.159999999999998, 'std_explore': 1.3692138863839598, 'avg_exploit_no_rl': 1.7921333333333334, 'std_exploit_no_rl': 0.03401150132267354, 'avg_exploit_rl_start': 1.815, 'std_exploit_rl_start': 0.0743303437365925, 'avg_exploit_rl': 1.7977999999999996, 'std_exploit_rl': 0.0313192166781568, 'avg_knowledge': 100.0, 'std_knowledge': 0.0, 'avg_population': 14.0, 'std_population': 0.0, 'avg_numerosity': 14.0, 'std_numerosity': 0.0, 'avg_reliable': 14.0, 'std_reliable': 0.0, 'avg_exploit_no_rl_list': [1.72, 1.786, 1.816, 1.774, 1.84, 1.832, 1.812, 1.826, 1.824, 1.85, 1.776, 1.75, 1.832, 1.76, 1.796, 1.8, 1.798, 1.744, 1.814, 1.788, 1.792, 1.754, 1.726, 1.832, 1.764, 1.762, 1.808, 1

  8%|▊         | 2/24 [04:10<1:02:59, 171.81s/it]

MazeA-v0 | ###                            |
Woods102-v0 | ####                           |
MazeF2-v0 | #################              |
Woods1-v0 | ##############                 |
MiyazakiA-v0 | ##                             |
MiyazakiB-v0 | ##                             |
Cassandra4x4-v0 | ######                         |
Maze7-v0 | #####                          |
Littman57-v0 | #########                      |
Woods14-v0 | #####                          |
MazeF2-v0 | ##################             |
MazeF3-v0 | ##########                     |
Woods101-v0 | ########                       |
Woods1-v0 | ###############                |
MazeB-v0 | ###                            |
MazeF4-v0 | ####                           |
MazeD-v0 | ####                           |
MazeF2-v0 | ###################            |
Littman57-v0 | ##########                     |
MazeF3-v0 | ###########                    |
Cassandra4x4-v0 | #######                        |
Woods1-v0 | ################  

 12%|█▎        | 3/24 [09:00<1:12:32, 207.27s/it]

MiyazakiA-v0 | ####                           |
MazeF3-v0 | ###################            |
Woods101demi-v0 | #########                      |
Woods1-v0 | ############################   |
Woods101-v0 | ###############                |
Woods102-v0 | ########                       |
Littman57-v0 | ##################             |
MazeF4-v0 | ########                       |
Woods1-v0 | #############################  |
MazeF3-v0 | ####################           |
Littman89-v0 | #########                      |
MazeB-v0 | #####                          |
Cassandra4x4-v0 | #############                  |
Woods1-v0 | ############################## |
{'maze': 'Woods1-v0', 'avg_explore': 9.109333333333334, 'std_explore': 1.1413878491653142, 'avg_exploit_no_rl': 1.6223333333333332, 'std_exploit_no_rl': 0.02082359772523037, 'avg_exploit_rl_start': 1.6213333333333333, 'std_exploit_rl_start': 0.04551434450319538, 'avg_exploit_rl': 1.6288000000000002, 'std_exploit_rl': 0.023143609629153863, 'avg_

 17%|█▋        | 4/24 [09:44<52:45, 158.29s/it]  

Maze4-v0 | #####                          |
Littman57-v0 | ###################            |
MazeD-v0 | #######                        |
Woods101-v0 | ################               |
Maze5-v0 | ###                            |
MazeF3-v0 | #####################          |
Woods14-v0 | #########                      |
Maze7-v0 | ##########                     |
Woods101demi-v0 | ##########                     |
Littman57-v0 | ####################           |
Maze10-v0 | #######                        |
MazeA-v0 | #######                        |
Woods102-v0 | #########                      |
MazeF3-v0 | ######################         |
Woods101-v0 | #################              |
MazeF4-v0 | #########                      |
Cassandra4x4-v0 | ##############                 |
Littman57-v0 | #####################          |
MazeD-v0 | ########                       |
Woods14-v0 | ##########                     |
MazeF3-v0 | #######################        |
Littman89-v0 | ##########       

 21%|██        | 5/24 [14:10<1:00:21, 190.61s/it]

MazeF4-v0 | ############                   |
Littman57-v0 | ############################   |
Maze10-v0 | ##########                     |
MazeD-v0 | ###########                    |
Woods101-v0 | ########################       |
MazeB-v0 | ########                       |
MazeA-v0 | ##########                     |
Cassandra4x4-v0 | ###################            |
Woods14-v0 | ##############                 |
Littman89-v0 | #############                  |
Littman57-v0 | #############################  |
Maze7-v0 | ###############                |
Woods101demi-v0 | ###############                |
MazeE1-v0 | ####                           |
Woods101-v0 | #########################      |
Woods102-v0 | #############                  |
Littman57-v0 | ############################## |
{'maze': 'Littman57-v0', 'avg_explore': 22.339333333333336, 'std_explore': 3.9688436043204773, 'avg_exploit_no_rl': 37.1658, 'std_exploit_no_rl': 27.42061168464336, 'avg_exploit_rl_start': 8.202666666666667, 

 25%|██▌       | 6/24 [15:20<46:19, 154.43s/it]  

MiyazakiB-v0 | ######                         |
MazeF4-v0 | #############                  |
Cassandra4x4-v0 | ####################           |
Maze4-v0 | ########                       |
MiyazakiA-v0 | #######                        |
Maze10-v0 | ###########                    |
Woods101-v0 | ##########################     |
Woods14-v0 | ###############                |
MazeA-v0 | ###########                    |
Woods101demi-v0 | ################               |
Maze7-v0 | ################               |
Littman89-v0 | ##############                 |
MazeD-v0 | ############                   |
MazeB-v0 | #########                      |
Cassandra4x4-v0 | #####################          |
Woods101-v0 | ###########################    |
Woods102-v0 | ##############                 |
Maze5-v0 | #####                          |
MazeF4-v0 | ##############                 |
Woods101demi-v0 | #################              |
Maze7-v0 | #################              |
Woods14-v0 | #########

 29%|██▉       | 7/24 [17:58<44:03, 155.51s/it]

Littman89-v0 | ################               |
Cassandra4x4-v0 | ########################       |
MazeD-v0 | ##############                 |
Maze7-v0 | ###################            |
Maze10-v0 | #############                  |
Woods101demi-v0 | ###################            |
MazeE1-v0 | #####                          |
Woods102-v0 | ################               |
Woods14-v0 | ##################             |
MazeF4-v0 | ################               |
Cassandra4x4-v0 | #########################      |
Littman89-v0 | #################              |
MazeA-v0 | ##############                 |
Maze7-v0 | ####################           |
MazeB-v0 | ###########                    |
MazeD-v0 | ###############                |
Maze4-v0 | ##########                     |
Woods101demi-v0 | ####################           |
MiyazakiA-v0 | #########                      |
Cassandra4x4-v0 | ##########################     |
MiyazakiB-v0 | ########                       |
Maze5-v0 | ######

 33%|███▎      | 8/24 [22:10<49:11, 184.46s/it]

Woods101demi-v0 | #######################        |
MazeE2-v0 | ####                           |
Maze10-v0 | ################               |
MazeD-v0 | ##################             |
Maze5-v0 | #######                        |
Littman89-v0 | #####################          |
Maze7-v0 | ########################       |
Woods102-v0 | ####################           |
MazeA-v0 | #################              |
Maze4-v0 | ############                   |
Woods14-v0 | ######################         |
MazeF4-v0 | ####################           |
MazeB-v0 | ##############                 |
Woods101demi-v0 | ########################       |
MiyazakiA-v0 | ###########                    |
Maze10-v0 | #################              |
Maze7-v0 | #########################      |
MazeD-v0 | ###################            |
Littman89-v0 | ######################         |
MiyazakiB-v0 | ##########                     |
Woods102-v0 | #####################          |
MazeA-v0 | ##################     

 38%|███▊      | 9/24 [27:16<55:13, 220.93s/it]

Woods14-v0 | ###########################    |
Maze4-v0 | ###############                |
MazeD-v0 | #######################        |
Woods101demi-v0 | #############################  |
MiyazakiB-v0 | ############                   |
MazeB-v0 | #################              |
Woods102-v0 | #########################      |
MazeA-v0 | #####################          |
MazeE1-v0 | ########                       |
MazeF4-v0 | #########################      |
Maze5-v0 | #########                      |
Littman89-v0 | ###########################    |
Maze10-v0 | #####################          |
Woods14-v0 | ############################   |
Woods101demi-v0 | ############################## |
{'maze': 'Woods101demi-v0', 'avg_explore': 37.21600000000001, 'std_explore': 3.6838617418862682, 'avg_exploit_no_rl': 68.09920000000001, 'std_exploit_no_rl': 2.92576597833798, 'avg_exploit_rl_start': 21.521333333333327, 'std_exploit_rl_start': 5.0656152856511145, 'avg_exploit_rl': 47.56026666666667, 'std_ex

 42%|████▏     | 10/24 [28:18<40:25, 173.25s/it]

Woods102-v0 | ##########################     |
MazeD-v0 | ########################       |
MiyazakiA-v0 | ##############                 |
MazeF4-v0 | ##########################     |
MazeA-v0 | ######################         |
Maze4-v0 | ################               |
MazeB-v0 | ##################             |
Woods14-v0 | #############################  |
Littman89-v0 | ############################   |
Maze10-v0 | ######################         |
Woods102-v0 | ###########################    |
MazeD-v0 | #########################      |
MiyazakiB-v0 | #############                  |
MazeF4-v0 | ###########################    |
Woods14-v0 | ############################## |
{'maze': 'Woods14-v0', 'avg_explore': 68.95533333333333, 'std_explore': 4.936013731297306, 'avg_exploit_no_rl': 9.543666666666669, 'std_exploit_no_rl': 0.22463842552471358, 'avg_exploit_rl_start': 9.410333333333332, 'std_exploit_rl_start': 0.3779813340482419, 'avg_exploit_rl': 9.52006666666667, 'std_exploit_rl': 0

 46%|████▌     | 11/24 [30:06<33:17, 153.68s/it]

MazeA-v0 | #######################        |
Littman89-v0 | #############################  |
Maze4-v0 | #################              |
Woods102-v0 | ############################   |
MiyazakiA-v0 | ###############                |
MazeB-v0 | ###################            |
Maze5-v0 | ##########                     |
Maze10-v0 | #######################        |
MazeD-v0 | ##########################     |
MazeF4-v0 | ############################   |
MazeE1-v0 | #########                      |
Littman89-v0 | ############################## |
{'maze': 'Littman89-v0', 'avg_explore': 32.95333333333333, 'std_explore': 5.725371215524419, 'avg_exploit_no_rl': 34.54213333333333, 'std_exploit_no_rl': 27.86738290754185, 'avg_exploit_rl_start': 6.685, 'std_exploit_rl_start': 2.1269034612161724, 'avg_exploit_rl': 5.361266666666668, 'std_exploit_rl': 0.7822089206571065, 'avg_knowledge': 71.42857142857139, 'std_knowledge': 0.0, 'avg_population': 100.3, 'std_population': 3.917056718166503, 'avg_numero

 50%|█████     | 12/24 [31:12<25:28, 127.38s/it]

Woods102-v0 | #############################  |
MazeF4-v0 | #############################  |
Maze10-v0 | ########################       |
MazeD-v0 | ###########################    |
MiyazakiB-v0 | ##############                 |
MazeE2-v0 | ######                         |
MazeB-v0 | ####################           |
Maze4-v0 | ##################             |
MazeA-v0 | #########################      |
Woods102-v0 | ############################## |
{'maze': 'Woods102-v0', 'avg_explore': 36.70466666666666, 'std_explore': 4.505435778651779, 'avg_exploit_no_rl': 60.99926666666667, 'std_exploit_no_rl': 9.220825060457202, 'avg_exploit_rl_start': 12.673333333333337, 'std_exploit_rl_start': 2.2834145970940587, 'avg_exploit_rl': 26.11466666666666, 'std_exploit_rl': 2.150538867870614, 'avg_knowledge': 51.260162601626035, 'std_knowledge': 0.21890913850140248, 'avg_population': 74.43333333333334, 'std_population': 1.6669999666733317, 'avg_numerosity': 74.43333333333334, 'std_numerosity': 1.666999

 54%|█████▍    | 13/24 [32:30<20:38, 112.57s/it]

MazeF4-v0 | ############################## |
{'maze': 'MazeF4-v0', 'avg_explore': 56.887333333333316, 'std_explore': 6.591104072072363, 'avg_exploit_no_rl': 52.095, 'std_exploit_no_rl': 5.803731207881127, 'avg_exploit_rl_start': 50.260666666666665, 'std_exploit_rl_start': 9.374543165165734, 'avg_exploit_rl': 44.64179999999999, 'std_exploit_rl': 9.636435348543914, 'avg_knowledge': 87.2, 'std_knowledge': 2.6127890589687235, 'avg_population': 43.03333333333333, 'std_population': 2.258071940591993, 'avg_numerosity': 43.03333333333333, 'std_numerosity': 2.258071940591993, 'avg_reliable': 31.966666666666665, 'std_reliable': 5.381965770567066, 'avg_exploit_no_rl_list': [58.092, 57.914, 61.784, 48.854, 42.476, 41.186, 51.38, 53.916, 50.426, 53.938, 49.548, 49.278, 50.62, 59.812, 48.71, 42.056, 52.754, 40.936, 53.92, 52.008, 60.214, 55.494, 50.206, 48.656, 61.572, 53.766, 51.19, 49.878, 62.226, 50.04], 'avg_exploit_rl_list': [37.73, 66.17, 56.404, 48.454, 54.692, 56.886, 59.41, 35.46, 54.484, 3

 58%|█████▊    | 14/24 [32:40<13:37, 81.80s/it] 

MazeD-v0 | ############################   |
Maze10-v0 | #########################      |
Maze5-v0 | ###########                    |
MazeA-v0 | ##########################     |
MazeB-v0 | #####################          |
Maze4-v0 | ###################            |
MazeD-v0 | #############################  |
MazeE1-v0 | ##########                     |
MiyazakiB-v0 | ###############                |
MiyazakiA-v0 | #################              |
Maze10-v0 | ##########################     |
MazeA-v0 | ###########################    |
MazeD-v0 | ############################## |
{'maze': 'MazeD-v0', 'avg_explore': 25.210666666666665, 'std_explore': 2.6643509945617567, 'avg_exploit_no_rl': 2.774933333333333, 'std_exploit_no_rl': 0.05840715329097583, 'avg_exploit_rl_start': 2.772666666666667, 'std_exploit_rl_start': 0.10009773002199181, 'avg_exploit_rl': 2.783733333333333, 'std_exploit_rl': 0.060794151765518396, 'avg_knowledge': 87.53472222222221, 'std_knowledge': 0.18698488913661304, 'avg_

 62%|██████▎   | 15/24 [34:44<14:10, 94.46s/it]

MazeB-v0 | ######################         |
Maze4-v0 | ####################           |
Maze10-v0 | ###########################    |
MazeA-v0 | ############################   |
MiyazakiA-v0 | ##################             |
MazeE2-v0 | #######                        |
Maze5-v0 | ############                   |
MiyazakiB-v0 | ################               |
MazeB-v0 | #######################        |
Maze10-v0 | ############################   |
MazeA-v0 | #############################  |
Maze4-v0 | #####################          |
MazeE1-v0 | ###########                    |
MiyazakiA-v0 | ###################            |
Maze10-v0 | #############################  |
MazeA-v0 | ############################## |
{'maze': 'MazeA-v0', 'avg_explore': 49.52466666666667, 'std_explore': 4.249011440585001, 'avg_exploit_no_rl': 4.242, 'std_exploit_no_rl': 0.08571580951026468, 'avg_exploit_rl_start': 4.239000000000001, 'std_exploit_rl_start': 0.15062868252759837, 'avg_exploit_rl': 4.229666666666

 67%|██████▋   | 16/24 [37:46<16:05, 120.73s/it]

MiyazakiB-v0 | #################              |
Maze5-v0 | #############                  |
Maze4-v0 | ######################         |
Maze10-v0 | ############################## |
{'maze': 'Maze10-v0', 'avg_explore': 51.876000000000005, 'std_explore': 4.628973680345712, 'avg_exploit_no_rl': 75.32839999999997, 'std_exploit_no_rl': 6.73049282791882, 'avg_exploit_rl_start': 57.74833333333334, 'std_exploit_rl_start': 8.840723229590564, 'avg_exploit_rl': 64.81706666666668, 'std_exploit_rl': 4.198224886253184, 'avg_knowledge': 65.39007092198582, 'std_knowledge': 2.3264141087740016, 'avg_population': 52.5, 'std_population': 3.9560080889704965, 'avg_numerosity': 52.5, 'std_numerosity': 3.9560080889704965, 'avg_reliable': 29.533333333333335, 'std_reliable': 2.334285520001546, 'avg_exploit_no_rl_list': [78.324, 74.896, 79.28, 69.434, 75.338, 79.674, 78.286, 82.562, 77.32, 66.336, 77.112, 71.008, 76.154, 72.528, 77.73, 78.504, 51.416, 79.088, 75.154, 74.974, 80.904, 80.848, 79.478, 59.676, 80.25

 71%|███████   | 17/24 [38:38<11:40, 100.11s/it]

MiyazakiA-v0 | ####################           |
MazeB-v0 | #########################      |
MiyazakiB-v0 | ##################             |
MazeE1-v0 | ############                   |
Maze4-v0 | #######################        |
MazeE2-v0 | ########                       |
MazeB-v0 | ##########################     |
MiyazakiA-v0 | #####################          |
Maze5-v0 | ##############                 |
Maze4-v0 | ########################       |
MiyazakiB-v0 | ###################            |
MazeB-v0 | ###########################    |
MiyazakiA-v0 | ######################         |
MazeE1-v0 | #############                  |
Maze4-v0 | #########################      |
MazeB-v0 | ############################   |
MiyazakiB-v0 | ####################           |
Maze5-v0 | ###############                |
MiyazakiA-v0 | #######################        |
MazeB-v0 | #############################  |
Maze4-v0 | ##########################     |
MazeE2-v0 | #########                      |


 75%|███████▌  | 18/24 [45:40<19:40, 196.69s/it]

Maze4-v0 | ###########################    |
MiyazakiB-v0 | ######################         |
Maze4-v0 | ############################   |
MiyazakiA-v0 | #########################      |
Maze5-v0 | #################              |
MazeE1-v0 | ###############                |
Maze4-v0 | #############################  |
MiyazakiB-v0 | #######################        |
MazeE2-v0 | ##########                     |
MiyazakiA-v0 | ##########################     |
Maze4-v0 | ############################## |
{'maze': 'Maze4-v0', 'avg_explore': 31.803333333333335, 'std_explore': 2.811860989135526, 'avg_exploit_no_rl': 3.5101333333333327, 'std_exploit_no_rl': 0.06210514381988727, 'avg_exploit_rl_start': 3.513333333333333, 'std_exploit_rl_start': 0.15657444093962322, 'avg_exploit_rl': 3.5033333333333343, 'std_exploit_rl': 0.05303416089863418, 'avg_knowledge': 100.0, 'std_knowledge': 0.0, 'avg_population': 162.53333333333333, 'std_population': 4.447721014432248, 'avg_numerosity': 162.53333333333333, '

 79%|███████▉  | 19/24 [50:18<18:25, 221.09s/it]

MiyazakiA-v0 | ###########################    |
Maze5-v0 | ##################             |
MiyazakiB-v0 | ########################       |
MazeE1-v0 | ################               |
MiyazakiA-v0 | ############################   |
MiyazakiB-v0 | #########################      |
Maze5-v0 | ###################            |
MiyazakiA-v0 | #############################  |
MazeE2-v0 | ###########                    |
MazeE1-v0 | #################              |
MiyazakiB-v0 | ##########################     |
MiyazakiA-v0 | ############################## |
{'maze': 'MiyazakiA-v0', 'avg_explore': 26.455333333333336, 'std_explore': 4.11796934854493, 'avg_exploit_no_rl': 9.073733333333333, 'std_exploit_no_rl': 12.079408293823374, 'avg_exploit_rl_start': 3.899666666666667, 'std_exploit_rl_start': 0.44868313491322087, 'avg_exploit_rl': 3.7806, 'std_exploit_rl': 0.20518229293321907, 'avg_knowledge': 70.64444444444446, 'std_knowledge': 0.11967032904743427, 'avg_population': 211.23333333333332, 's

 83%|████████▎ | 20/24 [55:16<16:16, 244.17s/it]

Maze5-v0 | ####################           |
MiyazakiB-v0 | ###########################    |
MazeE1-v0 | ##################             |
MazeE2-v0 | ############                   |
Maze5-v0 | #####################          |
MiyazakiB-v0 | ############################   |
MiyazakiB-v0 | #############################  |
MazeE1-v0 | ###################            |
Maze5-v0 | ######################         |
MiyazakiB-v0 | ############################## |
{'maze': 'MiyazakiB-v0', 'avg_explore': 41.747333333333344, 'std_explore': 4.226508356656697, 'avg_exploit_no_rl': 5.159066666666665, 'std_exploit_no_rl': 5.583300857218982, 'avg_exploit_rl_start': 3.6839999999999997, 'std_exploit_rl_start': 0.20685905024114043, 'avg_exploit_rl': 3.6695333333333333, 'std_exploit_rl': 0.10411619577290664, 'avg_knowledge': 77.92857142857143, 'std_knowledge': 0.21428571428571247, 'avg_population': 202.16666666666666, 'std_population': 7.2253412068604455, 'avg_numerosity': 202.16666666666666, 'std_numerosi

 88%|████████▊ | 21/24 [1:02:04<14:39, 293.33s/it]

MazeE2-v0 | #############                  |
Maze5-v0 | #######################        |
MazeE1-v0 | ####################           |
Maze5-v0 | ########################       |
MazeE1-v0 | #####################          |
MazeE2-v0 | ##############                 |
Maze5-v0 | #########################      |
MazeE1-v0 | ######################         |
Maze5-v0 | ##########################     |
MazeE2-v0 | ###############                |
MazeE1-v0 | #######################        |
Maze5-v0 | ###########################    |
MazeE1-v0 | ########################       |
Maze5-v0 | ############################   |
MazeE2-v0 | ################               |
MazeE1-v0 | #########################      |
Maze5-v0 | #############################  |
MazeE2-v0 | #################              |
MazeE1-v0 | ##########################     |
Maze5-v0 | ############################## |
{'maze': 'Maze5-v0', 'avg_explore': 47.475333333333325, 'std_explore': 3.8874933940636973, 'avg_exploit_no_r

 92%|█████████▏| 22/24 [1:20:20<17:48, 534.16s/it]

MazeE1-v0 | ###########################    |
MazeE2-v0 | ##################             |
MazeE1-v0 | ############################   |
MazeE2-v0 | ###################            |
MazeE1-v0 | #############################  |
MazeE1-v0 | ############################## |
{'maze': 'MazeE1-v0', 'avg_explore': 20.938666666666663, 'std_explore': 2.8634661203203056, 'avg_exploit_no_rl': 37.84579999999999, 'std_exploit_no_rl': 6.132519424619325, 'avg_exploit_rl_start': 6.017666666666666, 'std_exploit_rl_start': 0.9847899381199131, 'avg_exploit_rl': 4.471933333333334, 'std_exploit_rl': 0.9666633310287277, 'avg_knowledge': 53.37499999999999, 'std_knowledge': 0.12499999999999929, 'avg_population': 349.8, 'std_population': 17.514565367145142, 'avg_numerosity': 349.8, 'std_numerosity': 17.514565367145142, 'avg_reliable': 157.06666666666666, 'std_reliable': 4.898525855352358, 'avg_exploit_no_rl_list': [28.116, 35.042, 38.442, 37.132, 39.384, 37.588, 33.442, 35.502, 36.274, 45.802, 41.91, 32.844, 38.

 96%|█████████▌| 23/24 [1:32:14<09:48, 588.13s/it]

MazeE2-v0 | ####################           |
MazeE2-v0 | #####################          |
MazeE2-v0 | ######################         |
MazeE2-v0 | #######################        |
MazeE2-v0 | ########################       |
MazeE2-v0 | #########################      |
MazeE2-v0 | ##########################     |
MazeE2-v0 | ###########################    |
MazeE2-v0 | ############################   |
MazeE2-v0 | #############################  |
MazeE2-v0 | ############################## |
{'maze': 'MazeE2-v0', 'avg_explore': 28.522666666666666, 'std_explore': 3.8281404827351304, 'avg_exploit_no_rl': 58.883133333333326, 'std_exploit_no_rl': 3.0389295344834095, 'avg_exploit_rl_start': 18.18933333333333, 'std_exploit_rl_start': 4.479970932445383, 'avg_exploit_rl': 36.50726666666666, 'std_exploit_rl': 3.7947058378160956, 'avg_knowledge': 36.875, 'std_knowledge': 2.0354905926848663, 'avg_population': 304.93333333333334, 'std_population': 23.878767881855953, 'avg_numerosity': 304.9333333333

100%|██████████| 24/24 [2:17:48<00:00, 344.52s/it] 


Parse the previous results into one markdown table row per environment:

In [8]:
# Render `results` as markdown table rows, one row per benchmarked maze.
# Row layout: agent label | maze | explore avg, std
#             | exploit without RL avg, std, min, max
#             | exploit with RL avg, std, min, max
#             | knowledge avg, std | population avg, std | reliable avg, std
# Every numeric cell is formatted with three decimals.
fmt_3dp = "{:.3f}".format
md_rows = []

for item in results:
    maze_name = item['maze']
    stats = (
        item['avg_explore'],
        item['std_explore'],
        item['avg_exploit_no_rl'],
        item['std_exploit_no_rl'],
        min(item['avg_exploit_no_rl_list']),
        max(item['avg_exploit_no_rl_list']),
        item['avg_exploit_rl'],
        item['std_exploit_rl'],
        min(item['avg_exploit_rl_list']),
        max(item['avg_exploit_rl_list']),
        item['avg_knowledge'],
        item['std_knowledge'],
        item['avg_population'],
        item['std_population'],
        item['avg_reliable'],
        item['std_reliable'],
    )
    cells = ['| ACS2 ', maze_name] + [fmt_3dp(value) for value in stats]
    # Joining with '|' and closing with '|' reproduces the "cell|" pattern of each row.
    md_rows.append('|'.join(cells) + '|\n')

markdown_str = ''.join(md_rows)
print(markdown_str)

| ACS2 |Cassandra4x4-v0|13.626|1.452|2.953|0.423|2.212|4.038|2.915|0.415|2.204|3.952|50.741|0.489|130.700|10.799|54.467|3.222|
| ACS2 |Littman57-v0|22.339|3.969|37.166|27.421|3.590|70.384|4.319|1.165|3.562|8.300|73.496|3.256|34.633|0.657|27.200|1.013|
| ACS2 |Littman89-v0|32.953|5.725|34.542|27.867|4.558|63.952|5.361|0.782|4.584|7.624|71.429|0.000|100.300|3.917|71.167|0.522|
| ACS2 |MazeA-v0|49.525|4.249|4.242|0.086|4.098|4.512|4.230|0.088|4.070|4.514|100.000|0.000|102.633|2.810|102.633|2.810|
| ACS2 |MazeB-v0|34.960|4.203|10.676|13.965|3.874|54.072|4.507|0.425|4.002|5.420|80.425|0.176|160.600|8.689|118.867|3.314|
| ACS2 |MazeD-v0|25.211|2.664|2.775|0.058|2.656|2.902|2.784|0.061|2.658|2.920|87.535|0.187|128.667|5.723|107.167|3.670|
| ACS2 |MazeF4-v0|56.887|6.591|52.095|5.804|40.936|62.226|44.642|9.636|27.882|66.170|87.200|2.613|43.033|2.258|31.967|5.382|
| ACS2 |Maze4-v0|31.803|2.812|3.510|0.062|3.366|3.622|3.503|0.053|3.396|3.622|100.000|0.000|162.533|4.448|162.533|4.448|
| ACS2 |Maze

In [9]:
# Dump the raw, unformatted `results` list (one dict of aggregated metrics per maze).
print(results)

[{'maze': 'Cassandra4x4-v0', 'avg_explore': 13.625999999999998, 'std_explore': 1.4515844217038616, 'avg_exploit_no_rl': 2.9532000000000007, 'std_exploit_no_rl': 0.4233654371029045, 'avg_exploit_rl_start': 2.988333333333333, 'std_exploit_rl_start': 0.407922242045657, 'avg_exploit_rl': 2.9152000000000005, 'std_exploit_rl': 0.4154623448641284, 'avg_knowledge': 50.740740740740755, 'std_knowledge': 0.4886560529645236, 'avg_population': 130.7, 'std_population': 10.798611021793498, 'avg_numerosity': 130.7, 'std_numerosity': 10.798611021793498, 'avg_reliable': 54.46666666666667, 'std_reliable': 3.2221455929585523, 'avg_exploit_no_rl_list': [2.842, 3.358, 2.28, 3.148, 3.676, 2.886, 2.782, 2.97, 2.922, 2.974, 2.892, 2.774, 3.144, 2.246, 2.696, 3.054, 2.944, 3.03, 2.992, 2.926, 2.932, 2.972, 3.462, 4.038, 2.388, 3.092, 3.78, 2.256, 2.212, 2.928], 'avg_exploit_rl_list': [2.702, 3.502, 2.204, 2.994, 3.42, 2.998, 2.83, 2.928, 2.96, 2.986, 2.944, 2.88, 3.14, 2.276, 2.63, 3.078, 2.922, 2.846, 3.04, 2.