In [1]:
# Logger
import logging
logging.basicConfig(level=logging.WARN)

# Enable automatic module reload
%load_ext autoreload
%autoreload 2

# Load the BACS agent, its Configuration and the maze metric/plot helpers
from bacs.agents.bacs import BACS, Configuration
from bacs.agents.bacs.utils.GymMazeWrapper import _maze_metrics, parse_metrics_to_df, plot_performance

# Load environments
import gym
import my_mazes

## Agent - BACS

### Common parameters

In [2]:
# --- Problem definition ----------------------------------------------------
# Environment id handed to gym.make() and to plot_performance().
MAZE = "MazeE2-v0"
# Forwarded to Configuration as `classifier_length` (and reused for `u_max`).
CLASSIFIER_LENGTH = 8
# Forwarded to Configuration as `number_of_possible_actions`.
NUMBER_OF_POSSIBLE_ACTIONS = 8
# Forwarded to the exploration Configuration as `bs_max`.
LENGTH_OF_BEHAVIORAL_SEQUENCES = 1

# --- Exploration phase -----------------------------------------------------
# Number of trials requested from agent.explore().
NUMBER_OF_EXPLORE_TRIALS = 1000
# Feature switches of the exploration Configuration
# (`do_ga`, `do_subsumption`, `do_action_planning`).
DO_GA_EXPLORE = False
DO_SUBSUMPTION_EXPLORE = True
DO_ACTION_PLANNING_EXPLORE = False
# Forwarded to the exploration Configuration as `beta` and `epsilon`.
BETA_EXPLORE = 0.05
EPSILON = 0.8

# --- Exploitation phase ----------------------------------------------------
# Number of trials requested from agent.exploit().
NUMBER_OF_EXPLOIT_TRIALS = 500
# Feature switches of the exploitation Configuration
# (`do_subsumption`, `do_action_planning`).
DO_SUBSUMPTION_EXPLOIT = True
DO_ACTION_PLANNING_EXPLOIT = False
# Forwarded to the exploitation Configuration as `beta`.
BETA_EXPLOIT = 0.05

### Environment - Maze

In [3]:
# Build the maze environment registered under the id stored in MAZE.
# This `maze` object is the one later handed to explore(), exploit()
# and plot_performance().
maze = gym.make(MAZE)
# Reset the environment (per the original note: the agent is put into a
# random position). The returned observation is kept in `situation`; it is
# not referenced again in the cells visible here.
situation = maze.reset()
# Render the current maze state as text in the cell output.
maze.render()


[30m■[0m [30m■[0m [30m■[0m [30m■[0m [30m■[0m [30m■[0m [30m■[0m [30m■[0m [30m■[0m
[30m■[0m [37m□[0m [37m□[0m [37m□[0m [37m□[0m [37m□[0m [37m□[0m [37m□[0m [30m■[0m
[30m■[0m [37m□[0m [37m□[0m [37m□[0m [37m□[0m [37m□[0m [37m□[0m [37m□[0m [30m■[0m
[30m■[0m [37m□[0m [37m□[0m [37m□[0m [37m□[0m [37m□[0m [37m□[0m [37m□[0m [30m■[0m
[30m■[0m [37m□[0m [37m□[0m [37m□[0m [33m$[0m [37m□[0m [37m□[0m [37m□[0m [30m■[0m
[30m■[0m [37m□[0m [37m□[0m [37m□[0m [37m□[0m [37m□[0m [37m□[0m [37m□[0m [30m■[0m
[30m■[0m [37m□[0m [37m□[0m [31mA[0m [37m□[0m [37m□[0m [37m□[0m [37m□[0m [30m■[0m
[30m■[0m [37m□[0m [37m□[0m [37m□[0m [37m□[0m [37m□[0m [37m□[0m [37m□[0m [30m■[0m
[30m■[0m [30m■[0m [30m■[0m [30m■[0m [30m■[0m [30m■[0m [30m■[0m [30m■[0m [30m■[0m


### Training of BACS - Exploration

In [4]:
%%time

cfg_explore = Configuration(
    classifier_length=CLASSIFIER_LENGTH,
    number_of_possible_actions=NUMBER_OF_POSSIBLE_ACTIONS,
    user_metrics_collector_fcn=_maze_metrics,
    metrics_trial_frequency=20,
    do_ga=DO_GA_EXPLORE,
    do_subsumption=DO_SUBSUMPTION_EXPLORE,
    do_action_planning=DO_ACTION_PLANNING_EXPLORE,
    action_planning_frequency=50,
    beta=BETA_EXPLORE,
    gamma=0.95,
    theta_i=0.1,
    theta_r=0.9,
    epsilon=EPSILON,
    u_max=CLASSIFIER_LENGTH,
    theta_exp=20,
    theta_ga=100,
    theta_as=20,
    mu=0.3,
    chi=0.8,
    bs_max=LENGTH_OF_BEHAVIORAL_SEQUENCES
)

agent_explore = BACS(cfg_explore)
population_explore, metrics_explore = agent_explore.explore(maze, NUMBER_OF_EXPLORE_TRIALS)

CPU times: user 2min 21s, sys: 585 ms, total: 2min 22s
Wall time: 2min 26s


In [5]:
# Keep only the classifiers for which does_anticipate_change() is truthy,
# then print each one followed by blank lines as a visual separator.
population_explore_to_display = list(
    filter(lambda candidate: candidate.does_anticipate_change(), population_explore)
)
for cl in population_explore_to_display:
    print(cl)
    print("\n")

#####9## 3 None None #####0##         (empty)               q: 0.854 r: 99.26  ir: 0.0    f: 84.77  exp: 19  tga: 1     talp: 30181 tav: 1.51e+03 num: 1


#######9 4 None None #######0         (empty)               q: 0.743 r: 76.0   ir: 0.0    f: 56.5   exp: 12  tga: 17    talp: 29154 tav: 2.24e+03 num: 1


######9# 3 None None ######0#         (empty)               q: 0.887 r: 114.9  ir: 0.0    f: 102.0  exp: 22  tga: 23    talp: 28585 tav: 1.26e+03 num: 1


###111## 7 None None ###000##         (0{01}{01}111{01}{01}) q: 0.337 r: 304.7  ir: 0.0    f: 102.7  exp: 218 tga: 46    talp: 30072 tav: 1.53e+02 num: 1


9####### 4 None None 0#######         (empty)               q: 0.768 r: 76.13  ir: 0.0    f: 58.5   exp: 14  tga: 48    talp: 29339 tav: 1.95e+03 num: 1


######09 0 None None ######90         (empty)               q: 0.811 r: 394.4  ir: 0.0    f: 320.0  exp: 18  tga: 50    talp: 28803 tav: 1.51e+03 num: 1


#111#### 6 None None #000####         ({01}111{01}{01}0{01}) q: 0.364

0000#9#0 1 [7] [('0', '1', '1', '1', '0', '0', '0', '0')] 11###0#1         (empty)               q: 0.908 r: 167.9  ir: 0.0    f: 152.4  exp: 22  tga: 7018  talp: 29663 tav: 1.18e+03 num: 1


011100#0 5 [5] [('0', '0', '0', '0', '0', '0', '0', '0')] #000##9#         (01110000)            q: 0.232 r: 280.9  ir: 0.0    f: 65.05  exp: 27  tga: 7047  talp: 30200 tav: 1.04e+03 num: 1


00000000 1 [1] [('0', '0', '0', '0', '0', '0', '0', '0')] #111####         (00000000)            q: 0.322 r: 222.5  ir: 31.23  f: 71.75  exp: 56  tga: 7063  talp: 29907 tav: 5.2e+02 num: 1


00900000 4 None None #90#####         (empty)               q: 0.931 r: 611.3  ir: 1.067  f: 569.2  exp: 22  tga: 7117  talp: 29495 tav: 9.94e+02 num: 1


#09##### 4 None None #90#####         (empty)               q: 0.846 r: 579.4  ir: 0.0    f: 490.4  exp: 24  tga: 7117  talp: 29495 tav: 9.08e+02 num: 1


#0#####9 1 None None #######0         (empty)               q: 0.875 r: 196.9  ir: 10.2   f: 172.3  exp: 27  tga: 7


009000#0 0 [5] [('1', '1', '0', '0', '0', '0', '0', '1')] ##0#####         (empty)               q: 0.668 r: 183.1  ir: 18.87  f: 122.3  exp: 9   tga: 15736 talp: 30079 tav: 1.56e+03 num: 1


#0009##0 7 None None ####0###         (empty)               q: 0.716 r: 211.1  ir: 15.37  f: 151.1  exp: 12  tga: 15744 talp: 28931 tav: 1.02e+03 num: 1


000090#0 7 None None ####0###         (empty)               q: 0.73  r: 220.4  ir: 9.201  f: 160.9  exp: 12  tga: 15744 talp: 28931 tav: 1.02e+03 num: 1


#9#00000 3 [4] [('0', '0', '0', '0', '0', '0', '0', '0')] #0#111##         (empty)               q: 0.73  r: 248.0  ir: 22.09  f: 181.0  exp: 10  tga: 15803 talp: 28731 tav: 1.25e+03 num: 1


#9#0#0## 3 [4] [('0', '1', '1', '1', '0', '0', '0', '0')] #0#111##         (empty)               q: 0.801 r: 194.9  ir: 0.0    f: 156.2  exp: 11  tga: 15803 talp: 28731 tav: 1.27e+03 num: 1


#9000000 3 [4] [('0', '0', '0', '0', '0', '0', '0', '0')] #0#111##         (empty)               q: 0.768 r: 248.



#0#0#000 2 [1] [('0', '0', '0', '0', '0', '0', '0', '0')] 1111###1         (00{90}00000)         q: 0.407 r: 111.8  ir: 0.0    f: 45.54  exp: 4   tga: 29588 talp: 30233 tav: 6.05e+03 num: 1


#111#### 5 [0] [('0', '0', '0', '0', '0', '0', '0', '0')] 1#00###1         (01110000)            q: 0.451 r: 25.27  ir: 0.0    f: 11.4   exp: 2   tga: 29590 talp: 29681 tav: 9.89e+03 num: 1


00000000 5 None None ######9#         (00000000)            q: 0.379 r: 487.6  ir: 0.2672 f: 184.7  exp: 10  tga: 29595 talp: 29994 tav: 48.5   num: 1


######0# 5 None None ######9#         ({01}{01}{01}{01}{09}{09}0{019}) q: 0.162 r: 374.5  ir: 188.5  f: 60.57  exp: 24  tga: 29595 talp: 30299 tav: 28.6   num: 1


0000#0#0 5 None None #####9##         (0000{09}0{09}0)      q: 0.299 r: 350.7  ir: 0.0    f: 105.0  exp: 11  tga: 29598 talp: 30099 tav: 43.7   num: 1


110##001 5 [5] [('0', '0', '0', '0', '0', '0', '0', '0')] 00###11#         (11000001)            q: 0.814 r: 147.5  ir: 0.0    f: 120.0  exp: 4 

### Exploitation of BACS

In [None]:
%%time

cfg_exploit = Configuration(
    classifier_length=CLASSIFIER_LENGTH,
    number_of_possible_actions=NUMBER_OF_POSSIBLE_ACTIONS,
    user_metrics_collector_fcn=_maze_metrics,
    metrics_trial_frequency=1,
    do_ga=False,
    do_subsumption=DO_SUBSUMPTION_EXPLOIT,
    do_action_planning=DO_ACTION_PLANNING_EXPLOIT,
    action_planning_frequency=50,
    beta=BETA_EXPLOIT,
    gamma=0.95,
    theta_i=0.1,
    theta_r=0.9,
    epsilon=0.0,
    u_max=CLASSIFIER_LENGTH,
    theta_exp=20,
    theta_ga=100,
    theta_as=20,
    mu=0.3,
    chi=0.8,
    bs_max=1
)

agent_exploit = BACS(cfg_exploit, population_explore)
agent_exploit.clean_population(does_anticipate_change=True)
population_exploit, metrics_exploit = agent_exploit.exploit(maze, NUMBER_OF_EXPLOIT_TRIALS)

### Performance

In [None]:
# Sampling frequencies are read back from the two configurations so the
# metric records can be lined up with the trials they were collected on.
metrics_trial_frequency_explore = cfg_explore.metrics_trial_frequency
metrics_trial_frequency_exploit = cfg_exploit.metrics_trial_frequency

# Combine the exploration and exploitation metrics into one DataFrame.
metrics_df = parse_metrics_to_df(
    metrics_explore,
    metrics_trial_frequency_explore,
    metrics_exploit,
    metrics_trial_frequency_exploit,
)

# Plot the performance of the exploitation agent on the maze.
plot_performance(
    agent_exploit,
    maze,
    metrics_df,
    cfg_exploit,
    MAZE,
    metrics_trial_frequency_explore,
    metrics_trial_frequency_exploit,
)

In [None]:
def _mean_steps(metrics):
    """Return the mean of ``steps_in_trial`` over the given metric records.

    The sum is divided by the number of records actually present, so the
    result is a true mean of the sampled trials even when the number of
    trials is not an exact multiple of the metrics sampling frequency.

    Args:
        metrics: sequence of per-trial metric dicts, each holding a
            ``'steps_in_trial'`` entry.

    Returns:
        The mean as a float, or ``0.0`` when ``metrics`` is empty (the value
        the previous inline computation produced in that case).
    """
    if not metrics:
        return 0.0
    return sum(record['steps_in_trial'] for record in metrics) / len(metrics)


# Note: the averages are taken over the *recorded* trials only (one record
# every `metrics_trial_frequency` trials), not over every trial that was run.
avg_step_explore = _mean_steps(metrics_explore)

print("Average number of steps to solve the maze is ",avg_step_explore,
      " for a total of ", NUMBER_OF_EXPLORE_TRIALS, " trials in EXPLORATION")

avg_step_exploit = _mean_steps(metrics_exploit)

print("Average number of steps to solve the maze is ",avg_step_exploit,
      " for a total of ", NUMBER_OF_EXPLOIT_TRIALS, " trials in EXPLOITATION")

In [None]:
# Order the exploitation population in place, highest fitness first
# (ascending sort on the negated fitness).
population_exploit.sort(key=lambda classifier: -classifier.fitness)
# Snapshot of the sorted population used for printing.
population_exploit_to_display = list(population_exploit)
for cl in population_exploit_to_display:
    print(cl)
    # Alternative compact one-line format, kept for reference:
    #print("{!r} \t {!r} \t {!r} \t {:.3f} \t {:.3f} \t {:.1f} \t {!r} \t {!r}".format(cl.condition, cl.action, cl.effect, cl.fitness, cl.q, cl.num, cl.exp, cl.mark))
    print("\n")