In [156]:
import logging
# Raise the root logger to INFO so library progress messages show up in the notebook.
logging.getLogger().setLevel(logging.INFO)


In [157]:
import random

from xcs import XCSAlgorithm
from xcs.scenarios import Scenario

In [158]:
import random

from xcs.scenarios import Scenario
from xcs.bitstrings import BitString

# environment setup
import gym
# noinspection PyUnresolvedReferences
import gym_maze


class MazeScenario(Scenario):
    """Adapter that exposes the gym ``Maze5-v0`` environment to two XCS libraries.

    The methods grouped under "XCS Hosford42 functions" implement the
    ``Scenario`` interface of the ``xcs`` package. ``reset`` and ``step``
    forward to the wrapped gym environment so the same object can also be
    handed to the pyalcs agent.
    """

    def __init__(self, training_cycles=1000, input_size=1):
        """Create the maze and start its first episode.

        :param training_cycles: number of times ``more()`` will return True,
            i.e. the number of learning steps an ``xcs`` model will perform.
        :param input_size: stored on the instance; it is not used anywhere
            else in this class.
        """
        self.input_size = input_size
        self.maze = gym.make('Maze5-v0')
        self.possible_actions = (0, 1, 2, 3, 4, 5, 6, 7)
        self.reward = 0
        self.remaining_cycles = training_cycles
        # Start the first episode right away so sense() has a state to report.
        self.done = False
        self.state = self.maze.reset()

    def reset(self):
        """Start a new maze episode and return its initial state."""
        self.done = False
        self.state = self.maze.reset()
        return self.state

    # XCS Hosford42 functions
    @property
    def is_dynamic(self):
        """Tell ``xcs`` whether this scenario is dynamic; always False here.

        NOTE(review): a maze is normally a multi-step problem; False
        presumably goes together with ``discount_factor = 0`` used in this
        notebook -- confirm this is intended.
        """
        return False

    def get_possible_actions(self):
        """Return the tuple of action identifiers offered to the algorithm."""
        return self.possible_actions

    def more(self):
        """Report whether another training step should run, consuming one cycle.

        Returns True exactly ``training_cycles`` times. The budget is checked
        *before* it is decremented; decrementing first made the last cycle
        unusable (only ``training_cycles - 1`` steps were executed). Once the
        budget is exhausted the maze is no longer reset and the counter is
        left at zero.
        """
        if self.remaining_cycles <= 0:
            return False
        # The previous step finished an episode: begin a fresh one before
        # the algorithm senses the environment again.
        if self.done:
            self.reset()
        self.remaining_cycles -= 1
        return True

    def sense(self):
        """Return the current maze state wrapped in an ``xcs`` ``BitString``."""
        return BitString(self.state)

    def execute(self, action):
        """Perform ``action`` in the maze, record the outcome, return the reward."""
        raw_state, step_reward, done, _ = self.maze.step(action)
        self.state = raw_state
        self.reward = step_reward
        self.done = done
        return self.reward

    # XCS Pyalcs functions
    def step(self, action):
        """Forward ``action`` to the maze and return ``maze.step``'s result unchanged.

        Unlike ``execute`` this does not update ``state``, ``reward`` or
        ``done`` on this object.
        """
        return self.maze.step(action)

In [159]:
# Shared experiment settings; training_cycles is reused later for the pyalcs run.
training_cycles, input_size = 1000, 1
logging.getLogger().setLevel(logging.INFO)
scenario = MazeScenario(training_cycles=training_cycles, input_size=input_size)

In [160]:
# Sanity check: sense() wraps the maze state in an xcs BitString.
print(type(scenario.sense()))

<class 'xcs._python_bitstrings.BitString'>


In [161]:
# Hosford42 XCS algorithm with the hyper-parameters used for this comparison.
# Settings are applied in the listed order via plain attribute assignment.
algorithm = XCSAlgorithm()
xcs_settings = {
    'exploration_probability': .1,
    'discount_factor': 0,
    'wildcard_probability': .998,
    'do_ga_subsumption': True,
    'do_action_set_subsumption': True,
}
for setting_name, setting_value in xcs_settings.items():
    setattr(algorithm, setting_name, setting_value)

In [162]:
# Create an empty xcs model for the maze scenario ...
model = algorithm.new_model(scenario)
# ... and train it; the run continues for as long as scenario.more() returns True.
model.run(scenario, learn=True)


In [163]:
# Dump the trained xcs model: one entry per classifier rule with its statistics.
print(model)


1####### => 0
    Time Stamp: 979
    Average Reward: 0.0053639358970496145
    Error: 0.05524853974452604
    Fitness: 9.663322275465782e-06
    Experience: 743
    Action Set Size: 54.79363865595093
    Numerosity: 1
#######1 => 3
    Time Stamp: 988
    Average Reward: 0.0
    Error: 0.0
    Fitness: 0.050215747938410916
    Experience: 3
    Action Set Size: 15.0
    Numerosity: 1
##11#### => 3
    Time Stamp: 988
    Average Reward: 0.0
    Error: 0.0
    Fitness: 0.06420609638581713
    Experience: 1
    Action Set Size: 17.0
    Numerosity: 1
####1### => 3
    Time Stamp: 988
    Average Reward: 0.0
    Error: 0.0
    Fitness: 0.06505609638581712
    Experience: 2
    Action Set Size: 16.0
    Numerosity: 1
###1#### => 3
    Time Stamp: 988
    Average Reward: 0.0
    Error: 0.0
    Fitness: 0.07187833210842784
    Experience: 5
    Action Set Size: 13.0
    Numerosity: 1
1####### => 3
    Time Stamp: 988
    Average Reward: 0.0
    Error: 0.0
    Fitness: 0.07663231759949928
  

In [164]:
# Size of the trained xcs model as reported by len().
print(len(model))

21


In [165]:
# List the xcs rules whose fitness is above 0.05 and that were used at least once.
for classifier in model:
    if not (classifier.fitness > .05 and classifier.experience >= 1):
        continue
    print(classifier.condition, '=>', classifier.action, ' [%.5f]' % classifier.fitness)

######## => 5  [0.98538]
######## => 7  [0.96906]
######## => 2  [0.90651]
######## => 1  [0.51298]
######## => 4  [0.98538]
######## => 3  [0.64206]
######## => 6  [0.57039]
#######1 => 3  [0.05022]
1####### => 6  [0.14533]
1####### => 3  [0.07663]
######11 => 3  [0.13180]
11###### => 6  [0.14602]
####1### => 3  [0.06506]
##1##### => 6  [0.10304]
###1#### => 3  [0.07188]
#1#####1 => 0  [0.99951]
##11#### => 3  [0.06421]


In [166]:
from lcs.agents.xcs import XCS, Configuration

# pyalcs XCS configuration for the 8-action maze.
# NOTE(review): covering_wildcard_chance=0.002 is presumably meant as the
# complement of the xcs wildcard_probability=.998 used earlier -- confirm
# against the pyalcs covering implementation.
pyalcs_settings = dict(
    number_of_actions=8,
    gamma=0,
    metrics_trial_frequency=5,
    covering_wildcard_chance=0.002,
)
cfg = Configuration(**pyalcs_settings)


In [167]:
# Build the pyalcs XCS agent from the configuration above.
agent = XCS(cfg)
# Explore the same scenario object for training_cycles trials, keeping both the
# resulting population and the collected metrics.
# NOTE(review): the third positional argument (False) is presumably the
# `decay` flag of pyalcs' Agent.explore -- confirm against the pyalcs API.
explore_population, explore_metrics = agent.explore(scenario, training_cycles, False)

INFO:lcs.agents.Agent:{'trial': 0, 'steps_in_trial': 50, 'reward': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 'perf_time': 0.007192999999915628}
INFO:lcs.agents.Agent:{'trial': 100, 'steps_in_trial': 40, 'reward': [112.56721594091879, 0.0, 0.012357094389507807, 0.0, 0.0, 0.0, 0.0, 0.0], 'perf_time': 0.005010500000025786}
INFO:lcs.agents.Agent:{'trial': 200, 'steps_in_trial': 7, 'reward': [100.31762363941263, 0.0, 0.025202085617222873, 0.0, 0.0, 0.0, 0.0, 0.0], 'perf_time': 0.0010694000000057713}
INFO:lcs.agents.Agent:{'trial': 300, 'steps_in_trial': 37, 'reward': [100.69953911162375, 0.0, 0.0001664515690303, 0.0, 0.0, 0.0, 0.0, 0.0], 'perf_time': 0.005030099999885351}
INFO:lcs.agents.Agent:{'trial': 400, 'steps_in_trial': 50, 'reward': [2.5806022569614915, 0.0, 0.03717564528487627, 0.0, 0.0, 0.0, 0.0, 0.0], 'perf_time': 0.006249499999967156}
INFO:lcs.agents.Agent:{'trial': 500, 'steps_in_trial': 50, 'reward': [8.142423631338298, 0.0, 0.501724135122222, 0.0, 0.0, 0.0, 0.0, 0.0], 'perf_ti

In [168]:
# Print every classifier of the pyalcs population, one per line.
for classifier in explore_population:
    print(classifier)


Cond:######## - Act:0 - Num:3 [fit: 0.030, exp: 14704.00, pred: 14.170]
Cond:#######1 - Act:1 - Num:3 [fit: 0.750, exp: 1375.00, pred: 0.000]
Cond:#0###### - Act:2 - Num:3 [fit: 0.489, exp: 4882.00, pred: 0.579]
Cond:#######0 - Act:3 - Num:3 [fit: 0.500, exp: 1789.00, pred: 0.000]
Cond:######## - Act:4 - Num:3 [fit: 0.750, exp: 2326.00, pred: 0.000]
Cond:######## - Act:5 - Num:3 [fit: 0.750, exp: 2357.00, pred: 0.000]
Cond:######## - Act:6 - Num:3 [fit: 0.750, exp: 2209.00, pred: 0.000]
Cond:######## - Act:7 - Num:3 [fit: 0.750, exp: 3034.00, pred: 0.000]
Cond:######## - Act:3 - Num:1 [fit: 0.303, exp: 3089.00, pred: 0.000]
Cond:######## - Act:2 - Num:1 [fit: 0.338, exp: 8629.00, pred: 0.234]


In [169]:
# Size of the pyalcs population as reported by len().
print(len(explore_population))

10


In [170]:
# List the pyalcs classifiers whose fitness is strictly above 0.05 and that were
# used at least once. The strict comparison matches the filter applied to the
# xcs model and the "fitness above 0.05" wording of the conclusions.
for rule in explore_population:
    if rule.fitness > .05 and rule.experience >= 1:
        print(rule.condition, '=>', rule.action, ' [%.5f]' % rule.fitness)

#######1 => 1  [0.75000]
#0###### => 2  [0.48850]
#######0 => 3  [0.50000]
######## => 4  [0.75000]
######## => 5  [0.75000]
######## => 6  [0.75000]
######## => 7  [0.75000]
######## => 3  [0.30296]
######## => 2  [0.33770]


#### Conclusions
Both XCS implementations produce similar-looking populations. Both use a maximum population size of 200 (the trained populations above contain 21 and 10 classifiers, respectively), and both produce similar-looking classifiers.
The main difference is the number of classifiers with fitness above 0.05, which can easily be explained by the algorithms using different RP.