In [105]:
import logging
logging.root.setLevel(logging.INFO)


In [106]:
import random

from xcs import XCSAlgorithm
from xcs.scenarios import Scenario

In [107]:
import random

from xcs.scenarios import Scenario
from xcs.bitstrings import BitString

class HaystackProblem(Scenario):
    
    def __init__(self, training_cycles=1000, input_size=50):
        self.input_size = input_size
        self.possible_actions = (True, False)
        self.initial_training_cycles = training_cycles
        self.remaining_cycles = training_cycles
        self.needle_index = random.randrange(input_size)
        self.needle_value = None

    def reset(self):
        self.remaining_cycles = self.initial_training_cycles
        haystack = BitString.random(self.input_size)
        self.needle_value = haystack[self.needle_index]
        
        sense_string = str(self.sense())
        raw_state = [str(s) for s in sense_string]
        return raw_state
        
    # XCS Hosford42 functions
    @property
    def is_dynamic(self):
        return False
        
    def get_possible_actions(self):
        return self.possible_actions
        
    def more(self):
        return self.remaining_cycles > 0
    
    def sense(self):
        haystack = BitString.random(self.input_size)
        self.needle_value = haystack[self.needle_index]
        return haystack
    
    def execute(self, action):
        self.remaining_cycles -= 1
        return action == self.needle_value

    # XCS Pyalcs functions
    def step(self, action):
        done = not self.execute(action)
        
        haystack = self.sense()
        sense_string = str(haystack)
        raw_state = [str(s) for s in sense_string]
        
        self.needle_value = haystack[self.needle_index]
        reward = action == self.needle_value
        return raw_state, reward, done, _

In [108]:
logging.root.setLevel(logging.INFO)
scenario = HaystackProblem()

In [109]:
algorithm = XCSAlgorithm()
algorithm.exploration_probability = .1
algorithm.discount_factor = 0
algorithm.do_ga_subsumption = True
algorithm.do_action_set_subsumption = True

In [110]:
model = algorithm.new_model(scenario)
model.run(scenario, learn=True)


In [111]:
print(model)


##1#1101####1##00#1##010#011##000100##0#111010#00# => False
    Time Stamp: 106
    Average Reward: 0.0
    Error: 0.0
    Fitness: 0.15000850000000002
    Experience: 1
    Action Set Size: 1.0
    Numerosity: 1
#00###1000#1011##0##11#0001#11#10010111#110000#0#1 => False
    Time Stamp: 226
    Average Reward: 0.0
    Error: 0.0
    Fitness: 0.15000850000000002
    Experience: 1
    Action Set Size: 1.0
    Numerosity: 1
01##001101010#00#0##01100#1011#111#01#00#0101#00## => False
    Time Stamp: 459
    Average Reward: 0.0
    Error: 0.0
    Fitness: 0.15000850000000002
    Experience: 1
    Action Set Size: 1.0
    Numerosity: 1
0##011110110001##11##1##11#0001011##0011##110#1111 => True
    Time Stamp: 461
    Average Reward: 0.0
    Error: 0.0
    Fitness: 0.15000850000000002
    Experience: 1
    Action Set Size: 1.0
    Numerosity: 1
##101001####110##00####010110#00#1##0###10##1010#1 => True
    Time Stamp: 486
    Average Reward: 0.0
    Error: 0.0
    Fitness: 0.1500085000000000

In [112]:
print(len(model))

200


In [113]:
for rule in model:
    if rule.fitness > .5 and rule.experience >= 10:
        print(rule.condition, '=>', rule.action, ' [%.5f]' % rule.fitness)

In [114]:
from lcs.agents.xcs import XCS, Configuration

cfg = Configuration(number_of_actions=4,
                    gamma=0,
                    metrics_trial_frequency=5,
                    covering_wildcard_chance=0.9
                    )


In [115]:
agent = XCS(cfg)
explore_population, explore_metrics = agent.explore(scenario, 1000, False)

In [116]:
for rule in explore_population:
    print(rule)


Cond:010100000111011#00100010#11110101111110#1100111100 - Act:2 - Num:1 [fit: 0.000, exp: 0.00, pred: 0.000]
Cond:11100##10110000010000100010#0111010111100010110101 - Act:0 - Num:1 [fit: 0.000, exp: 0.00, pred: 0.000]
Cond:10110100#000101#11011111010##0110#1101##1001001110 - Act:1 - Num:1 [fit: 0.000, exp: 0.00, pred: 0.000]
Cond:01001001#100101100#01001101000#110100110#00#001010 - Act:1 - Num:1 [fit: 0.000, exp: 0.00, pred: 0.000]
Cond:000101#011#0000110111111010001000110101#1101100101 - Act:1 - Num:1 [fit: 0.000, exp: 0.00, pred: 0.000]
Cond:1111#1001000000#1110101000001111010110#0010#111110 - Act:1 - Num:1 [fit: 0.000, exp: 0.00, pred: 0.000]
Cond:1111110###0000#0111010#00000111101011010010011#110 - Act:3 - Num:1 [fit: 0.050, exp: 1.00, pred: 0.000]
Cond:#1101110#110#01111000000010011#10110001#1101101000 - Act:3 - Num:1 [fit: 0.050, exp: 1.00, pred: 0.000]
Cond:001100010000100100000010111010101#11010111#1010101 - Act:2 - Num:1 [fit: 0.000, exp: 0.00, pred: 0.000]
Cond:1100#10#000000

In [117]:
print(len(explore_population))

200


In [118]:
for rule in explore_population:
    if rule.fitness > .5 and rule.experience >= 10:
        print(rule.condition, '=>', rule.action, ' [%.5f]' % rule.fitness)