In [2]:
from lcs.agents.xcs import XCS, Configuration, Classifier, ClassifiersList
from lcs.agents.Agent import TrialMetrics

import logging
# Configure the root logger so records at INFO level and above are emitted.
logging.basicConfig(level=logging.INFO)
# Logger named after the current module.
logger = logging.getLogger(__name__)


def print_state(xcs: XCS):
    """Print a textual snapshot of the agent's population.

    Three things are written to stdout, in order:
    the value returned by ``xcs.population.numerosity()``,
    the length of the population, and the ``str()`` of every
    classifier in the population (one per line).

    Args:
        xcs: agent whose ``population`` attribute is inspected.
    """
    population = xcs.population
    print(f"Num: {population.numerosity()}")
    print(f"Pop: {len(population)}")
    for line in map(str, population):
        print(line)


In [3]:
import gym
# Not referenced by name below; presumably imported for its side effect of
# registering 'SimpleMazeRP-v0' with gym — TODO confirm.
import gym_simple_maze_rp
# Environment instance shared by every later cell.
maze = gym.make('SimpleMazeRP-v0')
# Last expression of the cell, so whatever render() returns is shown as the
# cell output.
maze.render()

'State: None'

In [33]:
# Initializing XCS values
# NOTE(review): in the XCS literature theta_mna is the number of distinct
# actions that must be present in a match set before covering stops, and
# epsilon is the exploration probability — confirm against
# lcs.agents.xcs.Configuration. Parameters not passed here are left to
# whatever Configuration provides by default.
cfg = Configuration(theta_mna=4,
                    epsilon=0.5,
                    user_metrics_collector_fcn=None)


In [5]:

# Build the agent from the configuration and print its starting population.
xcs = XCS(cfg)
print_state(xcs)


In [6]:
# Init Run Trials Explore
# Start the episode: reset the environment and convert the raw observation
# with the agent's environment adapter.
raw_state = maze.reset()
state = xcs.cfg.environment_adapter.to_genotype(raw_state)

# Bookkeeping for the hand-driven loop. Nothing has been experienced yet, so
# there is no previous action set / reward / situation and no current reward.
time_stamp, done = 0, False  # steps elapsed, end-of-problem (eop) flag
prev_action_set = prev_reward = prev_situation = None
reward = None

print(f"State: {state}")
# Kept as the last expression so its return value is shown as the cell output.
maze.render()



State: ['0', '1', '1', '1']


'State: 3'

In [37]:
# While loop
# One hand-executed pass of the loop body. The `done` check only reports;
# the rest of the cell runs regardless.
if done:
    print("solution found")

# Order matters: match -> predict -> choose -> narrow down to the classifiers
# that advocate the chosen action.
match_set = xcs.population.form_match_set(state, time_stamp)
prediction_array = xcs.generate_prediction_array(match_set)
action = xcs.select_action(prediction_array, match_set)
action_set = match_set.form_action_set(action)

print("\nMatch Set & Prediction Array:")
# prediction_array is indexed by position in the match set here.
# NOTE(review): the `3f` spec means minimum width 3 with the default
# 6-digit precision; `.3f` may have been intended — confirm.
for i, cl in enumerate(match_set):
    print(f"{cl!s}, {prediction_array[i]:3f}")

print("\nAction Set:")
for cl in action_set:
    print(f"{cl!s}")

print(f"\nAction: {action!s}")

print_state(xcs)



Match Set & Prediction Array:
Cond:0111 - Act:3 - Num:1 [fit: 0.000, exp: 0.00, pred: 0.000000], 0.000000
Cond:0111 - Act:2 - Num:1 [fit: 0.050, exp: 1.00, pred: 0.000000], 0.000000
Cond:0111 - Act:1 - Num:1 [fit: 0.000, exp: 0.00, pred: 0.000000], 0.000000
Cond:0111 - Act:0 - Num:1 [fit: 0.050, exp: 1.00, pred: 1.000000], 1.000000

Action Set:
Cond:0111 - Act:0 - Num:1 [fit: 0.050, exp: 1.00, pred: 1.000000]

Action: 0
Num: 8
Pop: 8
Cond:0111 - Act:3 - Num:1 [fit: 0.000, exp: 0.00, pred: 0.000000]
Cond:0111 - Act:2 - Num:1 [fit: 0.050, exp: 1.00, pred: 0.000000]
Cond:0111 - Act:1 - Num:1 [fit: 0.000, exp: 0.00, pred: 0.000000]
Cond:0111 - Act:0 - Num:1 [fit: 0.050, exp: 1.00, pred: 1.000000]
Cond:1001 - Act:3 - Num:1 [fit: 0.095, exp: 2.00, pred: 0.000000]
Cond:1001 - Act:2 - Num:1 [fit: 0.000, exp: 0.00, pred: 0.000000]
Cond:1001 - Act:1 - Num:1 [fit: 0.000, exp: 0.00, pred: 0.000000]
Cond:1001 - Act:0 - Num:1 [fit: 0.000, exp: 0.00, pred: 0.000000]


In [26]:

# Execute the selected action in the environment; the fourth element of the
# returned tuple is discarded.
raw_state, reward, done, _ = maze.step(action)
# `state` now holds the converted observation returned by the step, replacing
# the one the current match/action set was built from.
state = xcs.cfg.environment_adapter.to_genotype(raw_state)

print(f"New State: {state}, Reward: {reward}")


New State: ['0', '1', '1', '1'], Reward: 0


In [27]:
# Deferred learning step for the action set stored on the previous pass.
# Skipped when there is no previous action set (None) or it is empty.
if prev_action_set is not None and len(prev_action_set) > 0:
    print("Performing update")
    # Payoff target: previous reward plus gamma times the largest entry of
    # the prediction array computed for the current match set.
    p = prev_reward + xcs.cfg.gamma * max(prediction_array)
    xcs.update_set(prev_action_set, p)
    xcs.run_ga(prev_action_set, prev_situation, time_stamp)
    
print_state(xcs)


Performing update
Num: 8
Pop: 8
Cond:0111 - Act:3 - Num:1 [fit: 0.000, exp: 0.00, pred: 0.000000]
Cond:0111 - Act:2 - Num:1 [fit: 0.050, exp: 1.00, pred: 0.000000]
Cond:0111 - Act:1 - Num:1 [fit: 0.000, exp: 0.00, pred: 0.000000]
Cond:0111 - Act:0 - Num:1 [fit: 0.050, exp: 1.00, pred: 1.000000]
Cond:1001 - Act:3 - Num:1 [fit: 0.095, exp: 2.00, pred: 0.000000]
Cond:1001 - Act:2 - Num:1 [fit: 0.000, exp: 0.00, pred: 0.000000]
Cond:1001 - Act:1 - Num:1 [fit: 0.000, exp: 0.00, pred: 0.000000]
Cond:1001 - Act:0 - Num:1 [fit: 0.000, exp: 0.00, pred: 0.000000]


In [28]:
# End-of-step bookkeeping.
#
# NOTE(review): `state` has already been overwritten with the post-step
# observation by the cell that calls maze.step(), so the situation passed to
# run_ga / stored in prev_situation below is the one *after* the action —
# confirm this is intended.
if done:
    # Terminal step: there is no successor state to bootstrap from, so the
    # payoff target is the immediate reward and it is credited to the action
    # set that produced it (the *current* one). Using prev_action_set here
    # would reward the wrong classifiers and breaks when the episode ends on
    # the very first step (prev_action_set is still None).
    print("Performing update")
    p = reward
    xcs.update_set(action_set, p)
    xcs.run_ga(action_set, state, time_stamp)
    # End of problem: drop the previous action set so a later run of the
    # deferred-update cell does not update it again.
    prev_action_set = None
else:
    # Non-terminal step: remember what is needed to update this action set
    # once the next prediction array is available.
    print("Postponing")
    prev_action_set = action_set
    prev_reward = reward
    prev_situation = state
time_stamp += 1

print_state(xcs)


Postponing
Num: 8
Pop: 8
Cond:0111 - Act:3 - Num:1 [fit: 0.000, exp: 0.00, pred: 0.000000]
Cond:0111 - Act:2 - Num:1 [fit: 0.050, exp: 1.00, pred: 0.000000]
Cond:0111 - Act:1 - Num:1 [fit: 0.000, exp: 0.00, pred: 0.000000]
Cond:0111 - Act:0 - Num:1 [fit: 0.050, exp: 1.00, pred: 1.000000]
Cond:1001 - Act:3 - Num:1 [fit: 0.095, exp: 2.00, pred: 0.000000]
Cond:1001 - Act:2 - Num:1 [fit: 0.000, exp: 0.00, pred: 0.000000]
Cond:1001 - Act:1 - Num:1 [fit: 0.000, exp: 0.00, pred: 0.000000]
Cond:1001 - Act:0 - Num:1 [fit: 0.000, exp: 0.00, pred: 0.000000]
