In [14]:
from lcs.agents.xcs import XCS, Configuration, Classifier, ClassifiersList
from lcs.agents.Agent import TrialMetrics

import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def print_state(xcs: XCS):
    """Print a summary of the agent's classifier population.

    Writes the value of ``population.numerosity()``, the population length,
    and then the string form of every classifier, one per line.
    """
    population = xcs.population
    print(f"Num: {population.numerosity()}")
    print(f"Pop: {len(population)}")
    for classifier in population:
        print(str(classifier))


In [18]:
import gym
import gym_yacs_simple_maze, gym_corridor
# Environment used throughout the notebook.
# Alternative environment: gym.make('SimpleMaze-v0')
ENV_ID = 'corridor-100-v0'
maze = gym.make(ENV_ID)

# Start an episode and draw the initial layout.
maze.reset()
maze.render()

[..........X........................................................................................$]


In [23]:
# XCS settings that differ from the library defaults; everything else is left
# at whatever Configuration provides.
xcs_settings = {
    "number_of_actions": 2,
    "epsilon": 0.5,
    "user_metrics_collector_fcn": None,
}
cfg = Configuration(**xcs_settings)


In [24]:

# Build the XCS agent from the configuration above and print its population
# before any interaction with the environment.
xcs = XCS(cfg)
print_state(xcs)


Num: 0
Pop: 0


In [25]:
# Bookkeeping for one exploration trial, stepped through manually in the
# cells below. Nothing from a previous step exists yet.
prev_action_set = prev_reward = prev_situation = reward = None
time_stamp = 0  # steps
done = False  # eop

# Reset the environment and convert the raw observation into the
# representation the agent works with.
raw_state = maze.reset()
adapter = xcs.cfg.environment_adapter
state = adapter.to_genotype(raw_state)

print(f"State: {state}")
maze.render()



State: 16
[...............X...................................................................................$]


In [52]:
# Body of the explore loop, executed by hand one step at a time: build the
# match set for the current state, derive the prediction array, pick an
# action and form the corresponding action set.
if done:
    print("solution found")
match_set = xcs.population.form_match_set(state, time_stamp)
prediction_array = xcs.generate_prediction_array(match_set)
action = xcs.select_action(prediction_array, match_set)
action_set = match_set.form_action_set(action)

print("\nMatch Set & Prediction Array:")
# The prediction array has one entry per action, not per classifier, so the
# value shown next to a classifier must be looked up by that classifier's
# action. Indexing by position in the match set pairs classifiers with the
# wrong prediction and overruns the array once the match set holds more
# classifiers than there are actions.
for cl in match_set:
    print(f"{str(cl)}, {prediction_array[cl.action]:3f}")

print("\nAction Set:")
for cl in action_set:
    print(str(cl))

print(f"\nAction: {str(action)}")

print_state(xcs)



Match Set & Prediction Array:
Cond:16 - Act:1 - Num:1 [fit: 0.050, exp: 1.00, pred: 0.000000], 0.000000
Cond:16 - Act:0 - Num:1 [fit: 0.095, exp: 2.00, pred: 0.000000], 0.000000

Action Set:
Cond:16 - Act:0 - Num:1 [fit: 0.095, exp: 2.00, pred: 0.000000]

Action: 0
Num: 6
Pop: 6
Cond:16 - Act:1 - Num:1 [fit: 0.050, exp: 1.00, pred: 0.000000]
Cond:16 - Act:0 - Num:1 [fit: 0.095, exp: 2.00, pred: 0.000000]
Cond:17 - Act:1 - Num:1 [fit: 0.000, exp: 0.00, pred: 0.000000]
Cond:17 - Act:0 - Num:1 [fit: 0.050, exp: 1.00, pred: 0.000000]
Cond:15 - Act:1 - Num:1 [fit: 0.050, exp: 1.00, pred: 0.000000]
Cond:15 - Act:0 - Num:1 [fit: 0.000, exp: 0.00, pred: 0.000000]


In [53]:
from lcs.strategies.reinforcement_learning import simple_q_learning

# Execute the selected action in the environment. From here on `state` holds
# the observation *after* the step; the pre-step state is no longer available.
raw_state, reward, done, _ = maze.step(action)
state = xcs.cfg.environment_adapter.to_genotype(raw_state)
# Replace the raw environment reward with the value returned by
# simple_q_learning, computed from the agent's stored reward for this action,
# the raw reward, beta, gamma and the best prediction of the match set.
# NOTE(review): `match_set` was formed for the state before the step —
# confirm that using its best prediction (rather than the next state's) is intended.
reward = simple_q_learning(xcs.act_reward[action],
                           reward,
                           xcs.cfg.beta,
                           xcs.cfg.gamma,
                           match_set.best_prediction())


print(f"New State: {state}, Reward: {reward}")


New State: 15, Reward: 5.649403455557618e-41


In [54]:
# Deferred update: the action set of the previous step is reinforced only now,
# when the prediction array for the following state is available.
has_previous_step = prev_action_set is not None and len(prev_action_set) > 0
if has_previous_step:
    print("Performing update")
    discounted_best = xcs.cfg.gamma * max(prediction_array)
    p = prev_reward + discounted_best
    xcs.update_set(prev_action_set, p)
    xcs.run_ga(prev_action_set, prev_situation, time_stamp)

print_state(xcs)


Performing update
Num: 6
Pop: 6
Cond:16 - Act:1 - Num:1 [fit: 0.050, exp: 1.00, pred: 0.000000]
Cond:16 - Act:0 - Num:1 [fit: 0.095, exp: 2.00, pred: 0.000000]
Cond:17 - Act:1 - Num:1 [fit: 0.000, exp: 0.00, pred: 0.000000]
Cond:17 - Act:0 - Num:1 [fit: 0.050, exp: 1.00, pred: 0.000000]
Cond:15 - Act:1 - Num:1 [fit: 0.095, exp: 2.00, pred: 0.000000]
Cond:15 - Act:0 - Num:1 [fit: 0.000, exp: 0.00, pred: 0.000000]


In [55]:
# Close the current step: either finalise the episode or remember this step so
# it can be updated once the next prediction array is known.
if done:
    # End of episode: there is no following step, so the current action set is
    # updated immediately with the received payoff and the GA runs on that same
    # set. Updating `prev_action_set` here would reinforce the wrong
    # classifiers and would pass None when the episode ends on its first step.
    print("Performing update")
    p = reward
    xcs.update_set(action_set, p)
    xcs.run_ga(action_set, state, time_stamp)
else:
    print("Postponing")
    prev_action_set = action_set
    prev_reward = reward
    # NOTE(review): if the environment-step cell has already run, `state` is
    # the post-step observation, not the one `action_set` was formed for —
    # confirm which situation the GA is meant to receive.
    prev_situation = state
time_stamp += 1

print_state(xcs)






Postponing
Num: 6
Pop: 6
Cond:16 - Act:1 - Num:1 [fit: 0.050, exp: 1.00, pred: 0.000000]
Cond:16 - Act:0 - Num:1 [fit: 0.095, exp: 2.00, pred: 0.000000]
Cond:17 - Act:1 - Num:1 [fit: 0.000, exp: 0.00, pred: 0.000000]
Cond:17 - Act:0 - Num:1 [fit: 0.050, exp: 1.00, pred: 0.000000]
Cond:15 - Act:1 - Num:1 [fit: 0.095, exp: 2.00, pred: 0.000000]
Cond:15 - Act:0 - Num:1 [fit: 0.000, exp: 0.00, pred: 0.000000]
