In [None]:
import logging

# Lower the root logger's threshold to INFO so that INFO-level records
# are no longer filtered out by the root level.
logging.getLogger().setLevel(logging.INFO)


In [None]:
import random

from xcs import XCSAlgorithm
from xcs.scenarios import Scenario

In [None]:
import random

from xcs.scenarios import Scenario
from xcs.bitstrings import BitString

# environment setup
import gym
# noinspection PyUnresolvedReferences
import gym_maze


class MazeScenario(Scenario):
    """Wrap the ``Maze5-v0`` gym environment so both XCS libraries can use it.

    Two call styles are supported:

    * the ``xcs`` (Hosford42) ``Scenario`` protocol: ``is_dynamic``,
      ``get_possible_actions``, ``more``, ``sense`` and ``execute``;
    * a gym-like ``reset`` / ``step`` pair, marked in this file as the
      entry points intended for pyalcs.

    Episode bookkeeping (``steps``, ``steps_array``, ``remaining_cycles``)
    is only updated by ``execute`` and ``more``; ``step`` does not touch it.

    Parameters
    ----------
    training_cycles : int
        Number of finished episodes after which ``more()`` returns False.
    input_size : int
        Stored as ``self.input_size``; no method of this class reads it.
    """

    def __init__(self, training_cycles=1000, input_size=8):
        self.input_size = input_size
        self.maze = gym.make('Maze5-v0')
        # Action ids offered to the learner.
        self.possible_actions = (0, 1, 2, 3, 4, 5, 6, 7)
        self.done = False
        self.reward = 0
        # Latest raw observation returned by the environment.
        self.state = self.maze.reset()
        # Episodes still to be played; decremented in more().
        self.remaining_cycles = training_cycles

        # Step counts of finished episodes, and the running count of the
        # episode currently in progress.
        self.steps_array = []
        self.steps = 0

    def reset(self):
        """Start a new episode and return the environment's initial observation."""
        self.done = False
        self.state = self.maze.reset()
        return self.state

    # XCS Hosford42 functions
    @property
    def is_dynamic(self):
        """Always False.

        NOTE(review): in a maze an action changes the next observation, which
        looks like what ``xcs`` calls a dynamic scenario - confirm that False
        is intended here.
        """
        return False

    def get_possible_actions(self):
        """Return the tuple of action ids the learner may choose from."""
        return self.possible_actions

    def more(self):
        """Return True while another situation should be presented.

        When the previous ``execute`` call finished an episode, the episode's
        step count is appended to ``steps_array``, the counters are updated
        and the environment is reset exactly once.
        """
        if self.done:
            self.remaining_cycles -= 1
            self.steps_array.append(self.steps)
            self.steps = 0
            self.reset()
        # Strictly positive: ``>= 0`` would play training_cycles + 1 episodes.
        return self.remaining_cycles > 0

    def sense(self):
        """Return the current observation as a ``BitString``.

        Every element of ``self.state`` that is not ``'0'`` or ``'1'`` is
        replaced by ``'1'`` so the result contains only binary characters.
        """
        binary_chars = [
            char if char in ('0', '1') else '1'
            for char in self.state
        ]
        return BitString(''.join(binary_chars))

    def execute(self, action):
        """Apply ``action`` to the maze, record the outcome, return the reward."""
        self.steps += 1
        raw_state, step_reward, done, _ = self.maze.step(action)
        self.state = raw_state
        self.reward = step_reward
        self.done = done
        return self.reward

    # XCS Pyalcs functions
    def step(self, action):
        """Forward ``action`` to the maze and return its result unchanged.

        Unlike ``execute``, this does not update ``state``, ``reward``,
        ``done`` or the step counters.
        """
        return self.maze.step(action)

In [None]:
# Run parameters; `training_cycles` is reused later in the notebook.
training_cycles = 1000
# Stored on the scenario as `input_size`, but no MazeScenario method reads it.
input_size = 1
logging.root.setLevel(logging.INFO)
scenario = MazeScenario(training_cycles=training_cycles, input_size=input_size)

In [None]:
# Show the raw observation, then the same observation with every
# non-binary character replaced by '1' (the same mapping that
# MazeScenario.sense applies).
print(str(scenario.state))
print(''.join(scenario.state))

no_reward_state = [
    char if char in ('0', '1') else '1'
    for char in scenario.state
]

print(''.join(no_reward_state))

In [None]:
# Configure the xcs (Hosford42) learner. The semantics of each attribute are
# defined by the xcs library; the notes below are assumptions to confirm.
algorithm = XCSAlgorithm()
algorithm.exploration_probability = .1
# NOTE(review): presumably 0 means future payoff is ignored - confirm this is
# wanted for a multi-step maze.
algorithm.discount_factor = 0
algorithm.wildcard_probability = .5
algorithm.do_ga_subsumption = True
algorithm.do_action_set_subsumption = True

In [None]:
# Build a model for the scenario and run it with learning enabled.
# The run is driven through the scenario's more()/sense()/execute() methods,
# which also fill scenario.steps_array.
model = algorithm.new_model(scenario)
model.run(scenario, learn=True)


In [None]:
# Print the string representation of the trained xcs model.
print(model)


In [None]:
# Print len(model) (presumably the number of rules - confirm against xcs docs).
print(len(model))

In [None]:
# List only rules with fitness above 0.05 that have been used at least once.
for rule in model:
    if not (rule.fitness > .05 and rule.experience >= 1):
        continue
    print(rule.condition, '=>', rule.action, ' [%.5f]' % rule.fitness)

In [None]:
from lcs.agents.xcs import XCS, Configuration

# Settings for the pyalcs XCS agent.
# number_of_actions=8 matches the eight ids in MazeScenario.possible_actions.
# NOTE(review): gamma=0 and covering_wildcard_chance=0.5 look like the
# counterparts of discount_factor=0 and wildcard_probability=.5 set on the
# xcs algorithm earlier - confirm against the pyalcs documentation.
cfg = Configuration(number_of_actions=8,
                    gamma=0,
                    metrics_trial_frequency=5,
                    covering_wildcard_chance=0.5
                    )


In [None]:
# Train the pyalcs agent on the same scenario object that the xcs model used.
# NOTE(review): the meaning of the third positional argument (False) is not
# visible here - confirm against the pyalcs explore() signature.
agent = XCS(cfg)
explore_population, explore_metrics = agent.explore(scenario, training_cycles, False)

In [None]:
# Print every rule in the population returned by agent.explore.
for rule in explore_population:
    print(rule)


In [None]:
# Print the size of the population returned by agent.explore.
print(len(explore_population))

In [None]:
# List only rules with fitness of at least 0.05 that have been used at least once.
# NOTE(review): the matching filter for the xcs model uses a strict `>` on
# fitness; here it is `>=` - confirm which is intended.
for rule in explore_population:
    if not (rule.fitness >= .05 and rule.experience >= 1):
        continue
    print(rule.condition, '=>', rule.action, ' [%.5f]' % rule.fitness)

In [None]:
import pandas as pd

# Plot the "steps_in_trial" value of every metrics record returned by
# agent.explore.
# NOTE(review): cfg sets metrics_trial_frequency=5, so rows may correspond to
# every 5th trial and not to consecutive trials - confirm the x-axis label.
df = pd.DataFrame([record["steps_in_trial"] for record in explore_metrics])
ax = df.plot()
ax.set_ylabel("steps_in_trial")
ax.set_xlabel("trial")
ax.legend(["steps my XCS"])

In [None]:
# Plot the per-episode step counts collected in scenario.steps_array.
# That list is only filled by MazeScenario.execute()/more(), i.e. by the
# Hosford42 xcs run, so the legend must not repeat the pyalcs plot's label.
df = pd.DataFrame(scenario.steps_array)
ax = df.plot()
ax.set_xlabel("trial")
ax.set_ylabel("steps_in_trial")
ax.legend(["steps Hosford42 XCS"])

#### Conclusions
