# Rule Compaction

Train the ACS2 agent and create a population of classifiers for environments "Corridor" and "Grid".

In [6]:
from lcs.agents import EnvironmentAdapter
from lcs.agents.acs2 import ACS2, Configuration
from lcs import Perception

import gym
import gym_corridor
import gym_grid

In [4]:
# Create the two Gym environments used in this notebook.
# NOTE(review): the environment ids are presumably registered as a side effect
# of importing gym_corridor / gym_grid -- confirm before removing those imports.

corridor = gym.make("corridor-20-v0")
grid = gym.make("grid-20-v0")

In [15]:
def build_population(env, classifier_length, number_of_possible_actions, environment_adapter, explore_trials=1000):
    """Train a fresh ACS2 agent on ``env`` and return what it learned.

    Args:
        env: Environment handed to ``agent.explore``.
        classifier_length: First positional argument of ``Configuration``.
        number_of_possible_actions: Second positional argument of
            ``Configuration``.
        environment_adapter: Forwarded to ``Configuration`` as the
            ``environment_adapter`` keyword.
        explore_trials: Number of trials passed to ``agent.explore``
            (default 1000).

    Returns:
        The first element of the pair returned by ``agent.explore``; the
        second element is discarded.
    """
    agent = ACS2(
        Configuration(
            classifier_length,
            number_of_possible_actions,
            environment_adapter=environment_adapter,
        )
    )

    # explore() yields a pair; only the first item is needed here.
    population, _ = agent.explore(env, explore_trials)
    return population

## Corridor

In [5]:
class CorridorAdapter(EnvironmentAdapter):
    """Environment adapter for Corridor that wraps each observation in a tuple."""

    @staticmethod
    def to_genotype(phenotype):
        """Return ``phenotype`` wrapped in a one-element tuple.

        The method must be spelled ``to_genotype`` to override the
        ``EnvironmentAdapter`` hook; under any other name the subclass method
        is never called and the base-class conversion is used instead.

        Args:
            phenotype: Observation as produced by the environment.

        Returns:
            A tuple whose single item is ``phenotype``.
        """
        # Explicit parentheses make the one-element tuple obvious to readers.
        return (phenotype,)

In [41]:
%%time
corridor_pop = build_population(corridor, 1, 2, CorridorAdapter)

CPU times: user 2.36 s, sys: 0 ns, total: 2.36 s
Wall time: 2.36 s


In [40]:
# List the Corridor classifiers from highest to lowest fitness.
for cl in sorted(corridor_pop, key=lambda c: c.fitness, reverse=True):
    print(
        f'{cl.condition}-{cl.action}-{cl.effect}',
        f'q: {cl.q:.2f}, r: {cl.r:.2f}',
        sep='\t',
    )

18-1-19	q: 1.00, r: 1000.00
17-1-18	q: 1.00, r: 950.00
16-1-17	q: 1.00, r: 902.50
18-0-17	q: 1.00, r: 902.50
15-1-16	q: 1.00, r: 857.37
17-0-16	q: 1.00, r: 857.37
14-1-15	q: 1.00, r: 814.51
16-0-15	q: 1.00, r: 814.51
13-1-14	q: 1.00, r: 773.78
15-0-14	q: 1.00, r: 773.78
12-1-13	q: 1.00, r: 735.09
14-0-13	q: 1.00, r: 735.09
11-1-12	q: 1.00, r: 698.34
13-0-12	q: 1.00, r: 698.34
10-1-11	q: 1.00, r: 663.42
12-0-11	q: 1.00, r: 663.42
9-1-10	q: 1.00, r: 630.25
11-0-10	q: 1.00, r: 630.25
8-1-9	q: 1.00, r: 598.74
10-0-9	q: 1.00, r: 598.70
7-1-8	q: 1.00, r: 568.80
9-0-8	q: 1.00, r: 568.74
6-1-7	q: 1.00, r: 540.36
8-0-7	q: 1.00, r: 540.14
5-1-6	q: 1.00, r: 513.34
7-0-6	q: 1.00, r: 511.94
4-1-5	q: 1.00, r: 487.67
6-0-5	q: 1.00, r: 486.72
3-1-4	q: 1.00, r: 463.14
5-0-4	q: 1.00, r: 459.97
2-1-3	q: 1.00, r: 438.54
4-0-3	q: 1.00, r: 421.65
1-1-2	q: 1.00, r: 403.69
3-0-2	q: 1.00, r: 376.17
0-1-1	q: 1.00, r: 322.50
2-0-1	q: 1.00, r: 282.10
0-0-#	q: 0.98, r: 184.14
1-0-0	q: 1.00, r: 161.73


## Grid

In [42]:
%%time
grid_pop = build_population(grid, 2, 4, EnvironmentAdapter)

CPU times: user 4min 44s, sys: 11.4 ms, total: 4min 44s
Wall time: 4min 44s


In [43]:
# List the Grid classifiers from highest to lowest fitness.
for cl in sorted(grid_pop, key=lambda c: c.fitness, reverse=True):
    print(
        f'{cl.condition}-{cl.action}-{cl.effect}',
        f'q: {cl.q:.2f}, r: {cl.r:.2f}',
        sep='\t',
    )

19#-1-20#	q: 1.00, r: 76.81
1318-0-12#	q: 1.00, r: 34.05
20#-1-##	q: 1.00, r: 32.74
#1-3-##	q: 1.00, r: 30.37
#20-2-##	q: 1.00, r: 30.04
16#-1-17#	q: 1.00, r: 29.02
1218-0-11#	q: 1.00, r: 28.90
918-0-8#	q: 1.00, r: 28.49
18#-0-17#	q: 1.00, r: 28.32
17#-1-18#	q: 1.00, r: 28.29
18#-1-19#	q: 1.00, r: 28.28
19#-0-18#	q: 1.00, r: 28.25
20#-0-19#	q: 1.00, r: 28.22
15#-0-14#	q: 1.00, r: 28.19
14#-0-13#	q: 1.00, r: 28.13
14#-1-15#	q: 1.00, r: 28.07
15#-1-16#	q: 1.00, r: 28.04
16#-0-15#	q: 1.00, r: 28.03
518-0-4#	q: 1.00, r: 27.95
17#-0-16#	q: 1.00, r: 27.85
11#-0-10#	q: 1.00, r: 27.84
12#-1-13#	q: 1.00, r: 27.81
11#-1-12#	q: 1.00, r: 27.80
#11-3-#10	q: 1.00, r: 27.76
12#-0-11#	q: 1.00, r: 27.75
#10-2-#11	q: 1.00, r: 27.74
#10-3-#9	q: 1.00, r: 27.74
1118-0-10#	q: 1.00, r: 27.71
#9-2-#10	q: 1.00, r: 27.69
#13-3-#12	q: 1.00, r: 27.68
#12-3-#11	q: 1.00, r: 27.67
118-0-##	q: 1.00, r: 27.67
#11-2-#12	q: 1.00, r: 27.64
#13-2-#14	q: 1.00, r: 27.63
#12-2-#13	q: 1.00, r: 27.63
#14-3-#13	q: 1.00, r: 27.6