# rACS in Real-Multiplexer

In [13]:
# Logger
import logging
logging.basicConfig(level=logging.WARN)

import numpy as np
import pandas as pd

# Import local paths
import sys, os
sys.path.append(os.path.abspath('../../..'))
sys.path.append(os.path.abspath('../../../../openai-envs'))

# Enable automatic module reload
%load_ext autoreload
%autoreload 2

# Load PyALCS module
from lcs.representations.RealValueEncoder import RealValueEncoder
from lcs.agents.racs import RACS, Configuration

# Load OpenAI environments
import gym
import gym_multiplexer

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## 3-bit Real-Multiplexer
In the 3-bit multiplexer the first bit is responsible for addressing the correct answer.

> For testing purposes the last bit is used as a flag indicating whether the action chosen by the algorithm was correct. The environment always initializes it to 0. Its value is changed to 1 if the action was chosen correctly.

In [2]:
# Environment initialization
# Ask gym for the environment registered under the 3-bit real-multiplexer id.
# NOTE(review): the id is presumably registered by importing `gym_multiplexer` — confirm.
rmpx = gym.make('real-multiplexer-3bit-v0')

[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m


rMPX acts like a normal MPX, with the difference that instead of boolean values, floating-point numbers within the range `[0,1]` are used. The environment uses an internal threshold to transform them into boolean values (`threshold=0.5` is used by default).

In [3]:
# Reset the environment and keep the observation it hands back
state = rmpx.reset()
state  # last expression of the cell, so the observation is displayed

[0.27772115871616476, 0.7426566047548088, 0.28689197501549846, 0]

Which corresponds to the following binary MPX:

In [4]:
# Threshold used to turn each real-valued attribute into a bit.
threshold = 0.5  # secret knowledge

# Build a real list rather than a lazy `map` object: a `map` iterator is
# exhausted by the first `list(...)` call, which would leave `binary_state`
# empty for any later cell (or a re-run of the display line).
binary_state = [1 if x > threshold else 0 for x in state]

binary_state

[0, 1, 0, 0]

Depending on the MPX size, there is a certain number of actions that can be executed. In the 3-bit variant the first bit (_address bit_) points to the correct answer (two possible values).

In [5]:
# Size of the environment's action space (displayed as the cell result)
rmpx.action_space.n

2

We can also investigate the properties of a single observation (like vector dimension, lower and upper bound of each attribute).

In [6]:
# Summarise the observation space: vector dimension and per-attribute bounds.
# `.low` holds the lower bounds and `.high` the upper bounds, so each label
# must be paired with the matching attribute.
print(f"Observation shape: {rmpx.observation_space.shape}")
print(f"Lower bounds: {rmpx.observation_space.low}")
print(f"Upper bounds: {rmpx.observation_space.high}")

Observation shape: (4,)
Upper bounds: [0. 0. 0. 0.]
Lower bounds: [1. 1. 1. 1.]


## rACS

- **TODO**: write about _encoders_

In [51]:
def encode(bits, val):
    """Encode `val` with a `RealValueEncoder` built for `bits` and return it as an int.

    A new encoder is constructed on every call; `bits` is forwarded to its
    constructor and `val` to its `encode` method.
    """
    encoder = RealValueEncoder(bits)
    encoded = encoder.encode(val)
    return int(encoded)

# Perception values to encode: start at 0 and advance in steps of 0.1
r = np.arange(0, 1.1, .1)

df = pd.DataFrame(r, columns=['Perception'])

# One column per encoder resolution (1 to 7 bits), each holding the encoded
# form of every perception value
for bit in range(1, 8):
    df[f'{bit}-bit'] = [encode(bit, perception) for perception in df['Perception']]

# Index the table by the raw perception value and display it
df = df.set_index('Perception')
df

Unnamed: 0_level_0,1-bit,2-bit,3-bit,4-bit,5-bit,6-bit,7-bit
Perception,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0.0,0,0,0,0,0,0,0
0.1,0,0,1,2,3,6,13
0.2,0,1,1,3,6,13,25
0.3,0,1,2,5,9,19,38
0.4,0,1,3,6,12,25,51
0.5,0,2,4,8,16,32,64
0.6,1,2,4,9,19,38,76
0.7,1,2,5,11,22,44,89
0.8,1,2,6,12,25,50,102
0.9,1,3,6,14,28,57,114


In the experiment each attribute range will be represented as an _unordered bounded tuple_ with the desired encoding resolution.

In [53]:
# Resolution (in bits) handed to the real-value encoder
ENCODER_BITS = 1

cfg = Configuration(
    rmpx.observation_space.shape[0],  # length of a single observation vector
    rmpx.action_space.n,              # size of the action space
    encoder=RealValueEncoder(ENCODER_BITS),
    epsilon=0.5,   # exploration-exploitation ratio
    do_ga=True,    # NOTE(review): presumably switches the genetic algorithm on — confirm
    theta_r=0.9,   # reliability threshold
    theta_i=0.2,   # inadequacy threshold
    theta_ga=100,  # NOTE(review): looks like the GA application threshold — confirm
    chi=0.5,       # cross-over probability
    mu=0.15,       # mutation probability
)

Initialize agent with no initial classifiers using configuration above.

In [54]:
# Build the agent from the configuration alone; no classifier population is passed in
agent = RACS(cfg)

Perform exploration for some trials

In [55]:
%%time

# Number of trials requested from the exploration run
EXPLORE_TRIALS = 10_000
# The call's two results are kept as the classifier population and the metrics
population, metrics = agent.explore(rmpx, EXPLORE_TRIALS)

CPU times: user 1min 47s, sys: 401 ms, total: 1min 48s
Wall time: 1min 48s


During the process the algorithm is gathering metrics. Let's inspect the last one

In [56]:
# Final entry of the metrics gathered during exploration
metrics[-1]

{'agent': {'population': 160,
  'numerosity': 160,
  'reliable': 129,
  'fitness': 473.2724739886333,
  'cover_ratio': 0.375,
  'trial': 9999,
  'steps': 1,
  'total_steps': 10000},
 'environment': None,
 'performance': None}

In [57]:
# Keep only the classifiers that report themselves as reliable,
# ordered from the highest fitness to the lowest
reliable = sorted(
    (cl for cl in population if cl.is_reliable()),
    key=lambda cl: cl.fitness,
    reverse=True,
)

# Print condition, effect and the q / r / exp / num attributes of the top ten
for classifier in reliable[:10]:
    print(classifier.condition)
    print(classifier.effect)
    print(classifier.q, classifier.r, classifier.exp, classifier.num)

    print("\n")

UBR(x1=1, x2=1)UBR(x1=0, x2=1)UBR(x1=0, x2=0)UBR(x1=0, x2=0)
UBR(x1=0, x2=1)UBR(x1=0, x2=1)UBR(x1=0, x2=1)UBR(x1=1, x2=1)
0.9999999999999989 999.9999999999989 866 1


UBR(x1=0, x2=0)UBR(x1=0, x2=0)UBR(x1=0, x2=1)UBR(x1=0, x2=0)
UBR(x1=0, x2=1)UBR(x1=0, x2=1)UBR(x1=0, x2=1)UBR(x1=1, x2=1)
0.9999999999999989 999.9999999999989 795 1


UBR(x1=0, x2=0)UBR(x1=0, x2=0)UBR(x1=0, x2=1)UBR(x1=0, x2=0)
UBR(x1=0, x2=1)UBR(x1=0, x2=1)UBR(x1=0, x2=1)UBR(x1=1, x2=1)
0.9999999999999989 999.9999999999989 760 1


UBR(x1=0, x2=0)UBR(x1=0, x2=0)UBR(x1=0, x2=1)UBR(x1=0, x2=0)
UBR(x1=0, x2=1)UBR(x1=0, x2=1)UBR(x1=0, x2=1)UBR(x1=1, x2=1)
0.9999999999999981 999.9999999999989 651 1


UBR(x1=1, x2=1)UBR(x1=0, x2=1)UBR(x1=0, x2=0)UBR(x1=0, x2=0)
UBR(x1=0, x2=1)UBR(x1=0, x2=1)UBR(x1=0, x2=1)UBR(x1=1, x2=1)
0.9999999999999958 999.9999999999989 633 1


UBR(x1=0, x2=0)UBR(x1=1, x2=1)UBR(x1=0, x2=1)UBR(x1=0, x2=0)
UBR(x1=0, x2=1)UBR(x1=0, x2=1)UBR(x1=0, x2=1)UBR(x1=1, x2=1)
0.9999999999999946 999.9999999999989 628 1


Try to exploit

In [58]:
# Number of trials requested from the exploitation run
EXPLOIT_TRIALS = 100

# Create a new agent that starts from the whole population obtained above
exploiter = RACS(cfg, population)
# Results are kept under separate names so the exploration results stay untouched
population_1, metrics_1 = exploiter.exploit(rmpx, EXPLOIT_TRIALS)