# rACS in Real-Multiplexer

In [1]:
# Logger
import logging
logging.basicConfig(level=logging.INFO)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Import local paths
import sys, os
sys.path.append(os.path.abspath('../../..'))
sys.path.append(os.path.abspath('../../../../openai-envs'))

# Enable automatic module reload
%load_ext autoreload
%autoreload 2

# Load PyALCS module
from lcs.representations.RealValueEncoder import RealValueEncoder
from lcs.agents.racs import RACS, Configuration

# Load OpenAI environments
import gym
import gym_multiplexer

## Real-Multiplexer
In 3-bit multiplexer first bit is responsible for addressing the correct answer.

> For testing purposes the last bit is used as a flag indicating if the action chosen by the algorithm was correct. The environment always introduces it with 0. It's value is changed to 1 if the action was chosen correctly.

In [2]:
# Environment initialization
rmpx = gym.make('real-multiplexer-3bit-v0')

rMPX acts like a normal MPX with the difference that instead on boolean values floating numbers within range `[0,1]` are used. Environment used internal threshold to transform them into boolean values (`threshold=0.5` is used as default).

In [3]:
state = rmpx.reset()
state

[0.44319905156876915, 0.27094940724807426, 0.10581927468978258, 0]

Which corresponds to the following binary MPX:

In [4]:
threshold = 0.5  # secret knowledge
binary_state = map(lambda x: 1 if x > threshold else 0, state)

list(binary_state)

[0, 0, 0, 0]

Depending on the MPX size we have some actions to be executed. In 3-bit variant first bit (_address bit_) points to the correct answer (two possible values).

In [5]:
rmpx.action_space.n

2

We can also investigate the properties of a single observation (like vector dimension, lower and upper bound of each attribute).

In [6]:
print(f"Observation shape: {rmpx.observation_space.shape}")
print(f"Upper bounds: {rmpx.observation_space.low}")
print(f"Lower bounds: {rmpx.observation_space.high}")

Observation shape: (4,)
Upper bounds: [0. 0. 0. 0.]
Lower bounds: [1. 1. 1. 1.]


## rACS

- write abount _encoders_

In [7]:
def encode(bits, val):
    return int(RealValueEncoder(bits).encode(val))

r = np.arange(0, 1.1, .1)

df = pd.DataFrame(r, columns=['Perception'])

for bit in [1, 2, 3, 4, 5, 6, 7]:
    df[f'{bit}-bit'] = df.apply(lambda row: encode(bit, row['Perception']), axis=1)

df.set_index('Perception', inplace=True)
df

Unnamed: 0_level_0,1-bit,2-bit,3-bit,4-bit,5-bit,6-bit,7-bit
Perception,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0.0,0,0,0,0,0,0,0
0.1,0,0,1,2,3,6,13
0.2,0,1,1,3,6,13,25
0.3,0,1,2,5,9,19,38
0.4,0,1,3,6,12,25,51
0.5,0,2,4,8,16,32,64
0.6,1,2,4,9,19,38,76
0.7,1,2,5,11,22,44,89
0.8,1,2,6,12,25,50,102
0.9,1,3,6,14,28,57,114


## Experiment
In the experiment each attribute range will be represented as an _unorded bounded tuple_ with desired encoding resolution.

In [8]:
# We will use 6bit rmpx
rmpx = gym.make('real-multiplexer-3bit-v0')

ENCODER_BITS = 7

cfg = Configuration(rmpx.observation_space.shape[0], rmpx.action_space.n,
                    encoder=RealValueEncoder(ENCODER_BITS),
                    epsilon=1.0,  # exploration-exploitation ratio
                    do_ga=True,
                    theta_r=0.9,  # reliablity threshold
                    theta_i=0.2,  # inadequacy threshold
                    theta_ga=100,
                    chi=0.5,  # cross-over probability
                    mu=0.15)  # mutation probability

Initialize agent with no initial classifiers using configuration above.

In [9]:
agent = RACS(cfg)

Perform exploration for some trials

In [10]:
%%time

TRIALS = 50_000
population, metrics = agent.explore_exploit(rmpx, TRIALS)

INFO:lcs.agents.racs.RACS:{'agent': {'population': 1, 'numerosity': 1, 'reliable': 0, 'fitness': 25.0, 'cover_ratio': 0.7519685039370079, 'trial': 0, 'steps': 1, 'total_steps': 1}, 'environment': None, 'performance': {'reward': 1000}}
INFO:lcs.agents.racs.RACS:{'agent': {'population': 802, 'numerosity': 808, 'reliable': 0, 'fitness': 213.3901399182953, 'cover_ratio': 0.542897186168435, 'trial': 500, 'steps': 1, 'total_steps': 501}, 'environment': None, 'performance': {'reward': 1000}}
INFO:lcs.agents.racs.RACS:{'agent': {'population': 1617, 'numerosity': 1691, 'reliable': 0, 'fitness': 227.8973337986668, 'cover_ratio': 0.5139657867441981, 'trial': 1000, 'steps': 1, 'total_steps': 1001}, 'environment': None, 'performance': {'reward': 0}}
INFO:lcs.agents.racs.RACS:{'agent': {'population': 2161, 'numerosity': 2340, 'reliable': 0, 'fitness': 227.7305295550393, 'cover_ratio': 0.4976152043928443, 'trial': 1500, 'steps': 1, 'total_steps': 1501}, 'environment': None, 'performance': {'reward': 

INFO:lcs.agents.racs.RACS:{'agent': {'population': 15443, 'numerosity': 18398, 'reliable': 5, 'fitness': 270.8272169846251, 'cover_ratio': 0.3486477577436232, 'trial': 16000, 'steps': 1, 'total_steps': 16001}, 'environment': None, 'performance': {'reward': 0}}
INFO:lcs.agents.racs.RACS:{'agent': {'population': 15833, 'numerosity': 18918, 'reliable': 4, 'fitness': 271.646125483481, 'cover_ratio': 0.3469726590182767, 'trial': 16500, 'steps': 1, 'total_steps': 16501}, 'environment': None, 'performance': {'reward': 1000}}
INFO:lcs.agents.racs.RACS:{'agent': {'population': 16099, 'numerosity': 19242, 'reliable': 5, 'fitness': 272.29468096848797, 'cover_ratio': 0.3451940820895227, 'trial': 17000, 'steps': 1, 'total_steps': 17001}, 'environment': None, 'performance': {'reward': 0}}
INFO:lcs.agents.racs.RACS:{'agent': {'population': 16430, 'numerosity': 19686, 'reliable': 6, 'fitness': 272.7706472364672, 'cover_ratio': 0.34393777466801295, 'trial': 17500, 'steps': 1, 'total_steps': 17501}, 'en

KeyboardInterrupt: 

During the process the algorithm is gathering metrics. Let's inspect the last one

In [11]:
metrics[-1]

NameError: name 'metrics' is not defined

In [None]:
reliable = [cl for cl in population ]
reliable = sorted(reliable, key=lambda cl: -cl.fitness)

for cl in reliable[:10]:
    print(cl.condition)
    print(cl.effect)
    print(cl.q, cl.r, cl.exp, cl.num)
    print("\n")

In [None]:
def parse_metrics(metrics):
    lst = [[d['agent']['trial'], d['performance']['reward']] for d in metrics]

    df = pd.DataFrame(lst, columns=['trial', 'reward'])
    df = df.set_index('trial')

    return df

df = parse_metrics(metrics)
df['mode'] = df.index.map(lambda t: "explore" if t % 2 == 0 else "exploit")
df.head()

In [None]:
explore_df = df[df['mode'] == 'explore']
exploit_df = df[df['mode'] == 'exploit']

In [None]:
fig, ax = plt.subplots()
explore_df['reward'].rolling(window=500).mean().plot(label='explore', ax=ax)
exploit_df['reward'].rolling(window=500).mean().plot(label='exploit', ax=ax)

ax.set_xlabel('Trial')
ax.set_ylabel('Average reward')

plt.legend()
plt.show()