In [1]:
# Logger: configure the root logger at INFO level so INFO records
# (e.g. the agent's periodic metrics lines) appear in the cell output.
import logging
logging.basicConfig(level=logging.INFO)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Import local paths.
# os.path.abspath resolves these relative paths against the kernel's current
# working directory, so they depend on where the notebook is launched from.
# NOTE(review): presumably the first entry exposes the `lcs` package and the
# second the `gym_checkerboard` package imported below - confirm. If so, these
# appends must stay ahead of those imports.
import sys, os
sys.path.append(os.path.abspath('../../..'))
sys.path.append(os.path.abspath('../../../../openai-envs'))

# Enable automatic module reload: with mode 2, IPython re-imports modules
# before each cell execution, so edits to local sources are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Load PyALCS module: the real-value encoder plus the RACS agent and its
# configuration class.
from lcs.representations.RealValueEncoder import RealValueEncoder
from lcs.agents.racs import RACS, Configuration

# Load OpenAI environments.
# NOTE(review): `gym_checkerboard` is never referenced by name in this notebook;
# presumably importing it registers the checkerboard ids with gym - confirm
# before removing it as "unused".
import gym
import gym_checkerboard

In [2]:
# checkerboard env
# Environment is looked up by its registered gym id.
ENV_ID = 'checkerboard-2D-3div-v0'

env = gym.make(ENV_ID)
env.reset();  # trailing ';' keeps the returned value out of the cell output

In [3]:
# Agent configuration
ENCODER_BITS = 7  # handed to RealValueEncoder below

# The two positional arguments are read from the environment's spaces.
obs_size = env.observation_space.shape[0]
n_actions = env.action_space.n
real_encoder = RealValueEncoder(ENCODER_BITS)

cfg = Configuration(
    obs_size,
    n_actions,
    encoder=real_encoder,
    epsilon=1.0,    # exploration-exploitation ratio
    do_ga=True,
    theta_r=0.9,    # reliability threshold
    theta_i=0.3,    # inadequacy threshold
    theta_ga=100,
    chi=0.5,        # cross-over probability
    mu=0.2,         # mutation probability
)

In [4]:
# Build the RACS agent from the `cfg` configuration object.
agent = RACS(cfg)

In [5]:
%%time

# Number of trials passed to explore_exploit below.
TRIALS = 50_000
# Runs the agent against `env`; the two returned values are bound to
# `population` (iterated for classifiers later) and `metrics` (parsed into a
# DataFrame later), so every following cell depends on this one finishing.
# NOTE(review): the saved output shows this run ended in KeyboardInterrupt, so
# neither name was bound and the next cell failed with NameError - re-run this
# cell to completion before executing the rest of the notebook.
population, metrics = agent.explore_exploit(env, TRIALS)

INFO:lcs.agents.racs.RACS:{'agent': {'population': 1, 'numerosity': 1, 'reliable': 0, 'fitness': 0.025, 'cover_ratio': 0.6666666666666666, 'region_1': 0.0, 'region_2': 0.3333333333333333, 'region_3': 0.0, 'region_4': 0.6666666666666666, 'trial': 0, 'steps': 1, 'total_steps': 1}, 'environment': None, 'performance': {'reward': 1}}
INFO:lcs.agents.racs.RACS:{'agent': {'population': 521, 'numerosity': 573, 'reliable': 0, 'fitness': 0.2165707586431726, 'cover_ratio': 0.3875547226462338, 'region_1': 0.3902751119641715, 'region_2': 0.22264875239923224, 'region_3': 0.0012795905310300703, 'region_4': 0.3857965451055662, 'trial': 1000, 'steps': 1, 'total_steps': 1001}, 'environment': None, 'performance': {'reward': 1}}
INFO:lcs.agents.racs.RACS:{'agent': {'population': 1290, 'numerosity': 1459, 'reliable': 0, 'fitness': 0.2177259977628051, 'cover_ratio': 0.2990966245498356, 'region_1': 0.4682170542635659, 'region_2': 0.23617571059431525, 'region_3': 0.0005167958656330749, 'region_4': 0.295090439

KeyboardInterrupt: 

In [6]:
# Display the last collected metrics record; requires `metrics` from the
# training cell.
metrics[-1]

Exception ignored in: 'zmq.backend.cython.message.Frame.__dealloc__'
Traceback (most recent call last):
  File "zmq/backend/cython/checkrc.pxd", line 12, in zmq.backend.cython.checkrc._check_rc
KeyboardInterrupt: 


NameError: name 'metrics' is not defined

In [None]:
# Keep only classifiers that report themselves as reliable, ordered from
# highest to lowest fitness (ascending sort on the negated fitness).
reliable = [clf for clf in population if clf.is_reliable()]
reliable.sort(key=lambda clf: -clf.fitness)

# Print condition, effect and a one-line summary for the first five.
for cl in reliable[:5]:
    print(cl.condition, cl.effect, sep="\n")
    print(f"q: {cl.q:.3f}, r: {cl.r:.3f}, exp: {cl.exp}, num: {cl.num}")
    print("\n")

In [None]:
def parse_metrics(metrics):
    """Flatten collected metric records into a DataFrame indexed by trial.

    Each record is expected to be a mapping with an 'agent' sub-mapping
    (keys 'trial', 'population', 'reliable', 'region_1' .. 'region_4') and a
    'performance' sub-mapping (key 'reward'). Any other keys are ignored.

    Parameters
    ----------
    metrics : iterable of dict
        Records as produced by the agent run.

    Returns
    -------
    pandas.DataFrame
        One row per record, indexed by 'trial', with columns 'population',
        'reliable', 'reward' and 'region_1' .. 'region_4'.

    Raises
    ------
    KeyError
        If a record lacks one of the keys listed above.
    """
    columns = ['trial', 'population', 'reliable', 'reward',
               'region_1', 'region_2', 'region_3', 'region_4']

    rows = []
    for record in metrics:
        agent_stats = record['agent']
        rows.append([
            agent_stats['trial'],
            agent_stats['population'],
            agent_stats['reliable'],
            record['performance']['reward'],
            agent_stats['region_1'],
            agent_stats['region_2'],
            agent_stats['region_3'],
            agent_stats['region_4'],
        ])

    return pd.DataFrame(rows, columns=columns).set_index('trial')

def _phase_of(trial):
    """Label a trial number: even -> "explore", odd -> "exploit"."""
    if trial % 2 == 0:
        return "explore"
    return "exploit"


# Tabulate the metrics and tag each row with a phase derived from the parity
# of its trial number.
# NOTE(review): presumably this mirrors how explore_exploit alternates phases -
# confirm against the agent implementation.
df = parse_metrics(metrics).assign(mode=lambda frame: frame.index.map(_phase_of))
df.head()

In [None]:
# Split the metrics by the phase label in the 'mode' column.
phase = df['mode']
explore_df = df.loc[phase.eq('explore')]
exploit_df = df.loc[phase.eq('exploit')]

In [None]:
# Rolling-mean reward (window of 1000 rows), one line per phase.
fig, ax = plt.subplots()
for phase_label, phase_df in (('explore', explore_df), ('exploit', exploit_df)):
    smoothed_reward = phase_df['reward'].rolling(window=1000).mean()
    smoothed_reward.plot(label=phase_label, ax=ax)

ax.set(title='Performance', xlabel='Trial', ylabel='Average reward')

ax.legend()
plt.show()

In [None]:
# Rolling mean (window of 50 rows) of each region column on exploit trials.
# Pairs are (metrics column, legend label), drawn in this order.
region_series = (
    ('region_1', 'Region 1 [pi, qi)'),
    ('region_2', 'Region 2 [pmin, qi)'),
    ('region_3', 'Region 3 [pi, qmax)'),
    ('region_4', 'Region 4 [pmin, qmax)'),
)

fig, ax = plt.subplots()
for region_col, region_label in region_series:
    exploit_df[region_col].rolling(window=50).mean().plot(label=region_label, ax=ax)

ax.set(title='Condition Interval Regions', xlabel='Trial', ylabel='Proportion')

ax.legend()
plt.show()

In [None]:
# Rolling mean (window of 1000 rows) of the population size and the number of
# reliable classifiers across all recorded trials.
fig, ax = plt.subplots()
for count_col, count_label in (('population', 'macroclassifiers'), ('reliable', 'reliable')):
    smoothed_count = df[count_col].rolling(window=1000).mean()
    smoothed_count.plot(label=count_label, ax=ax)

ax.set(title='Classifier numerosity', xlabel='Trial', ylabel='Number of classifiers')

ax.legend()
plt.show()