In [9]:
import os
import itertools
import numpy as np
from utils.run_utils import Runner

import gym
import gym_multiplexer
from gym_multiplexer.utils import get_correct_answer

from lcs import Perception
from lcs.agents.acs2 import ACS2, Configuration as CFG_ACS2
from lcs.agents.acs2er import ACS2ER, Configuration as CFG_ACS2ER, ReplayMemory, ReplayMemorySample
from lcs.metrics import population_metrics

# Logger
import logging
logging.basicConfig(level=logging.INFO)


# EXPERIMENT CONFIGURATION

In [10]:
# Number of multiplexer input bits; supported values: 6 | 11 | 20 | 37
BITS = 11 # 6 | 11 | 20 | 37
# Gym environment id built from the chosen bit count
MPX = f'boolean-multiplexer-{BITS}bit-v0'
# Trial counts passed to the runner for the explore and exploit phases
EXPLORE_TRIALS = 500
EXPLOIT_TRIALS = 100
# Passed to the agent configuration as `metrics_trial_frequency`
METRICS_FREQUENCY = 1
KNOWLEDGE_STATE_SAMPLES = 1000 # applies only when 20 or 37 bits, otherwise all possible states verified

# The size of the ER replay memory buffer
ER_BUFFER_SIZE = 10000
# The minimum number of samples in the ER replay memory buffer required before replaying starts (warm-up phase)
ER_BUFFER_MIN_SAMPLES = 50
# The number of samples to be replayed during the ER phase; one experiment series is run per listed value
ER_SAMPLES_NUMBER_LIST = [3]



#######

# Runs are indexed REPEAT_START .. REPEAT_START + REPEAT - 1
REPEAT_START = 1
REPEAT = 3

EXPERIMENT_NAME = "MPX11_TEST1" # Please edit when running a new experiment to avoid overwriting saved results.

In [11]:
# Shared experiment runner used by every experiment below; constructed with the
# experiment name and the environment id.
# NOTE(review): presumably Runner also decides where results are stored - confirm in utils.run_utils.
runner = Runner('MPX', EXPERIMENT_NAME, MPX)

## METRICS

In [12]:
class MpxObservationWrapper(gym.ObservationWrapper):
    """Observation wrapper that turns every element of an observation into its ``str`` form."""

    def observation(self, observation):
        """Return a list holding ``str(x)`` for each element ``x`` of ``observation``."""
        return list(map(str, observation))
        
# Dedicated environment instance; it is only inspected for its properties
# (control bits for the knowledge metric, observation space size for the agent configs).
knowledge_env = MpxObservationWrapper(gym.make(MPX))

def get_transitions(states):
    """Build the ``(p0, correct_answer, p1)`` triples used by the knowledge metric.

    Args:
        states: iterable of bit sequences (values 0/1), one per multiplexer input.

    Returns:
        A list with one tuple per state:
        ``p0`` is the state's bits as float-formatted strings followed by ``'0.0'``,
        ``correct_answer`` is the result of ``get_correct_answer`` for the state
        extended with a trailing ``0``, and ``p1`` is the same bits followed by ``'1.0'``.
    """
    # Read the control-bit count from the base environment. `unwrapped` does not
    # depend on how many wrappers `gym.make` stacks around the environment, unlike
    # a hard-coded `.env.env` chain. It is also loop-invariant, so look it up once.
    control_bits = knowledge_env.unwrapped.control_bits

    transitions = []
    for state in states:
        # Perceptions hold strings such as '0.0' / '1.0'
        bits = [str(float(bit)) for bit in state]
        transitions.append((
            Perception(bits + ['0.0']),
            get_correct_answer(list(state) + [0], control_bits),
            Perception(bits + ['1.0']),
        ))

    return transitions

def _mpx_knowledge(population, transitions, transitions_length) -> float:
    # Take into consideration only reliable classifiers
    reliable_classifiers = [c for c in population if c.is_reliable()]

    if(len(reliable_classifiers) == 0):
        return 0

    nr_correct = 0

    for p0, correct_answer, p1 in transitions:
        if any([True for cl in reliable_classifiers if
                cl.predicts_successfully(
                    p0,
                    correct_answer,
                    p1)]):

            nr_correct += 1

    return nr_correct / transitions_length

if BITS in (6, 11):
    # Small multiplexers: every possible input state is enumerated once and the
    # resulting transitions are reused for each knowledge evaluation.
    def get_all_transitions():
        """Return the transitions for all 2**BITS input states."""
        every_state = list(itertools.product([0, 1], repeat=BITS))
        return get_transitions(every_state)

    TRANSITIONS = get_all_transitions()
    TRANSITIONS_LENGTH = len(TRANSITIONS)

    def mpx_knowledge(population) -> float:
        """Knowledge of ``population`` measured against all precomputed transitions."""
        return _mpx_knowledge(population, TRANSITIONS, TRANSITIONS_LENGTH)

elif BITS in (20, 37):
    # Large multiplexers: a fresh random sample of states is drawn on every
    # knowledge evaluation.
    def get_sampled_transitions():
        """Return the transitions for KNOWLEDGE_STATE_SAMPLES randomly drawn states."""
        sampled_states = np.random.randint(2, size=(KNOWLEDGE_STATE_SAMPLES, BITS))
        return get_transitions(sampled_states)

    def mpx_knowledge(population) -> float:
        """Knowledge of ``population`` measured against a newly sampled set of transitions."""
        sampled = get_sampled_transitions()
        return _mpx_knowledge(population, sampled, KNOWLEDGE_STATE_SAMPLES)

else:
    raise Exception(f'Unsupported BITS number: {BITS}')
    

def mpx_metrics(agent, env):
    """Collect the per-trial metrics for ``agent``.

    Returns a dict with the ``"knowledge"`` score of the agent's population,
    updated with whatever ``population_metrics`` returns for that population
    and ``env``.
    """
    collected = {"knowledge": mpx_knowledge(agent.population)}
    collected.update(population_metrics(agent.population, env))
    return collected


def _weight_func_reward(rm: ReplayMemory, sample: ReplayMemorySample):
    if(sample.reward == 0):
        return 1
    
    return 5

def _weight_func_unique(rm: ReplayMemory, sample: ReplayMemorySample):
    existing_count = sum(1 for s in rm if sample.state == s.state and sample.action == s.action and sample.reward == s.reward and sample.next_state == s.next_state and sample.done == s.done)

    return 1 / (existing_count * 2 + 1)

## EXPERIMENT

In [13]:
def _run_experiment(agent, path):
    """Run one experiment for ``agent`` on a newly created, wrapped MPX environment.

    The explore/exploit trial counts come from the notebook configuration and
    ``path`` is forwarded unchanged to the runner.
    """
    env = MpxObservationWrapper(gym.make(MPX))
    runner.run_experiment(agent, env, EXPLORE_TRIALS, EXPLOIT_TRIALS, path)
    
def run_acs2_experiment():
    """Run the baseline ACS2 experiment REPEAT times, each with a freshly built agent.

    Run ``i`` (REPEAT_START .. REPEAT_START + REPEAT - 1) uses the path ``'<i>'``.
    """
    last_run = REPEAT_START + REPEAT
    for run_id in range(REPEAT_START, last_run):
        # A new configuration and agent per run, so runs do not share state
        agent = ACS2(CFG_ACS2(
            classifier_length=knowledge_env.env.observation_space.n,
            number_of_possible_actions=2,
            do_ga=True,
            metrics_trial_frequency=METRICS_FREQUENCY,
            user_metrics_collector_fcn=mpx_metrics))

        _run_experiment(agent, str(run_id))

def _run_acs2er_experiment(er_samples_number: int):
    """Run the ACS2ER experiment REPEAT times for one replay-sample count.

    Args:
        er_samples_number: number of samples replayed during the ER phase;
            passed to the agent configuration and used in the output path.

    Run ``i`` uses the path ``m_<er_samples_number>-ER/<i>``.
    """
    # The output directory includes the sample count, so runs for different
    # entries of ER_SAMPLES_NUMBER_LIST do not share one hard-coded 'm_3-ER' path.
    experiment_dir = f'm_{er_samples_number}-ER'

    for i in range(REPEAT_START, REPEAT_START + REPEAT):
        # Create agent
        cfg = CFG_ACS2ER(
            classifier_length=knowledge_env.env.observation_space.n,
            number_of_possible_actions=2,
            do_ga=True,
            metrics_trial_frequency=METRICS_FREQUENCY,
            er_buffer_size=ER_BUFFER_SIZE,
            er_min_samples=ER_BUFFER_MIN_SAMPLES,
            er_samples_number=er_samples_number,
            user_metrics_collector_fcn=mpx_metrics)
        agent = ACS2ER(cfg)

        _run_experiment(agent, os.path.join(experiment_dir, f'{i}'))

def run_acs2er_experiments():
    """Run the ACS2ER experiment series once for every entry of ER_SAMPLES_NUMBER_LIST."""
    for samples in ER_SAMPLES_NUMBER_LIST:
        label = f"ACS2ER - {samples}"
        print(f"START - {label}")
        _run_acs2er_experiment(samples)
        print(f"END - {label}")

def _run_acs2per_experiment(er_samples_number: int):
    """Run the reward-weighted ACS2ER experiment REPEAT times for one replay-sample count.

    The agent is configured with ``_weight_func_reward`` as its ``er_weight_function``.

    Args:
        er_samples_number: number of samples replayed during the ER phase;
            passed to the agent configuration and used in the output path.

    Run ``i`` uses the path ``m_<er_samples_number>-pER_reward/<i>``.
    """
    # The output directory includes the sample count, so runs for different
    # entries of ER_SAMPLES_NUMBER_LIST do not share one hard-coded 'm_3-pER_reward' path.
    experiment_dir = f'm_{er_samples_number}-pER_reward'

    for i in range(REPEAT_START, REPEAT_START + REPEAT):
        # Create agent
        cfg = CFG_ACS2ER(
            classifier_length=knowledge_env.env.observation_space.n,
            number_of_possible_actions=2,
            do_ga=True,
            metrics_trial_frequency=METRICS_FREQUENCY,
            er_buffer_size=ER_BUFFER_SIZE,
            er_min_samples=ER_BUFFER_MIN_SAMPLES,
            er_samples_number=er_samples_number,
            er_weight_function=_weight_func_reward,
            user_metrics_collector_fcn=mpx_metrics)
        agent = ACS2ER(cfg)

        _run_experiment(agent, os.path.join(experiment_dir, f'{i}'))

def run_acs2per_experiments():
    """Run the reward-weighted ACS2ER series once for every entry of ER_SAMPLES_NUMBER_LIST."""
    for samples in ER_SAMPLES_NUMBER_LIST:
        print("START - ACS2pER - reward")
        _run_acs2per_experiment(samples)
        print("END - ACS2pER - reward")


def _run_acs2per2_experiment(er_samples_number: int):
    """Run the uniqueness-weighted ACS2ER experiment REPEAT times for one replay-sample count.

    The agent is configured with ``_weight_func_unique`` as its ``er_weight_function``.

    Args:
        er_samples_number: number of samples replayed during the ER phase;
            passed to the agent configuration and used in the output path.

    Run ``i`` uses the path ``m_<er_samples_number>-pER_unique/<i>``.
    """
    # The output directory includes the sample count, so runs for different
    # entries of ER_SAMPLES_NUMBER_LIST do not share one hard-coded 'm_3-pER_unique' path.
    experiment_dir = f'm_{er_samples_number}-pER_unique'

    for i in range(REPEAT_START, REPEAT_START + REPEAT):
        # Create agent
        cfg = CFG_ACS2ER(
            classifier_length=knowledge_env.env.observation_space.n,
            number_of_possible_actions=2,
            do_ga=True,
            metrics_trial_frequency=METRICS_FREQUENCY,
            er_buffer_size=ER_BUFFER_SIZE,
            er_min_samples=ER_BUFFER_MIN_SAMPLES,
            er_samples_number=er_samples_number,
            er_weight_function=_weight_func_unique,
            user_metrics_collector_fcn=mpx_metrics)
        agent = ACS2ER(cfg)

        _run_experiment(agent, os.path.join(experiment_dir, f'{i}'))

def run_acs2per2_experiments():
    """Run the uniqueness-weighted ACS2ER series once for every entry of ER_SAMPLES_NUMBER_LIST."""
    for samples in ER_SAMPLES_NUMBER_LIST:
        print("START - ACS2pER - unique")
        _run_acs2per2_experiment(samples)
        print("END - ACS2pER - unique")

### RUN ACS2 Experiments

In [14]:
# Baseline: plain ACS2 without experience replay, repeated REPEAT times.
run_acs2_experiment()

INFO:lcs.agents.Agent:{'trial': 50, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.0017652999999882013, 'knowledge': 0, 'population': 94, 'numerosity': 94, 'reliable': 0}
INFO:lcs.agents.Agent:{'trial': 100, 'steps_in_trial': 1, 'reward': 0, 'perf_time': 0.001418299999983219, 'knowledge': 0, 'population': 195, 'numerosity': 195, 'reliable': 0}
INFO:lcs.agents.Agent:{'trial': 150, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.007408300000008694, 'knowledge': 0, 'population': 323, 'numerosity': 323, 'reliable': 0}
INFO:lcs.agents.Agent:{'trial': 200, 'steps_in_trial': 1, 'reward': 0, 'perf_time': 0.0022011000000077274, 'knowledge': 0, 'population': 456, 'numerosity': 456, 'reliable': 0}
INFO:lcs.agents.Agent:{'trial': 250, 'steps_in_trial': 1, 'reward': 0, 'perf_time': 0.004123500000019931, 'knowledge': 0, 'population': 697, 'numerosity': 698, 'reliable': 0}
INFO:lcs.agents.Agent:{'trial': 300, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.0039660999999853175, 'knowledge

### RUN ACS2ER Experiments

In [15]:
# ACS2ER with the default replay weighting, once per entry of ER_SAMPLES_NUMBER_LIST.
run_acs2er_experiments()

INFO:lcs.agents.Agent:{'trial': 50, 'steps_in_trial': 1, 'reward': 0, 'perf_time': 0.0008976000000018303, 'knowledge': 0, 'population': 3, 'numerosity': 3, 'reliable': 0}


START - ACS2ER - 3


INFO:lcs.agents.Agent:{'trial': 100, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.02106679999999983, 'knowledge': 0, 'population': 564, 'numerosity': 564, 'reliable': 0}
INFO:lcs.agents.Agent:{'trial': 150, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.04771809999999732, 'knowledge': 0.00146484375, 'population': 1328, 'numerosity': 1328, 'reliable': 5}
INFO:lcs.agents.Agent:{'trial': 200, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.08851490000000695, 'knowledge': 0.25341796875, 'population': 2365, 'numerosity': 2365, 'reliable': 45}
INFO:lcs.agents.Agent:{'trial': 250, 'steps_in_trial': 1, 'reward': 0, 'perf_time': 0.11666439999999056, 'knowledge': 0.42919921875, 'population': 3118, 'numerosity': 3118, 'reliable': 84}
INFO:lcs.agents.Agent:{'trial': 300, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.11896439999998165, 'knowledge': 0.47021484375, 'population': 3763, 'numerosity': 3765, 'reliable': 103}
INFO:lcs.agents.Agent:{'trial': 350, 'steps_in_trial': 1, 

END - ACS2ER - 3


In [16]:
# ACS2ER with the reward-based weight function (_weight_func_reward).
run_acs2per_experiments()

INFO:lcs.agents.Agent:{'trial': 50, 'steps_in_trial': 1, 'reward': 0, 'perf_time': 0.0007014999998773419, 'knowledge': 0, 'population': 3, 'numerosity': 3, 'reliable': 0}


START - ACS2pER - reward


INFO:lcs.agents.Agent:{'trial': 100, 'steps_in_trial': 1, 'reward': 0, 'perf_time': 0.02769650000004731, 'knowledge': 0.65625, 'population': 404, 'numerosity': 404, 'reliable': 7}
INFO:lcs.agents.Agent:{'trial': 150, 'steps_in_trial': 1, 'reward': 0, 'perf_time': 0.04367879999995239, 'knowledge': 0.82275390625, 'population': 824, 'numerosity': 824, 'reliable': 57}
INFO:lcs.agents.Agent:{'trial': 200, 'steps_in_trial': 1, 'reward': 0, 'perf_time': 0.036965900000041074, 'knowledge': 0.8681640625, 'population': 1042, 'numerosity': 1043, 'reliable': 122}
INFO:lcs.agents.Agent:{'trial': 250, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.0733989999998812, 'knowledge': 0.92236328125, 'population': 1010, 'numerosity': 1014, 'reliable': 171}
INFO:lcs.agents.Agent:{'trial': 300, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.035982600000124876, 'knowledge': 0.9580078125, 'population': 950, 'numerosity': 960, 'reliable': 168}
INFO:lcs.agents.Agent:{'trial': 350, 'steps_in_trial': 1, 'r

END - ACS2pER - reward


In [17]:
# ACS2ER with the uniqueness-based weight function (_weight_func_unique).
run_acs2per2_experiments()

INFO:lcs.agents.Agent:{'trial': 50, 'steps_in_trial': 1, 'reward': 0, 'perf_time': 0.0029211999999461113, 'knowledge': 0, 'population': 2, 'numerosity': 2, 'reliable': 0}


START - ACS2pER - unique


INFO:lcs.agents.Agent:{'trial': 100, 'steps_in_trial': 1, 'reward': 0, 'perf_time': 0.0408873999995194, 'knowledge': 0, 'population': 456, 'numerosity': 456, 'reliable': 0}
INFO:lcs.agents.Agent:{'trial': 150, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.07748759999958565, 'knowledge': 0.0791015625, 'population': 1462, 'numerosity': 1462, 'reliable': 6}
INFO:lcs.agents.Agent:{'trial': 200, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.1126853999994637, 'knowledge': 0.44287109375, 'population': 2398, 'numerosity': 2398, 'reliable': 42}
INFO:lcs.agents.Agent:{'trial': 250, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.11873509999986709, 'knowledge': 0.52880859375, 'population': 3265, 'numerosity': 3267, 'reliable': 92}
INFO:lcs.agents.Agent:{'trial': 300, 'steps_in_trial': 1, 'reward': 0, 'perf_time': 0.1139885000002323, 'knowledge': 0.619140625, 'population': 3957, 'numerosity': 3961, 'reliable': 120}
INFO:lcs.agents.Agent:{'trial': 350, 'steps_in_trial': 1, 'reward':

END - ACS2pER - unique
