In [18]:
import os
import itertools
import numpy as np
from utils.run_utils import Runner

import gym
import gym_multiplexer
from gym_multiplexer.utils import get_correct_answer

from lcs import Perception
from lcs.agents.acs2 import ACS2, Configuration as CFG_ACS2
from lcs.agents.acs2er import ACS2ER, Configuration as CFG_ACS2ER, ReplayMemory, ReplayMemorySample
from lcs.metrics import population_metrics

# Logger
import logging
logging.basicConfig(level=logging.INFO)


# EXPERIMENT CONFIGURATION

In [19]:
# Number of multiplexer bits; selects the gym environment id built below.
BITS = 11 # 6 | 11 | 20 | 37
MPX = f'boolean-multiplexer-{BITS}bit-v0'
# Trial counts handed to `runner.run_experiment` for the explore and exploit phases.
EXPLORE_TRIALS = 500
EXPLOIT_TRIALS = 100
# Passed to the agent configuration as `metrics_trial_frequency`.
METRICS_FREQUENCY = 1
KNOWLEDGE_STATE_SAMPLES = 1000 # Number of random states verified when BITS is 20 or 37; for 6 or 11 bits every possible state is verified

# The size of the ER replay memory buffer
ER_BUFFER_SIZE = 10000
# The minimum number of samples in the ER replay memory buffer before replaying starts (warm-up phase)
ER_BUFFER_MIN_SAMPLES = 50
# The number of samples to be replayed during the ER phase; each list entry is run as a separate experiment
ER_SAMPLES_NUMBER_LIST = [3]



#######

# Repeat indices run over range(REPEAT_START, REPEAT_START + REPEAT); each index names a result sub-path.
REPEAT_START = 1
REPEAT = 10

EXPERIMENT_NAME = "MPX11_TEST2" # Change this when running a new experiment so that previously saved results are not overwritten.

In [20]:
# Shared runner; `_run_experiment` calls `runner.run_experiment` for every agent.
runner = Runner('MPX', EXPERIMENT_NAME, MPX)

## METRICS

In [21]:
class MpxObservationWrapper(gym.ObservationWrapper):
    """Observation wrapper that exposes each observation element as a string."""

    def observation(self, observation):
        """Return a list holding ``str(element)`` for every element of ``observation``."""
        return list(map(str, observation))
        
# Environment instance kept for introspection only: the metric and configuration code reads
# its control bits and observation-space size, while each experiment run creates its own env.
knowledge_env = MpxObservationWrapper(gym.make(MPX))

def get_transitions(states):
    """Build one ``(p0, action, p1)`` triple per state for knowledge verification.

    For every state:
      * ``p0`` is a ``Perception`` of the state's values rendered as float strings,
        with ``'0.0'`` appended as the last attribute,
      * ``action`` is whatever ``get_correct_answer`` returns for the state extended
        with a trailing ``0`` and the environment's control bits,
      * ``p1`` is the same perception with ``'1.0'`` appended instead.

    Returns:
        A list of the triples, in the order of ``states``.
    """
    def _perception(state, last_bit):
        # Values are formatted via float() so 0/1 become '0.0'/'1.0'.
        return Perception([str(float(bit)) for bit in state] + [last_bit])

    return [
        (
            _perception(state, '0.0'),
            get_correct_answer(list(state) + [0], knowledge_env.env.env.control_bits),
            _perception(state, '1.0'),
        )
        for state in states
    ]

def _mpx_knowledge(population, transitions, transitions_length) -> float:
    # Take into consideration only reliable classifiers
    reliable_classifiers = [c for c in population if c.is_reliable()]

    if(len(reliable_classifiers) == 0):
        return 0

    nr_correct = 0

    for p0, correct_answer, p1 in transitions:
        if any([True for cl in reliable_classifiers if
                cl.predicts_successfully(
                    p0,
                    correct_answer,
                    p1)]):

            nr_correct += 1

    return nr_correct / transitions_length

if BITS in (6, 11):  # Verify every possible state
    def get_all_transitions():
        """Build the verification transitions for every BITS-long binary state."""
        every_state = list(itertools.product([0, 1], repeat=BITS))
        return get_transitions(every_state)

    # Computed once and reused by every metric call.
    TRANSITIONS = get_all_transitions()
    TRANSITIONS_LENGTH = len(TRANSITIONS)

    def mpx_knowledge(population) -> float:
        """Knowledge of ``population`` measured against all precomputed transitions."""
        return _mpx_knowledge(population, TRANSITIONS, TRANSITIONS_LENGTH)

elif BITS in (20, 37):  # Verify a random sample of states
    def get_sampled_transitions():
        """Build transitions for KNOWLEDGE_STATE_SAMPLES randomly drawn binary states."""
        sampled_states = np.random.randint(2, size=(KNOWLEDGE_STATE_SAMPLES, BITS))
        return get_transitions(sampled_states)

    def mpx_knowledge(population) -> float:
        """Knowledge of ``population`` measured against a sample drawn on each call."""
        fresh_transitions = get_sampled_transitions()
        return _mpx_knowledge(population, fresh_transitions, KNOWLEDGE_STATE_SAMPLES)
else:
    raise Exception(f'Unsupported BITS number: {BITS}')
    
def mpx_specificity(population) -> float:
    """Return the mean ``specificity`` attribute over ``population``.

    An empty population yields ``0``.
    """
    size = len(population)
    if size == 0:
        return 0

    total = sum(classifier.specificity for classifier in population)
    return total / size

def mpx_metrics(agent, env):
    """Collect the per-trial metrics dictionary for ``agent``.

    The result holds the ``"knowledge"`` and ``"specificity"`` values computed
    from the agent's population, updated with whatever
    ``population_metrics(agent.population, env)`` returns.
    """
    collected = dict(
        knowledge=mpx_knowledge(agent.population),
        specificity=mpx_specificity(agent.population),
    )
    collected.update(population_metrics(agent.population, env))
    return collected


def _weight_func_reward(rm: ReplayMemory, sample: ReplayMemorySample):
    """Replay weight based on reward: ``1`` when ``sample.reward`` equals 0, otherwise ``5``.

    ``rm`` is accepted for signature compatibility and is not used.
    """
    return 1 if sample.reward == 0 else 5

def _weight_func_unique(rm: ReplayMemory, sample: ReplayMemorySample):
    """Replay weight that shrinks as more identical samples are stored in ``rm``.

    A stored sample counts as identical when its state, action, reward,
    next_state and done all compare equal to those of ``sample``. With ``k``
    identical samples found, the weight is ``1 / (2 * k + 1)``.
    """
    def _same_transition(stored):
        return (sample.state == stored.state
                and sample.action == stored.action
                and sample.reward == stored.reward
                and sample.next_state == stored.next_state
                and sample.done == stored.done)

    existing_count = 0
    for stored in rm:
        if _same_transition(stored):
            existing_count += 1

    return 1 / (2 * existing_count + 1)

## EXPERIMENT

In [22]:
def _run_experiment(agent, path):
    """Run ``agent`` on a newly created, wrapped multiplexer environment.

    The explore/exploit trial counts come from the notebook configuration and
    ``path`` is forwarded to ``runner.run_experiment`` unchanged.
    """
    env = MpxObservationWrapper(gym.make(MPX))
    runner.run_experiment(agent, env, EXPLORE_TRIALS, EXPLOIT_TRIALS, path)
    
def run_acs2_experiment():
    """Run the ACS2 experiment once per repeat index.

    Repeat indices span ``range(REPEAT_START, REPEAT_START + REPEAT)``; a new
    agent is built for each index and the index is used as the result path.
    """
    for run_no in range(REPEAT_START, REPEAT_START + REPEAT):
        # A new configuration and agent for every repeat
        config = CFG_ACS2(
            classifier_length=knowledge_env.env.observation_space.n,
            number_of_possible_actions=2,
            do_ga=True,
            metrics_trial_frequency=METRICS_FREQUENCY,
            user_metrics_collector_fcn=mpx_metrics)

        _run_experiment(ACS2(config), str(run_no))

def _run_acs2er_experiment(er_samples_number: int):
    """Run the repeated ACS2ER experiment for one replay-sample count.

    No ``er_weight_function`` is passed, so the configuration's default applies.

    Args:
        er_samples_number: forwarded to the configuration as ``er_samples_number``
            and used to name the results directory (``m_<n>-ER``).
    """
    # Derive the directory from the parameter. The previous hard-coded 'm_3-ER'
    # made every entry of ER_SAMPLES_NUMBER_LIST share one results path.
    results_dir = f'm_{er_samples_number}-ER'

    for i in range(REPEAT_START, REPEAT_START + REPEAT):
        # Create agent
        cfg = CFG_ACS2ER(
            classifier_length=knowledge_env.env.observation_space.n,
            number_of_possible_actions=2,
            do_ga=True,
            metrics_trial_frequency=METRICS_FREQUENCY,
            er_buffer_size=ER_BUFFER_SIZE,
            er_min_samples=ER_BUFFER_MIN_SAMPLES,
            er_samples_number=er_samples_number,
            user_metrics_collector_fcn=mpx_metrics)
        agent = ACS2ER(cfg)

        _run_experiment(agent, os.path.join(results_dir, f'{i}'))

def run_acs2er_experiments():
    """Run the ACS2ER experiment for every entry of ER_SAMPLES_NUMBER_LIST.

    A START/END marker naming the sample count is printed around each run.
    """
    for samples in ER_SAMPLES_NUMBER_LIST:
        print(f"START - ACS2ER - {samples}")
        _run_acs2er_experiment(samples)
        print(f"END - ACS2ER - {samples}")

def _run_acs2per_experiment(er_samples_number: int):
    """Run the repeated ACS2ER experiment weighted by ``_weight_func_reward``.

    Args:
        er_samples_number: forwarded to the configuration as ``er_samples_number``
            and used to name the results directory (``m_<n>-pER_reward``).
    """
    # Derive the directory from the parameter. The previous hard-coded
    # 'm_3-pER_reward' made every entry of ER_SAMPLES_NUMBER_LIST share one path.
    results_dir = f'm_{er_samples_number}-pER_reward'

    for i in range(REPEAT_START, REPEAT_START + REPEAT):
        # Create agent
        cfg = CFG_ACS2ER(
            classifier_length=knowledge_env.env.observation_space.n,
            number_of_possible_actions=2,
            do_ga=True,
            metrics_trial_frequency=METRICS_FREQUENCY,
            er_buffer_size=ER_BUFFER_SIZE,
            er_min_samples=ER_BUFFER_MIN_SAMPLES,
            er_samples_number=er_samples_number,
            er_weight_function=_weight_func_reward,
            user_metrics_collector_fcn=mpx_metrics)
        agent = ACS2ER(cfg)

        _run_experiment(agent, os.path.join(results_dir, f'{i}'))

def run_acs2per_experiments():
    """Run the reward-weighted ACS2ER experiment for every entry of ER_SAMPLES_NUMBER_LIST.

    A fixed START/END marker is printed around each run.
    """
    for samples in ER_SAMPLES_NUMBER_LIST:
        print(f"START - ACS2pER - reward")
        _run_acs2per_experiment(samples)
        print(f"END - ACS2pER - reward")


def _run_acs2per2_experiment(er_samples_number: int):
    """Run the repeated ACS2ER experiment weighted by ``_weight_func_unique``.

    Args:
        er_samples_number: forwarded to the configuration as ``er_samples_number``
            and used to name the results directory (``m_<n>-pER_unique``).
    """
    # Derive the directory from the parameter. The previous hard-coded
    # 'm_3-pER_unique' made every entry of ER_SAMPLES_NUMBER_LIST share one path.
    results_dir = f'm_{er_samples_number}-pER_unique'

    for i in range(REPEAT_START, REPEAT_START + REPEAT):
        # Create agent
        cfg = CFG_ACS2ER(
            classifier_length=knowledge_env.env.observation_space.n,
            number_of_possible_actions=2,
            do_ga=True,
            metrics_trial_frequency=METRICS_FREQUENCY,
            er_buffer_size=ER_BUFFER_SIZE,
            er_min_samples=ER_BUFFER_MIN_SAMPLES,
            er_samples_number=er_samples_number,
            er_weight_function=_weight_func_unique,
            user_metrics_collector_fcn=mpx_metrics)
        agent = ACS2ER(cfg)

        _run_experiment(agent, os.path.join(results_dir, f'{i}'))

def run_acs2per2_experiments():
    """Run the uniqueness-weighted ACS2ER experiment for every entry of ER_SAMPLES_NUMBER_LIST.

    A fixed START/END marker is printed around each run.
    """
    for samples in ER_SAMPLES_NUMBER_LIST:
        print(f"START - ACS2pER - unique")
        _run_acs2per2_experiment(samples)
        print(f"END - ACS2pER - unique")

### RUN ACS2 Experiments

In [23]:
run_acs2_experiment()

INFO:lcs.agents.Agent:{'trial': 50, 'steps_in_trial': 1, 'reward': 0, 'perf_time': 0.00255250000009255, 'knowledge': 0, 'specificity': 0.4511019283746557, 'population': 121, 'numerosity': 121, 'reliable': 0}
INFO:lcs.agents.Agent:{'trial': 100, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.007835399999748915, 'knowledge': 0, 'specificity': 0.49975704567541257, 'population': 343, 'numerosity': 343, 'reliable': 0}
INFO:lcs.agents.Agent:{'trial': 150, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.0031720999995741295, 'knowledge': 0, 'specificity': 0.5041229385307348, 'population': 667, 'numerosity': 667, 'reliable': 0}
INFO:lcs.agents.Agent:{'trial': 200, 'steps_in_trial': 1, 'reward': 0, 'perf_time': 0.010331700001188437, 'knowledge': 0, 'specificity': 0.5054903730445248, 'population': 1108, 'numerosity': 1108, 'reliable': 0}
INFO:lcs.agents.Agent:{'trial': 250, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.028134399999544257, 'knowledge': 0.00048828125, 'specificity': 

### RUN ACS2ER Experiments

In [24]:
run_acs2er_experiments()

INFO:lcs.agents.Agent:{'trial': 50, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.0009033999995153863, 'knowledge': 0, 'specificity': 0.041666666666666664, 'population': 2, 'numerosity': 2, 'reliable': 0}


START - ACS2ER - 3


INFO:lcs.agents.Agent:{'trial': 100, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.0398220000006404, 'knowledge': 0, 'specificity': 0.47479307750188104, 'population': 443, 'numerosity': 443, 'reliable': 0}
INFO:lcs.agents.Agent:{'trial': 150, 'steps_in_trial': 1, 'reward': 0, 'perf_time': 0.04901210000025458, 'knowledge': 0.0009765625, 'specificity': 0.47603438228438283, 'population': 1144, 'numerosity': 1144, 'reliable': 5}
INFO:lcs.agents.Agent:{'trial': 200, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.15089959999932034, 'knowledge': 0.2060546875, 'specificity': 0.49901590300739795, 'population': 2117, 'numerosity': 2117, 'reliable': 41}
INFO:lcs.agents.Agent:{'trial': 250, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.16633229999933974, 'knowledge': 0.32958984375, 'specificity': 0.5165682414698165, 'population': 3048, 'numerosity': 3048, 'reliable': 60}
INFO:lcs.agents.Agent:{'trial': 300, 'steps_in_trial': 1, 'reward': 0, 'perf_time': 0.38436270000238437, 'knowl

END - ACS2ER - 3


In [25]:
run_acs2per_experiments()

INFO:lcs.agents.Agent:{'trial': 50, 'steps_in_trial': 1, 'reward': 0, 'perf_time': 0.001034300003084354, 'knowledge': 0, 'specificity': 0.027777777777777776, 'population': 3, 'numerosity': 3, 'reliable': 0}


START - ACS2pER - reward


INFO:lcs.agents.Agent:{'trial': 100, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.019391199999517994, 'knowledge': 0.45361328125, 'specificity': 0.4806079664570232, 'population': 318, 'numerosity': 318, 'reliable': 7}
INFO:lcs.agents.Agent:{'trial': 150, 'steps_in_trial': 1, 'reward': 0, 'perf_time': 0.02926379999917117, 'knowledge': 0.71728515625, 'specificity': 0.4630143319463713, 'population': 721, 'numerosity': 721, 'reliable': 57}
INFO:lcs.agents.Agent:{'trial': 200, 'steps_in_trial': 1, 'reward': 0, 'perf_time': 0.053631700000551064, 'knowledge': 0.85009765625, 'specificity': 0.4689160467587691, 'population': 941, 'numerosity': 941, 'reliable': 108}
INFO:lcs.agents.Agent:{'trial': 250, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.05829950000043027, 'knowledge': 0.91259765625, 'specificity': 0.47995042286380984, 'population': 1143, 'numerosity': 1145, 'reliable': 190}
INFO:lcs.agents.Agent:{'trial': 300, 'steps_in_trial': 1, 'reward': 0, 'perf_time': 0.07039820000136

END - ACS2pER - reward


In [26]:
run_acs2per2_experiments()

INFO:lcs.agents.Agent:{'trial': 50, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.0009910999979183543, 'knowledge': 0, 'specificity': 0.0, 'population': 2, 'numerosity': 2, 'reliable': 0}


START - ACS2pER - unique


INFO:lcs.agents.Agent:{'trial': 100, 'steps_in_trial': 1, 'reward': 0, 'perf_time': 0.028417900000931695, 'knowledge': 0, 'specificity': 0.40220771144278666, 'population': 536, 'numerosity': 536, 'reliable': 0}
INFO:lcs.agents.Agent:{'trial': 150, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.06427669999902719, 'knowledge': 0.22998046875, 'specificity': 0.439279983955073, 'population': 1662, 'numerosity': 1662, 'reliable': 11}
INFO:lcs.agents.Agent:{'trial': 200, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.08655390000058105, 'knowledge': 0.42041015625, 'specificity': 0.457071628359478, 'population': 2741, 'numerosity': 2741, 'reliable': 38}
INFO:lcs.agents.Agent:{'trial': 250, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.13258889999997336, 'knowledge': 0.51708984375, 'specificity': 0.4984437751004014, 'population': 3320, 'numerosity': 3322, 'reliable': 65}
INFO:lcs.agents.Agent:{'trial': 300, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.318454300002486, 'kno

END - ACS2pER - unique
