In [17]:
import os
import itertools
import numpy as np
from utils.run_utils import Runner

import gym
import gym_multiplexer
from gym_multiplexer.utils import get_correct_answer

from lcs import Perception
from lcs.agents.acs2 import ACS2, Configuration as CFG_ACS2, Classifier
from lcs.agents.acs2er import ACS2ER, Configuration as CFG_ACS2ER, ReplayMemory, ReplayMemorySample
from lcs.metrics import population_metrics

# Logger
import logging
logging.basicConfig(level=logging.INFO)


# EXPERIMENT CONFIGURATION

In [18]:
BITS = 6 # 6 | 11 | 20 | 37
MPX = f'boolean-multiplexer-{BITS}bit-v0'
EXPLORE_TRIALS = 500
EXPLOIT_TRIALS = 500
METRICS_FREQUENCY = 1
KNOWLEDGE_STATE_SAMPLES = 1000 # applies only when BITS is 20 or 37; otherwise all possible states are verified

# The size of the ER replay memory buffer
ER_BUFFER_SIZE = 10000
# The minimum number of samples in the ER replay memory buffer before replaying starts (warm-up phase)
ER_BUFFER_MIN_SAMPLES = 50
# The number of samples to be replayed during the ER phase (one experiment series per list entry)
ER_SAMPLES_NUMBER_LIST = [3]



#######

# Repetitions are numbered REPEAT_START .. REPEAT_START + REPEAT - 1
REPEAT_START = 1
REPEAT = 15

EXPERIMENT_NAME = "MPX6_TEST2" # Please edit when running a new experiment to avoid overwriting saved results.

In [19]:
# Shared experiment runner used by `_run_experiment`; built from the 'MPX' label,
# the experiment name and the environment id.
# NOTE(review): presumably Runner stores results under EXPERIMENT_NAME - confirm in utils.run_utils.
runner = Runner('MPX', EXPERIMENT_NAME, MPX)

## METRICS

In [20]:
class MpxObservationWrapper(gym.ObservationWrapper):
    """Observation wrapper that turns every element of an observation into a string."""

    def observation(self, observation):
        """Return the observation as a list with ``str()`` applied to each element."""
        return list(map(str, observation))
        
# Module-level environment instance that is only inspected, never stepped, in this notebook:
# `get_transitions` reads its control bits and the agent configurations read its
# observation-space size.
knowledge_env = MpxObservationWrapper(gym.make(MPX))

def get_transitions(states):
    """Build ``(p0, correct_answer, p1)`` triples for the given states.

    For every state the bits are rendered as ``str(float(bit))``. ``p0`` is the
    state with ``'0.0'`` appended, ``p1`` is the same state with ``'1.0'``
    appended, and ``correct_answer`` comes from ``get_correct_answer`` applied
    to the state with ``0`` appended.

    Args:
        states: iterable of bit sequences.

    Returns:
        A list with one triple per state, in input order.
    """
    transitions = []

    for state in states:
        bits = [str(float(bit)) for bit in state]
        before = Perception(bits + ['0.0'])
        answer = get_correct_answer(list(state) + [0], knowledge_env.env.env.control_bits)
        after = Perception(bits + ['1.0'])
        transitions.append((before, answer, after))

    return transitions

def _mpx_knowledge(population, transitions, transitions_length) -> float:
    # Take into consideration only reliable classifiers
    reliable_classifiers = [c for c in population if c.is_reliable()]

    if(len(reliable_classifiers) == 0):
        return 0

    nr_correct = 0

    for p0, correct_answer, p1 in transitions:
        if any([True for cl in reliable_classifiers if
                cl.predicts_successfully(
                    p0,
                    correct_answer,
                    p1)]):

            nr_correct += 1

    return nr_correct / transitions_length

if BITS in (6, 11):  # Verify all
    def get_all_transitions():
        """Return the transitions for every possible BITS-long binary state."""
        every_state = list(itertools.product([0, 1], repeat=BITS))
        return get_transitions(every_state)

    # Built once up front and reused by every knowledge measurement.
    TRANSITIONS = get_all_transitions()
    TRANSITIONS_LENGTH = len(TRANSITIONS)

    def mpx_knowledge(population) -> float:
        """Knowledge of ``population`` measured over all possible states."""
        return _mpx_knowledge(population, TRANSITIONS, TRANSITIONS_LENGTH)

elif BITS in (20, 37):  # Verify samples
    def get_sampled_transitions():
        """Return transitions for KNOWLEDGE_STATE_SAMPLES randomly drawn states."""
        sampled_states = np.random.randint(2, size=(KNOWLEDGE_STATE_SAMPLES, BITS))
        return get_transitions(sampled_states)

    def mpx_knowledge(population) -> float:
        """Knowledge of ``population`` measured over a random sample of states.

        A fresh sample is drawn on every call.
        """
        return _mpx_knowledge(population, get_sampled_transitions(), KNOWLEDGE_STATE_SAMPLES)
else:
    raise Exception(f'Unsupported BITS number: {BITS}')


def mpx_specificity(population) -> float:
    """Return the mean ``specificity`` of the classifiers in ``population``.

    Returns ``0`` for an empty population.
    """
    size = len(population)
    if size == 0:
        return 0

    total = sum(classifier.specificity for classifier in population)
    return total / size

def mpx_metrics(agent, env):
    """Collect the per-trial metrics for ``agent``.

    Combines the notebook's own ``knowledge`` and ``specificity`` measures with
    whatever ``population_metrics`` reports for the agent's population; keys
    returned by ``population_metrics`` win on a name clash.
    """
    population = agent.population

    return {
        "knowledge": mpx_knowledge(population),
        "specificity": mpx_specificity(population),
        **population_metrics(population, env),
    }


def _weight_func_reward(rm: ReplayMemory, sample: ReplayMemorySample):
    if(sample.reward == 0):
        return 1
    
    return 5

def _weight_func_unique(rm: ReplayMemory, sample: ReplayMemorySample):
    existing_count = sum(1 for s in rm if sample.state == s.state and sample.action == s.action and sample.reward == s.reward and sample.next_state == s.next_state and sample.done == s.done)

    return 1 / (existing_count * 2 + 1)

## EXPERIMENT

In [21]:
def _run_experiment(agent, path):
    """Run one experiment for ``agent`` on a freshly created multiplexer environment.

    The shared ``runner`` is called with the agent, the new wrapped environment,
    EXPLORE_TRIALS, EXPLOIT_TRIALS and ``path``.
    """
    env = MpxObservationWrapper(gym.make(MPX))
    runner.run_experiment(agent, env, EXPLORE_TRIALS, EXPLOIT_TRIALS, path)
    
def run_acs2_experiment():
    """Run the ACS2 experiment REPEAT times, each with a newly created agent.

    Repetition ``i`` (from REPEAT_START) is passed to ``_run_experiment`` with
    ``str(i)`` as its path.
    """
    first, stop = REPEAT_START, REPEAT_START + REPEAT

    for repeat_no in range(first, stop):
        # A fresh configuration and agent for every repetition
        agent = ACS2(CFG_ACS2(
            classifier_length=knowledge_env.env.observation_space.n,
            number_of_possible_actions=2,
            do_ga=True,
            metrics_trial_frequency=METRICS_FREQUENCY,
            user_metrics_collector_fcn=mpx_metrics))

        _run_experiment(agent, str(repeat_no))

def _run_acs2er_experiment(er_samples_number: int):
    """Run the ACS2ER experiment REPEAT times for one replay-sample count.

    Args:
        er_samples_number: passed to the agent configuration as
            ``er_samples_number``; also part of the output directory name.

    Repetition ``i`` is passed to ``_run_experiment`` with the path
    ``m_<er_samples_number>-ER/<i>``.
    """
    # Derive the directory from the parameter so that several entries in
    # ER_SAMPLES_NUMBER_LIST do not overwrite each other's results
    # (for er_samples_number == 3 this is the previous 'm_3-ER').
    experiment_dir = f'm_{er_samples_number}-ER'

    for i in range(REPEAT_START, REPEAT_START + REPEAT):
        # Create agent
        cfg = CFG_ACS2ER(
            classifier_length=knowledge_env.env.observation_space.n,
            number_of_possible_actions=2,
            do_ga=True,
            metrics_trial_frequency=METRICS_FREQUENCY,
            er_buffer_size=ER_BUFFER_SIZE,
            er_min_samples=ER_BUFFER_MIN_SAMPLES,
            er_samples_number=er_samples_number,
            user_metrics_collector_fcn=mpx_metrics)
        agent = ACS2ER(cfg)

        _run_experiment(agent, os.path.join(experiment_dir, f'{i}'))

def run_acs2er_experiments():
    """Run the ACS2ER experiment series for every entry of ER_SAMPLES_NUMBER_LIST.

    Prints a START and an END line around each series.
    """
    for samples in ER_SAMPLES_NUMBER_LIST:
        label = f"ACS2ER - {samples}"
        print(f"START - {label}")
        _run_acs2er_experiment(samples)
        print(f"END - {label}")

def _run_acs2per_experiment(er_samples_number: int):
    """Run the reward-weighted ACS2ER experiment REPEAT times for one replay-sample count.

    Identical to the plain ACS2ER setup except that ``_weight_func_reward`` is
    supplied as ``er_weight_function``.

    Args:
        er_samples_number: passed to the agent configuration as
            ``er_samples_number``; also part of the output directory name.

    Repetition ``i`` is passed to ``_run_experiment`` with the path
    ``m_<er_samples_number>-pER_reward/<i>``.
    """
    # Derive the directory from the parameter so that several entries in
    # ER_SAMPLES_NUMBER_LIST do not overwrite each other's results
    # (for er_samples_number == 3 this is the previous 'm_3-pER_reward').
    experiment_dir = f'm_{er_samples_number}-pER_reward'

    for i in range(REPEAT_START, REPEAT_START + REPEAT):
        # Create agent
        cfg = CFG_ACS2ER(
            classifier_length=knowledge_env.env.observation_space.n,
            number_of_possible_actions=2,
            do_ga=True,
            metrics_trial_frequency=METRICS_FREQUENCY,
            er_buffer_size=ER_BUFFER_SIZE,
            er_min_samples=ER_BUFFER_MIN_SAMPLES,
            er_samples_number=er_samples_number,
            er_weight_function=_weight_func_reward,
            user_metrics_collector_fcn=mpx_metrics)
        agent = ACS2ER(cfg)

        _run_experiment(agent, os.path.join(experiment_dir, f'{i}'))

def run_acs2per_experiments():
    """Run the reward-weighted ACS2ER series for every entry of ER_SAMPLES_NUMBER_LIST.

    Prints a START and an END line around each series; the lines do not
    include the sample count.
    """
    for samples in ER_SAMPLES_NUMBER_LIST:
        print("START - ACS2pER - reward")
        _run_acs2per_experiment(samples)
        print("END - ACS2pER - reward")


def _run_acs2per2_experiment(er_samples_number: int):
    """Run the uniqueness-weighted ACS2ER experiment REPEAT times for one replay-sample count.

    Identical to the plain ACS2ER setup except that ``_weight_func_unique`` is
    supplied as ``er_weight_function``.

    Args:
        er_samples_number: passed to the agent configuration as
            ``er_samples_number``; also part of the output directory name.

    Repetition ``i`` is passed to ``_run_experiment`` with the path
    ``m_<er_samples_number>-pER_unique/<i>``.
    """
    # Derive the directory from the parameter so that several entries in
    # ER_SAMPLES_NUMBER_LIST do not overwrite each other's results
    # (for er_samples_number == 3 this is the previous 'm_3-pER_unique').
    experiment_dir = f'm_{er_samples_number}-pER_unique'

    for i in range(REPEAT_START, REPEAT_START + REPEAT):
        # Create agent
        cfg = CFG_ACS2ER(
            classifier_length=knowledge_env.env.observation_space.n,
            number_of_possible_actions=2,
            do_ga=True,
            metrics_trial_frequency=METRICS_FREQUENCY,
            er_buffer_size=ER_BUFFER_SIZE,
            er_min_samples=ER_BUFFER_MIN_SAMPLES,
            er_samples_number=er_samples_number,
            er_weight_function=_weight_func_unique,
            user_metrics_collector_fcn=mpx_metrics)
        agent = ACS2ER(cfg)

        _run_experiment(agent, os.path.join(experiment_dir, f'{i}'))

def run_acs2per2_experiments():
    """Run the uniqueness-weighted ACS2ER series for every entry of ER_SAMPLES_NUMBER_LIST.

    Prints a START and an END line around each series; the lines do not
    include the sample count.
    """
    for samples in ER_SAMPLES_NUMBER_LIST:
        print("START - ACS2pER - unique")
        _run_acs2per2_experiment(samples)
        print("END - ACS2pER - unique")

### RUN ACS2 Experiments

In [22]:
# Baseline: all repetitions of the plain ACS2 agent
run_acs2_experiment()

INFO:lcs.agents.Agent:{'trial': 50, 'steps_in_trial': 1, 'reward': 0, 'perf_time': 0.0025700999995024176, 'knowledge': 0, 'specificity': 0.5738498789346246, 'population': 59, 'numerosity': 59, 'reliable': 0}
INFO:lcs.agents.Agent:{'trial': 100, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.0025284000003011897, 'knowledge': 0, 'specificity': 0.641114982578397, 'population': 123, 'numerosity': 123, 'reliable': 0}
INFO:lcs.agents.Agent:{'trial': 150, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.0017912999992404366, 'knowledge': 0.125, 'specificity': 0.6748224151539067, 'population': 181, 'numerosity': 182, 'reliable': 1}
INFO:lcs.agents.Agent:{'trial': 200, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.0016208999995797058, 'knowledge': 0.4375, 'specificity': 0.6931454196028193, 'population': 223, 'numerosity': 224, 'reliable': 6}
INFO:lcs.agents.Agent:{'trial': 250, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.0031688999997641076, 'knowledge': 0.4375, 'specificit

### RUN ACS2ER Experiments

In [23]:
# ACS2ER agent without a weight function, one series per entry of ER_SAMPLES_NUMBER_LIST
run_acs2er_experiments()

INFO:lcs.agents.Agent:{'trial': 50, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.001298799999858602, 'knowledge': 0, 'specificity': 0.09523809523809523, 'population': 3, 'numerosity': 3, 'reliable': 0}


START - ACS2ER - 3


INFO:lcs.agents.Agent:{'trial': 100, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.01856859999952576, 'knowledge': 0, 'specificity': 0.6438095238095232, 'population': 150, 'numerosity': 150, 'reliable': 0}
INFO:lcs.agents.Agent:{'trial': 150, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.027008000000023458, 'knowledge': 0.234375, 'specificity': 0.6380952380952376, 'population': 270, 'numerosity': 270, 'reliable': 7}
INFO:lcs.agents.Agent:{'trial': 200, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.022119199999906414, 'knowledge': 0.734375, 'specificity': 0.6357717349674458, 'population': 373, 'numerosity': 375, 'reliable': 22}
INFO:lcs.agents.Agent:{'trial': 250, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.03256080000028305, 'knowledge': 0.921875, 'specificity': 0.6448979591836742, 'population': 420, 'numerosity': 424, 'reliable': 36}
INFO:lcs.agents.Agent:{'trial': 300, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.022947800000110874, 'knowledge': 0.953

END - ACS2ER - 3


In [24]:
# ACS2ER agent with the reward-based weight function (_weight_func_reward)
run_acs2per_experiments()

INFO:lcs.agents.Agent:{'trial': 50, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.0013446999992083875, 'knowledge': 0, 'specificity': 0.07142857142857142, 'population': 2, 'numerosity': 2, 'reliable': 0}


START - ACS2pER - reward


INFO:lcs.agents.Agent:{'trial': 100, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.01588179999998829, 'knowledge': 0.421875, 'specificity': 0.6390977443609022, 'population': 114, 'numerosity': 114, 'reliable': 3}
INFO:lcs.agents.Agent:{'trial': 150, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.022705299999870476, 'knowledge': 0.6875, 'specificity': 0.6235201262825575, 'population': 181, 'numerosity': 181, 'reliable': 28}
INFO:lcs.agents.Agent:{'trial': 200, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.022916700000678247, 'knowledge': 0.8125, 'specificity': 0.6292947558770354, 'population': 237, 'numerosity': 238, 'reliable': 45}
INFO:lcs.agents.Agent:{'trial': 250, 'steps_in_trial': 1, 'reward': 0, 'perf_time': 0.03009329999986221, 'knowledge': 0.890625, 'specificity': 0.6254980079681283, 'population': 251, 'numerosity': 253, 'reliable': 49}
INFO:lcs.agents.Agent:{'trial': 300, 'steps_in_trial': 1, 'reward': 0, 'perf_time': 0.02322369999910734, 'knowledge': 0.96875,

END - ACS2pER - reward


In [25]:
# ACS2ER agent with the uniqueness-based weight function (_weight_func_unique)
run_acs2per2_experiments()

INFO:lcs.agents.Agent:{'trial': 50, 'steps_in_trial': 1, 'reward': 0, 'perf_time': 0.0016497999986313516, 'knowledge': 0, 'specificity': 0.0, 'population': 1, 'numerosity': 1, 'reliable': 0}


START - ACS2pER - unique


INFO:lcs.agents.Agent:{'trial': 100, 'steps_in_trial': 1, 'reward': 0, 'perf_time': 0.02658770000016375, 'knowledge': 0, 'specificity': 0.6280323450134768, 'population': 159, 'numerosity': 159, 'reliable': 0}
INFO:lcs.agents.Agent:{'trial': 150, 'steps_in_trial': 1, 'reward': 0, 'perf_time': 0.029407199999695877, 'knowledge': 0.234375, 'specificity': 0.6515930113052427, 'population': 278, 'numerosity': 278, 'reliable': 12}
INFO:lcs.agents.Agent:{'trial': 200, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.03494289999980538, 'knowledge': 0.515625, 'specificity': 0.6669387755102058, 'population': 350, 'numerosity': 350, 'reliable': 29}
INFO:lcs.agents.Agent:{'trial': 250, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.03243560000009893, 'knowledge': 0.671875, 'specificity': 0.6764606551840606, 'population': 423, 'numerosity': 427, 'reliable': 41}
INFO:lcs.agents.Agent:{'trial': 300, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.03610909999952128, 'knowledge': 0.734375, 's

END - ACS2pER - unique
