In [11]:
import os
from utils.run_utils import Runner

import gym
import gym_maze

from lcs.agents import Agent
from lcs.agents.acs2 import ACS2, Configuration as CFG_ACS2
from lcs.agents.acs2er import ACS2ER, Configuration as CFG_ACS2ER, ReplayMemory, ReplayMemorySample
from lcs.agents.acs2rer import ACS2RER, Configuration as CFG_ACS2RER
from lcs.metrics import population_metrics

# Logger
import logging
logging.basicConfig(level=logging.INFO)


# EXPERIMENT CONFIGURATION

In [12]:
MAZE = "Maze5-v0" 
EXPLORE_TRIALS = 100
EXPLOIT_TRIALS = 100

# The size of ER replay memory buffer
ER_BUFFER_SIZE = 10000
# The minimum number of samples of ER replay memory buffer to start replying samples (warm-up phase)
ER_BUFFER_MIN_SAMPLES = 100
# The number of samples to be replayed druing ER phase
ER_SAMPLES_NUMBER_LIST = [3]


#######

REPEAT_START = 1
REPEAT = 10

EXPERIMENT_NAME = "Maze5_TEST13" # Please edit if running new experiment to do not override saved results.


In [13]:
runner = Runner('MAZE', EXPERIMENT_NAME, MAZE)

## METRICS

In [14]:
def _get_transitions():
    knowledge_env = gym.make(MAZE)
    transitions = knowledge_env.env.get_transitions()
    transitions = list(map(lambda t: [knowledge_env.env.maze.perception(t[0]), t[1], knowledge_env.env.maze.perception(t[2])], transitions))

    return transitions

TRANSITIONS = _get_transitions()
TRANSITIONS_LENGTH = len(TRANSITIONS)

def _maze_knowledge(population) -> float:
    # Take into consideration only reliable classifiers
    reliable_classifiers = [c for c in population if c.is_reliable()]

    # Count how many transitions are anticipated correctly
    nr_correct = 0

    # For all possible destinations from each path cell
    for p0, action, p1 in TRANSITIONS:
        if any([True for cl in reliable_classifiers
                if cl.predicts_successfully(p0, action, p1)]):
            nr_correct += 1

    return nr_correct / TRANSITIONS_LENGTH * 100.0

def _maze_specificity(population) -> float:
    pop_len = len(population)
    if(pop_len) == 0:
        return 0
    return sum(map(lambda c: c.specificity, population)) / pop_len

def _maze_metrics(agent, env):
    pop = agent.population
    metrics = {
        'knowledge': _maze_knowledge(pop),
        "specificity": _maze_specificity(agent.population)
    }
    metrics.update(population_metrics(pop, env))
    
    return metrics

def _weight_func_reward(rm: ReplayMemory, sample: ReplayMemorySample):
    if(sample.reward == 0):
        return 1
    
    return 5

def _weight_func_unique(rm: ReplayMemory, sample: ReplayMemorySample):
    existing_count = sum(1 for s in rm if sample.state == s.state and sample.action == s.action and sample.reward == s.reward and sample.next_state == s.next_state and sample.done == s.done)

    return 1 / (existing_count * 2 + 1)


## EXPERIMENT

In [15]:
def _run_experiment(agent, path):
    runner.run_experiment(agent, gym.make(MAZE), EXPLORE_TRIALS, EXPLOIT_TRIALS, path)

def run_acs2_experiment():
    for i in range(REPEAT_START, REPEAT_START + REPEAT):
        # Create agent 
        cfg = CFG_ACS2(    
            classifier_length=8,
            number_of_possible_actions=8,
            metrics_trial_frequency=1,
            user_metrics_collector_fcn=_maze_metrics)
        agent = ACS2(cfg)

        _run_experiment(agent, f'{i}')

def _run_acs2er_experiment(er_samples_number: int):
    for i in range(REPEAT_START, REPEAT_START + REPEAT):
        # Create agent 
        cfg = CFG_ACS2ER(    
            classifier_length=8,
            number_of_possible_actions=8,
            metrics_trial_frequency=1,
            er_buffer_size=ER_BUFFER_SIZE,
            er_min_samples=ER_BUFFER_MIN_SAMPLES,
            er_samples_number=er_samples_number,
            user_metrics_collector_fcn=_maze_metrics)
        agent = ACS2ER(cfg)

        _run_experiment(agent, os.path.join(f'm_3-ER', f'{i}'))

def run_acs2er_experiments():
    for er_samples_number in ER_SAMPLES_NUMBER_LIST:
        print(f"START - ACS2ER - {er_samples_number}")
        _run_acs2er_experiment(er_samples_number)
        print(f"END - ACS2ER - {er_samples_number}")

def _run_acs2per_experiment(er_samples_number: int):
    for i in range(REPEAT_START, REPEAT_START + REPEAT):
        # Create agent 
        cfg = CFG_ACS2ER(    
            classifier_length=8,
            number_of_possible_actions=8,
            metrics_trial_frequency=1,
            er_buffer_size=ER_BUFFER_SIZE,
            er_min_samples=ER_BUFFER_MIN_SAMPLES,
            er_samples_number=er_samples_number,
            er_weight_function=_weight_func_reward,
            user_metrics_collector_fcn=_maze_metrics)
        agent = ACS2ER(cfg)

        _run_experiment(agent, os.path.join(f'm_3-pER_reward', f'{i}'))

def run_acs2per_experiments():
    for er_samples_number in ER_SAMPLES_NUMBER_LIST:
        print(f"START - ACS2pER - reward")
        _run_acs2per_experiment(er_samples_number)
        print(f"END - ACS2pER - reward")


def _run_acs2per2_experiment(er_samples_number: int):
    for i in range(REPEAT_START, REPEAT_START + REPEAT):
        # Create agent 
        cfg = CFG_ACS2ER(    
            classifier_length=8,
            number_of_possible_actions=8,
            metrics_trial_frequency=1,
            er_buffer_size=ER_BUFFER_SIZE,
            er_min_samples=ER_BUFFER_MIN_SAMPLES,
            er_samples_number=er_samples_number,
            er_weight_function=_weight_func_unique,
            user_metrics_collector_fcn=_maze_metrics)
        agent = ACS2ER(cfg)

        _run_experiment(agent, os.path.join(f'm_3-pER_unique', f'{i}'))

def run_acs2per2_experiments():
    for er_samples_number in ER_SAMPLES_NUMBER_LIST:
        print(f"START - ACS2pER - unique")
        _run_acs2per2_experiment(er_samples_number)
        print(f"END - ACS2pER - unique")


def _run_acs2rer_experiment(er_samples_number: int):
    for i in range(REPEAT_START, REPEAT_START + REPEAT):
        # Create agent 
        cfg = CFG_ACS2ER(    
            classifier_length=8,
            number_of_possible_actions=8,
            metrics_trial_frequency=1,
            er_buffer_size=ER_BUFFER_SIZE,
            er_min_samples=ER_BUFFER_MIN_SAMPLES,
            er_samples_number=er_samples_number,
            user_metrics_collector_fcn=_maze_metrics)
        agent = ACS2ER(cfg)

        _run_experiment(agent, os.path.join(f'm_3-RER', f'{i}'))

def run_acs2rer_experiments():
    for er_samples_number in ER_SAMPLES_NUMBER_LIST:
        print(f"START - ACS2ER - {er_samples_number}")
        _run_acs2rer_experiment(er_samples_number)
        print(f"END - ACS2ER - {er_samples_number}")

### RUN ACS2 Experiments

In [16]:
run_acs2_experiment()

INFO:lcs.agents.Agent:{'trial': 10, 'steps_in_trial': 50, 'reward': 0, 'perf_time': 0.09729910000169184, 'knowledge': 0.0, 'specificity': 0.4805276381909548, 'population': 199, 'numerosity': 199, 'reliable': 5}
INFO:lcs.agents.Agent:{'trial': 20, 'steps_in_trial': 50, 'reward': 0, 'perf_time': 0.15709369999967748, 'knowledge': 2.73972602739726, 'specificity': 0.5693430656934306, 'population': 274, 'numerosity': 274, 'reliable': 20}
INFO:lcs.agents.Agent:{'trial': 30, 'steps_in_trial': 32, 'reward': 1000, 'perf_time': 0.10885609999968437, 'knowledge': 3.4246575342465753, 'specificity': 0.5950520833333334, 'population': 288, 'numerosity': 288, 'reliable': 22}
INFO:lcs.agents.Agent:{'trial': 40, 'steps_in_trial': 38, 'reward': 1000, 'perf_time': 0.09710289999929955, 'knowledge': 8.21917808219178, 'specificity': 0.6070261437908496, 'population': 306, 'numerosity': 306, 'reliable': 34}
INFO:lcs.agents.Agent:{'trial': 50, 'steps_in_trial': 18, 'reward': 1000, 'perf_time': 0.06899960000009742

### RUN ACS2ER Experiments

In [17]:
run_acs2er_experiments()

START - ACS2ER - 3


INFO:lcs.agents.Agent:{'trial': 10, 'steps_in_trial': 50, 'reward': 0, 'perf_time': 1.0521962999991956, 'knowledge': 4.10958904109589, 'specificity': 0.48, 'population': 400, 'numerosity': 400, 'reliable': 27}
INFO:lcs.agents.Agent:{'trial': 20, 'steps_in_trial': 50, 'reward': 0, 'perf_time': 1.2988745000002382, 'knowledge': 24.65753424657534, 'specificity': 0.5783482142857143, 'population': 560, 'numerosity': 560, 'reliable': 90}
INFO:lcs.agents.Agent:{'trial': 30, 'steps_in_trial': 50, 'reward': 0, 'perf_time': 1.343197499998496, 'knowledge': 30.136986301369863, 'specificity': 0.6023489932885906, 'population': 596, 'numerosity': 596, 'reliable': 129}
INFO:lcs.agents.Agent:{'trial': 40, 'steps_in_trial': 16, 'reward': 1000, 'perf_time': 0.4421340000008058, 'knowledge': 35.61643835616438, 'specificity': 0.6167898193760263, 'population': 609, 'numerosity': 609, 'reliable': 158}
INFO:lcs.agents.Agent:{'trial': 50, 'steps_in_trial': 22, 'reward': 1000, 'perf_time': 0.5999621999981173, 'kn

END - ACS2ER - 3


In [18]:
run_acs2per_experiments()

START - ACS2pER - reward


INFO:lcs.agents.Agent:{'trial': 10, 'steps_in_trial': 15, 'reward': 1000, 'perf_time': 0.3925626000018383, 'knowledge': 2.73972602739726, 'specificity': 0.47694174757281554, 'population': 206, 'numerosity': 206, 'reliable': 8}
INFO:lcs.agents.Agent:{'trial': 20, 'steps_in_trial': 47, 'reward': 1000, 'perf_time': 0.6905871999988449, 'knowledge': 9.58904109589041, 'specificity': 0.5375, 'population': 300, 'numerosity': 300, 'reliable': 32}
INFO:lcs.agents.Agent:{'trial': 30, 'steps_in_trial': 18, 'reward': 1000, 'perf_time': 0.3286509000026854, 'knowledge': 11.643835616438356, 'specificity': 0.5470219435736677, 'population': 319, 'numerosity': 319, 'reliable': 47}
INFO:lcs.agents.Agent:{'trial': 40, 'steps_in_trial': 24, 'reward': 1000, 'perf_time': 0.3746353000024101, 'knowledge': 15.068493150684931, 'specificity': 0.5610119047619048, 'population': 336, 'numerosity': 336, 'reliable': 59}
INFO:lcs.agents.Agent:{'trial': 50, 'steps_in_trial': 8, 'reward': 1000, 'perf_time': 0.109667400000

END - ACS2pER - reward


In [19]:
run_acs2per2_experiments()

START - ACS2pER - unique


INFO:lcs.agents.Agent:{'trial': 10, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.022226000000955537, 'knowledge': 0.684931506849315, 'specificity': 0.502851711026616, 'population': 263, 'numerosity': 263, 'reliable': 10}
INFO:lcs.agents.Agent:{'trial': 20, 'steps_in_trial': 50, 'reward': 0, 'perf_time': 1.2896473999971931, 'knowledge': 13.698630136986301, 'specificity': 0.5807174887892377, 'population': 446, 'numerosity': 446, 'reliable': 47}
INFO:lcs.agents.Agent:{'trial': 30, 'steps_in_trial': 7, 'reward': 1000, 'perf_time': 0.1945317999998224, 'knowledge': 24.65753424657534, 'specificity': 0.6085714285714285, 'population': 525, 'numerosity': 525, 'reliable': 82}
INFO:lcs.agents.Agent:{'trial': 40, 'steps_in_trial': 12, 'reward': 1000, 'perf_time': 0.45187609999993583, 'knowledge': 34.93150684931507, 'specificity': 0.631155303030303, 'population': 528, 'numerosity': 528, 'reliable': 109}
INFO:lcs.agents.Agent:{'trial': 50, 'steps_in_trial': 16, 'reward': 1000, 'perf_time': 0.5

END - ACS2pER - unique


In [20]:

run_acs2rer_experiments()

START - ACS2ER - 3


INFO:lcs.agents.Agent:{'trial': 10, 'steps_in_trial': 50, 'reward': 0, 'perf_time': 0.8651343999990786, 'knowledge': 6.164383561643835, 'specificity': 0.4817073170731707, 'population': 369, 'numerosity': 369, 'reliable': 28}
INFO:lcs.agents.Agent:{'trial': 20, 'steps_in_trial': 50, 'reward': 0, 'perf_time': 1.059281199999532, 'knowledge': 19.17808219178082, 'specificity': 0.5508333333333333, 'population': 450, 'numerosity': 450, 'reliable': 91}
INFO:lcs.agents.Agent:{'trial': 30, 'steps_in_trial': 19, 'reward': 1000, 'perf_time': 0.39295660000061616, 'knowledge': 23.972602739726025, 'specificity': 0.5776673640167364, 'population': 478, 'numerosity': 478, 'reliable': 121}
INFO:lcs.agents.Agent:{'trial': 40, 'steps_in_trial': 50, 'reward': 0, 'perf_time': 1.083980999999767, 'knowledge': 28.767123287671232, 'specificity': 0.5973011363636364, 'population': 528, 'numerosity': 528, 'reliable': 130}
INFO:lcs.agents.Agent:{'trial': 50, 'steps_in_trial': 50, 'reward': 0, 'perf_time': 1.05753299

END - ACS2ER - 3
