In [15]:
import os
from utils.run_utils import Runner

import gym
import gym_maze

from lcs.agents import Agent
from lcs.agents.acs2 import ACS2, Configuration as CFG_ACS2
from lcs.agents.acs2er import ACS2ER, Configuration as CFG_ACS2ER, ReplayMemory, ReplayMemorySample
from lcs.agents.acs2rer import ACS2RER, Configuration as CFG_ACS2RER
from lcs.metrics import population_metrics

# Logger
import logging
logging.basicConfig(level=logging.INFO)


# EXPERIMENT CONFIGURATION

In [16]:
MAZE = "Maze5-v0" 
EXPLORE_TRIALS = 200
EXPLOIT_TRIALS = 200

# The size of ER replay memory buffer
ER_BUFFER_SIZE = 10000
# The minimum number of samples of ER replay memory buffer to start replying samples (warm-up phase)
ER_BUFFER_MIN_SAMPLES = 100
# The number of samples to be replayed druing ER phase
ER_SAMPLES_NUMBER_LIST = [3]


#######

REPEAT_START = 1
REPEAT = 6

EXPERIMENT_NAME = "Maze5_TEST2" # Please edit if running new experiment to do not override saved results.


In [17]:
runner = Runner('MAZE', EXPERIMENT_NAME, MAZE)

## METRICS

In [18]:
def _get_transitions():
    knowledge_env = gym.make(MAZE)
    transitions = knowledge_env.env.get_transitions()
    transitions = list(map(lambda t: [knowledge_env.env.maze.perception(t[0]), t[1], knowledge_env.env.maze.perception(t[2])], transitions))

    return transitions

TRANSITIONS = _get_transitions()
TRANSITIONS_LENGTH = len(TRANSITIONS)

def _maze_knowledge(population) -> float:
    # Take into consideration only reliable classifiers
    reliable_classifiers = [c for c in population if c.is_reliable()]

    # Count how many transitions are anticipated correctly
    nr_correct = 0

    # For all possible destinations from each path cell
    for p0, action, p1 in TRANSITIONS:
        if any([True for cl in reliable_classifiers
                if cl.predicts_successfully(p0, action, p1)]):
            nr_correct += 1

    return nr_correct / TRANSITIONS_LENGTH * 100.0

def _maze_metrics(agent, env):
    pop = agent.population
    metrics = {
        'knowledge': _maze_knowledge(pop)
    }
    metrics.update(population_metrics(pop, env))
    
    return metrics

def _rm_update(rm: ReplayMemory, sample: ReplayMemorySample):
    if(sample.reward == 0):
        rm.update(sample)
        return
    
    for _ in range(5):
        rm.update(ReplayMemorySample(sample.state, sample.action, sample.reward, sample.next_state, sample.done))

def _rm_update_2(rm: ReplayMemory, sample: ReplayMemorySample):
    if any(sample.state == s.state and sample.action == s.action and sample.reward == s.reward and sample.next_state == s.next_state and sample.done == s.done for s in rm):
        return

    rm.update(sample)


## EXPERIMENT

In [19]:
def _run_experiment(agent, path):
    runner.run_experiment(agent, gym.make(MAZE), EXPLORE_TRIALS, EXPLOIT_TRIALS, path)

def run_acs2_experiment():
    for i in range(REPEAT_START, REPEAT_START + REPEAT):
        # Create agent 
        cfg = CFG_ACS2(    
            classifier_length=8,
            number_of_possible_actions=8,
            metrics_trial_frequency=1,
            user_metrics_collector_fcn=_maze_metrics)
        agent = ACS2(cfg)

        _run_experiment(agent, f'{i}')

def _run_acs2er_experiment(er_samples_number: int):
    for i in range(REPEAT_START, REPEAT_START + REPEAT):
        # Create agent 
        cfg = CFG_ACS2ER(    
            classifier_length=8,
            number_of_possible_actions=8,
            metrics_trial_frequency=1,
            er_buffer_size=ER_BUFFER_SIZE,
            er_min_samples=ER_BUFFER_MIN_SAMPLES,
            er_samples_number=er_samples_number,
            user_metrics_collector_fcn=_maze_metrics)
        agent = ACS2ER(cfg)

        _run_experiment(agent, os.path.join(f'm_3-ER', f'{i}'))

def run_acs2er_experiments():
    for er_samples_number in ER_SAMPLES_NUMBER_LIST:
        print(f"START - ACS2ER - {er_samples_number}")
        _run_acs2er_experiment(er_samples_number)
        print(f"END - ACS2ER - {er_samples_number}")

def _run_acs2per_experiment(er_samples_number: int):
    for i in range(REPEAT_START, REPEAT_START + REPEAT):
        # Create agent 
        cfg = CFG_ACS2ER(    
            classifier_length=8,
            number_of_possible_actions=8,
            metrics_trial_frequency=1,
            er_buffer_size=ER_BUFFER_SIZE,
            er_min_samples=ER_BUFFER_MIN_SAMPLES,
            er_samples_number=er_samples_number,
            er_rm_update_func=_rm_update,
            user_metrics_collector_fcn=_maze_metrics)
        agent = ACS2ER(cfg)

        _run_experiment(agent, os.path.join(f'm_3-pER_reward', f'{i}'))

def run_acs2per_experiments():
    for er_samples_number in ER_SAMPLES_NUMBER_LIST:
        print(f"START - ACS2pER - reward")
        _run_acs2per_experiment(er_samples_number)
        print(f"END - ACS2pER - reward")


def _run_acs2per2_experiment(er_samples_number: int):
    for i in range(REPEAT_START, REPEAT_START + REPEAT):
        # Create agent 
        cfg = CFG_ACS2ER(    
            classifier_length=8,
            number_of_possible_actions=8,
            metrics_trial_frequency=1,
            er_buffer_size=ER_BUFFER_SIZE,
            er_min_samples=20,
            er_samples_number=er_samples_number,
            er_rm_update_func=_rm_update_2,
            user_metrics_collector_fcn=_maze_metrics)
        agent = ACS2ER(cfg)

        _run_experiment(agent, os.path.join(f'm_3-pER_unique', f'{i}'))

def run_acs2per2_experiments():
    for er_samples_number in ER_SAMPLES_NUMBER_LIST:
        print(f"START - ACS2pER - unique")
        _run_acs2per2_experiment(er_samples_number)
        print(f"END - ACS2pER - unique")


def _run_acs2rer_experiment(er_samples_number: int):
    for i in range(REPEAT_START, REPEAT_START + REPEAT):
        # Create agent 
        cfg = CFG_ACS2ER(    
            classifier_length=8,
            number_of_possible_actions=8,
            metrics_trial_frequency=1,
            er_buffer_size=ER_BUFFER_SIZE,
            er_min_samples=ER_BUFFER_MIN_SAMPLES,
            er_samples_number=er_samples_number,
            user_metrics_collector_fcn=_maze_metrics)
        agent = ACS2ER(cfg)

        _run_experiment(agent, os.path.join(f'm_3-RER', f'{i}'))

def run_acs2rer_experiments():
    for er_samples_number in ER_SAMPLES_NUMBER_LIST:
        print(f"START - ACS2ER - {er_samples_number}")
        _run_acs2rer_experiment(er_samples_number)
        print(f"END - ACS2ER - {er_samples_number}")

### RUN ACS2 Experiments

In [20]:
run_acs2_experiment()

INFO:lcs.agents.Agent:{'trial': 20, 'steps_in_trial': 39, 'reward': 1000, 'perf_time': 0.09233580000000075, 'knowledge': 1.36986301369863, 'population': 381, 'numerosity': 381, 'reliable': 17}
INFO:lcs.agents.Agent:{'trial': 40, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.0020759000001362438, 'knowledge': 2.73972602739726, 'population': 426, 'numerosity': 426, 'reliable': 20}
INFO:lcs.agents.Agent:{'trial': 60, 'steps_in_trial': 13, 'reward': 1000, 'perf_time': 0.02980459999980667, 'knowledge': 9.58904109589041, 'population': 431, 'numerosity': 431, 'reliable': 39}
INFO:lcs.agents.Agent:{'trial': 80, 'steps_in_trial': 50, 'reward': 0, 'perf_time': 0.13117049999982555, 'knowledge': 14.383561643835616, 'population': 446, 'numerosity': 446, 'reliable': 66}
INFO:lcs.agents.Agent:{'trial': 100, 'steps_in_trial': 39, 'reward': 1000, 'perf_time': 0.0741725000000315, 'knowledge': 15.753424657534246, 'population': 444, 'numerosity': 444, 'reliable': 77}
INFO:lcs.agents.Agent:{'trial': 1

### RUN ACS2ER Experiments

In [21]:
run_acs2er_experiments()

START - ACS2ER - 3


INFO:lcs.agents.Agent:{'trial': 20, 'steps_in_trial': 34, 'reward': 1000, 'perf_time': 0.40250779999996666, 'knowledge': 9.58904109589041, 'population': 365, 'numerosity': 365, 'reliable': 41}
INFO:lcs.agents.Agent:{'trial': 40, 'steps_in_trial': 22, 'reward': 1000, 'perf_time': 0.2508563999999751, 'knowledge': 20.54794520547945, 'population': 422, 'numerosity': 422, 'reliable': 76}
INFO:lcs.agents.Agent:{'trial': 60, 'steps_in_trial': 5, 'reward': 1000, 'perf_time': 0.06384909999997035, 'knowledge': 31.506849315068493, 'population': 477, 'numerosity': 477, 'reliable': 109}
INFO:lcs.agents.Agent:{'trial': 80, 'steps_in_trial': 4, 'reward': 1000, 'perf_time': 0.04795030000013867, 'knowledge': 36.3013698630137, 'population': 490, 'numerosity': 490, 'reliable': 126}
INFO:lcs.agents.Agent:{'trial': 100, 'steps_in_trial': 12, 'reward': 1000, 'perf_time': 0.14241100000003826, 'knowledge': 41.78082191780822, 'population': 498, 'numerosity': 498, 'reliable': 147}
INFO:lcs.agents.Agent:{'trial'

END - ACS2ER - 3


In [22]:
run_acs2per_experiments()

START - ACS2pER - reward


INFO:lcs.agents.Agent:{'trial': 20, 'steps_in_trial': 30, 'reward': 1000, 'perf_time': 0.5455969000004188, 'knowledge': 12.32876712328767, 'population': 416, 'numerosity': 416, 'reliable': 66}
INFO:lcs.agents.Agent:{'trial': 40, 'steps_in_trial': 8, 'reward': 1000, 'perf_time': 0.11328259999982038, 'knowledge': 24.65753424657534, 'population': 465, 'numerosity': 465, 'reliable': 104}
INFO:lcs.agents.Agent:{'trial': 60, 'steps_in_trial': 26, 'reward': 1000, 'perf_time': 0.4379389999999148, 'knowledge': 29.45205479452055, 'population': 489, 'numerosity': 489, 'reliable': 123}
INFO:lcs.agents.Agent:{'trial': 80, 'steps_in_trial': 50, 'reward': 0, 'perf_time': 0.7773292999995647, 'knowledge': 32.19178082191781, 'population': 515, 'numerosity': 515, 'reliable': 140}
INFO:lcs.agents.Agent:{'trial': 100, 'steps_in_trial': 50, 'reward': 0, 'perf_time': 0.749996099999862, 'knowledge': 41.0958904109589, 'population': 525, 'numerosity': 525, 'reliable': 170}
INFO:lcs.agents.Agent:{'trial': 120, '

END - ACS2pER - reward


In [23]:
run_acs2per2_experiments()

START - ACS2pER - unique


INFO:lcs.agents.Agent:{'trial': 20, 'steps_in_trial': 50, 'reward': 0, 'perf_time': 0.7732577000001584, 'knowledge': 19.863013698630137, 'population': 532, 'numerosity': 532, 'reliable': 76}
INFO:lcs.agents.Agent:{'trial': 40, 'steps_in_trial': 24, 'reward': 1000, 'perf_time': 0.4183979999997973, 'knowledge': 48.63013698630137, 'population': 602, 'numerosity': 602, 'reliable': 162}
INFO:lcs.agents.Agent:{'trial': 60, 'steps_in_trial': 9, 'reward': 1000, 'perf_time': 0.13932370000020455, 'knowledge': 65.06849315068493, 'population': 598, 'numerosity': 598, 'reliable': 223}
INFO:lcs.agents.Agent:{'trial': 80, 'steps_in_trial': 18, 'reward': 1000, 'perf_time': 0.2981744999997318, 'knowledge': 71.91780821917808, 'population': 598, 'numerosity': 598, 'reliable': 270}
INFO:lcs.agents.Agent:{'trial': 100, 'steps_in_trial': 49, 'reward': 1000, 'perf_time': 0.8519550000000891, 'knowledge': 73.28767123287672, 'population': 593, 'numerosity': 593, 'reliable': 295}
INFO:lcs.agents.Agent:{'trial': 

END - ACS2pER - unique


In [24]:

run_acs2rer_experiments()

START - ACS2ER - 3


INFO:lcs.agents.Agent:{'trial': 20, 'steps_in_trial': 50, 'reward': 0, 'perf_time': 0.711688100000174, 'knowledge': 21.91780821917808, 'population': 484, 'numerosity': 484, 'reliable': 84}
INFO:lcs.agents.Agent:{'trial': 40, 'steps_in_trial': 50, 'reward': 0, 'perf_time': 0.7545719999998255, 'knowledge': 44.52054794520548, 'population': 539, 'numerosity': 539, 'reliable': 173}
INFO:lcs.agents.Agent:{'trial': 60, 'steps_in_trial': 17, 'reward': 1000, 'perf_time': 0.3021352000000661, 'knowledge': 56.16438356164384, 'population': 536, 'numerosity': 536, 'reliable': 243}
INFO:lcs.agents.Agent:{'trial': 80, 'steps_in_trial': 6, 'reward': 1000, 'perf_time': 0.08190260000037597, 'knowledge': 67.8082191780822, 'population': 518, 'numerosity': 518, 'reliable': 282}
INFO:lcs.agents.Agent:{'trial': 100, 'steps_in_trial': 6, 'reward': 1000, 'perf_time': 0.08226850000028207, 'knowledge': 69.17808219178082, 'population': 508, 'numerosity': 508, 'reliable': 296}
INFO:lcs.agents.Agent:{'trial': 120, '

END - ACS2ER - 3
