In [1]:
import os
from utils.run_utils import Runner

import gym
import gym_corridor

from lcs import Perception
from lcs.agents.acs2 import ACS2, Configuration as CFG_ACS2
from lcs.agents.acs2er import ACS2ER, Configuration as CFG_ACS2ER
from lcs.metrics import population_metrics

# Logger
import logging
logging.basicConfig(level=logging.INFO)


# EXPERIMENT CONFIGURATION

In [2]:
CORRIDOR_LENGTH = 100  # 20 | 40 | 100
# Gym environment id built from the corridor length; passed to gym.make().
COR = f'corridor-{CORRIDOR_LENGTH}-v0'
# Trial counts handed to runner.run_experiment() for the explore and exploit phases.
EXPLORE_TRIALS = 500
EXPLOIT_TRIALS = 500
# Forwarded to the agent configurations as `metrics_trial_frequency`.
METRICS_FREQUENCY = 1

# The size of the ER replay memory buffer.
ER_BUFFER_SIZE = 10000
# The minimum number of samples in the ER replay memory buffer before replaying starts (warm-up phase).
ER_BUFFER_MIN_SAMPLES = 1000
# The number of samples to be replayed during the ER phase; one ACS2ER experiment series is run per value.
ER_SAMPLES_NUMBER_LIST = [1,3,5,13]


#######

# Repetitions are numbered REPEAT_START .. REPEAT_START + REPEAT - 1; the number is used in the result path.
REPEAT_START = 1
REPEAT = 1

# Please change this name when running a new experiment to avoid overwriting saved results.
# NOTE(review): the name hard-codes "100" and is not derived from CORRIDOR_LENGTH — keep them in sync.
EXPERIMENT_NAME = "Corridor100_EXP1"


In [3]:
# Shared runner used by _run_experiment for every experiment in this notebook.
# NOTE(review): the first argument is the string literal 'COR', not the COR
# environment id (which is passed third) — presumably a label; confirm against
# utils.run_utils.Runner.
runner = Runner('COR', EXPERIMENT_NAME, COR)

## METRICS

In [4]:
class CorridorObservationWrapper(gym.ObservationWrapper):
    """Observation wrapper that packs every raw observation into a 1-tuple."""

    def observation(self, observation):
        """Return ``observation`` as the sole element of a tuple."""
        packed = (observation,)
        return packed

# Environment instance created at module level; in the visible cells it is only
# used to read the transition list below (experiments build their own instance).
env = CorridorObservationWrapper(gym.make(COR))
# Transitions are requested from the environment beneath the observation wrapper
# (`env.env`). Each entry is unpacked as (start, action, end) by _corridor_knowledge.
TRANSITIONS = env.env.get_transitions()

def _corridor_knowledge(population, environment):
    """Return the percentage of corridor transitions covered by reliable classifiers.

    A transition ``(start, action, end)`` from the module-level ``TRANSITIONS``
    counts as known when at least one classifier for which ``is_reliable()`` is
    true reports ``predicts_successfully(p0, action, p1)``, where ``p0`` and
    ``p1`` are single-attribute perceptions built from ``str(start)`` and
    ``str(end)``.

    Args:
        population: iterable of classifiers exposing ``is_reliable()`` and
            ``predicts_successfully(p0, action, p1)``.
        environment: not used by this function; kept so the signature stays
            unchanged for existing callers.

    Returns:
        Float in the range 0.0-100.0; 0.0 when ``TRANSITIONS`` is empty.
    """
    # Nothing to measure against; avoid dividing by zero below.
    if not TRANSITIONS:
        return 0.0

    reliable = [cl for cl in population if cl.is_reliable()]
    nr_correct = 0

    for start, action, end in TRANSITIONS:
        p0 = Perception((str(start),))
        p1 = Perception((str(end),))

        # Generator form lets any() stop at the first matching classifier
        # instead of building a full list for every transition.
        if any(cl.predicts_successfully(p0, action, p1) for cl in reliable):
            nr_correct += 1

    return nr_correct / len(TRANSITIONS) * 100.0
    
def corridor_metrics(agent, env):
    """Collect the per-trial metrics dictionary for ``agent``.

    The result starts with a ``'knowledge'`` entry computed by
    ``_corridor_knowledge`` and is then updated with whatever
    ``population_metrics`` returns for the same population and environment.
    """
    classifiers = agent.population
    collected = dict(knowledge=_corridor_knowledge(classifiers, env))
    collected.update(population_metrics(classifiers, env))
    return collected

## EXPERIMENT

In [5]:
def _run_experiment(agent, path):
    """Run ``agent`` through the shared runner on a newly created corridor environment.

    Args:
        agent: agent instance handed to ``runner.run_experiment``.
        path: value forwarded unchanged as the last argument of
            ``runner.run_experiment``.
    """
    environment = CorridorObservationWrapper(gym.make(COR))
    runner.run_experiment(
        agent,
        environment,
        EXPLORE_TRIALS,
        EXPLOIT_TRIALS,
        path,
    )

def run_acs2_experiment():
    """Run the baseline ACS2 experiment once for every configured repetition.

    Repetitions are numbered ``REPEAT_START`` .. ``REPEAT_START + REPEAT - 1``.
    Each one gets a freshly configured ``ACS2`` agent, and the repetition number
    (as a string) is passed to ``_run_experiment`` as the path.
    """
    for i in range(REPEAT_START, REPEAT_START + REPEAT):
        # Create agent
        cfg = CFG_ACS2(
            classifier_length=1,
            number_of_possible_actions=2,
            metrics_trial_frequency=METRICS_FREQUENCY,
            user_metrics_collector_fcn=corridor_metrics)
        agent = ACS2(cfg)

        # Must stay inside the loop: otherwise only the last repetition is
        # executed, and REPEAT == 0 fails on an unbound `agent`.
        _run_experiment(agent, f'{i}')
        
def _run_acs2er_experiment(er_samples_number: int):
    """Run every configured repetition of ACS2ER for one replay-sample count.

    Args:
        er_samples_number: value passed to the configuration as
            ``er_samples_number``; also used to name the ``m_<n>`` directory
            component of each repetition's path.
    """
    series_dir = f'm_{er_samples_number}'
    repetitions = range(REPEAT_START, REPEAT_START + REPEAT)

    for repetition in repetitions:
        # Build a new configuration and agent for each repetition.
        config = CFG_ACS2ER(
            classifier_length=1,
            number_of_possible_actions=2,
            metrics_trial_frequency=METRICS_FREQUENCY,
            er_buffer_size=ER_BUFFER_SIZE,
            er_min_samples=ER_BUFFER_MIN_SAMPLES,
            er_samples_number=er_samples_number,
            user_metrics_collector_fcn=corridor_metrics,
        )
        er_agent = ACS2ER(config)

        target = os.path.join(series_dir, f'{repetition}')
        _run_experiment(er_agent, target)

def run_acs2er_experiments():
    """Run the ACS2ER experiment series for each value in ``ER_SAMPLES_NUMBER_LIST``.

    A START line is printed before each series and an END line after it.
    """
    for samples in ER_SAMPLES_NUMBER_LIST:
        print(f"START - ACS2ER - {samples}")
        _run_acs2er_experiment(samples)
        print(f"END - ACS2ER - {samples}")

### RUN ACS2 Experiments

In [6]:
# Launch the baseline ACS2 experiment; metrics appear as INFO log records
# because logging was configured at INFO level in the first cell.
run_acs2_experiment()

INFO:lcs.agents.Agent:{'trial': 5, 'steps_in_trial': 200, 'reward': 0, 'perf_time': 0.05576369999999997, 'knowledge': 0.0, 'population': 94, 'numerosity': 94, 'reliable': 0}
INFO:lcs.agents.Agent:{'trial': 10, 'steps_in_trial': 200, 'reward': 0, 'perf_time': 0.07916609999999835, 'knowledge': 1.015228426395939, 'population': 159, 'numerosity': 159, 'reliable': 2}
INFO:lcs.agents.Agent:{'trial': 15, 'steps_in_trial': 200, 'reward': 0, 'perf_time': 0.08975110000000086, 'knowledge': 3.0456852791878175, 'population': 179, 'numerosity': 179, 'reliable': 6}
INFO:lcs.agents.Agent:{'trial': 20, 'steps_in_trial': 200, 'reward': 0, 'perf_time': 0.11483889999999874, 'knowledge': 7.614213197969544, 'population': 185, 'numerosity': 185, 'reliable': 15}
INFO:lcs.agents.Agent:{'trial': 25, 'steps_in_trial': 200, 'reward': 0, 'perf_time': 0.09953499999999948, 'knowledge': 21.31979695431472, 'population': 190, 'numerosity': 190, 'reliable': 42}
INFO:lcs.agents.Agent:{'trial': 30, 'steps_in_trial': 200, 

### RUN ACS2ER Experiments

In [7]:
# Launch the ACS2ER series, one per entry of ER_SAMPLES_NUMBER_LIST; each
# series is bracketed by START/END lines printed to stdout.
run_acs2er_experiments()

START - ACS2ER - 1


INFO:lcs.agents.Agent:{'trial': 5, 'steps_in_trial': 41, 'reward': 1000, 'perf_time': 0.06731979999999993, 'knowledge': 1.015228426395939, 'population': 82, 'numerosity': 82, 'reliable': 2}
INFO:lcs.agents.Agent:{'trial': 10, 'steps_in_trial': 200, 'reward': 0, 'perf_time': 0.49616100000000074, 'knowledge': 6.598984771573605, 'population': 147, 'numerosity': 147, 'reliable': 13}
INFO:lcs.agents.Agent:{'trial': 15, 'steps_in_trial': 200, 'reward': 0, 'perf_time': 0.28340419999999966, 'knowledge': 12.18274111675127, 'population': 170, 'numerosity': 170, 'reliable': 24}
INFO:lcs.agents.Agent:{'trial': 20, 'steps_in_trial': 200, 'reward': 0, 'perf_time': 0.2849350000000008, 'knowledge': 14.720812182741117, 'population': 176, 'numerosity': 176, 'reliable': 29}
INFO:lcs.agents.Agent:{'trial': 25, 'steps_in_trial': 200, 'reward': 0, 'perf_time': 0.3148314999999968, 'knowledge': 20.812182741116754, 'population': 183, 'numerosity': 183, 'reliable': 41}
INFO:lcs.agents.Agent:{'trial': 30, 'steps

END - ACS2ER - 1
START - ACS2ER - 3


INFO:lcs.agents.Agent:{'trial': 5, 'steps_in_trial': 5, 'reward': 1000, 'perf_time': 0.011234399999999312, 'knowledge': 7.614213197969544, 'population': 91, 'numerosity': 91, 'reliable': 15}
INFO:lcs.agents.Agent:{'trial': 10, 'steps_in_trial': 200, 'reward': 0, 'perf_time': 0.6558891000000031, 'knowledge': 20.812182741116754, 'population': 119, 'numerosity': 119, 'reliable': 41}
INFO:lcs.agents.Agent:{'trial': 15, 'steps_in_trial': 200, 'reward': 0, 'perf_time': 0.5934012999999965, 'knowledge': 36.54822335025381, 'population': 179, 'numerosity': 179, 'reliable': 72}
INFO:lcs.agents.Agent:{'trial': 20, 'steps_in_trial': 200, 'reward': 0, 'perf_time': 0.6213751999999957, 'knowledge': 44.67005076142132, 'population': 188, 'numerosity': 188, 'reliable': 89}
INFO:lcs.agents.Agent:{'trial': 25, 'steps_in_trial': 200, 'reward': 0, 'perf_time': 0.6516327999999945, 'knowledge': 57.360406091370564, 'population': 193, 'numerosity': 193, 'reliable': 114}
INFO:lcs.agents.Agent:{'trial': 30, 'steps

END - ACS2ER - 3
START - ACS2ER - 5


INFO:lcs.agents.Agent:{'trial': 5, 'steps_in_trial': 200, 'reward': 0, 'perf_time': 0.9619757000000106, 'knowledge': 30.456852791878177, 'population': 126, 'numerosity': 126, 'reliable': 61}
INFO:lcs.agents.Agent:{'trial': 10, 'steps_in_trial': 200, 'reward': 0, 'perf_time': 0.910892600000011, 'knowledge': 48.223350253807105, 'population': 163, 'numerosity': 163, 'reliable': 96}
INFO:lcs.agents.Agent:{'trial': 15, 'steps_in_trial': 22, 'reward': 1000, 'perf_time': 0.0967840999999936, 'knowledge': 63.95939086294417, 'population': 176, 'numerosity': 176, 'reliable': 127}
INFO:lcs.agents.Agent:{'trial': 20, 'steps_in_trial': 200, 'reward': 0, 'perf_time': 1.245945000000006, 'knowledge': 70.05076142131979, 'population': 189, 'numerosity': 189, 'reliable': 139}
INFO:lcs.agents.Agent:{'trial': 25, 'steps_in_trial': 200, 'reward': 0, 'perf_time': 1.037027100000003, 'knowledge': 74.11167512690355, 'population': 193, 'numerosity': 193, 'reliable': 147}
INFO:lcs.agents.Agent:{'trial': 30, 'steps

END - ACS2ER - 5
