In [1]:
import os
from utils.run_utils import Runner

import gym
import gym_maze

from lcs.agents import Agent
from lcs.agents.acs2 import ACS2, Configuration as CFG_ACS2
from lcs.agents.acs2er import ACS2ER, Configuration as CFG_ACS2ER, ReplayMemory, ReplayMemorySample
from lcs.agents.acs2rer import ACS2RER, Configuration as CFG_ACS2RER
from lcs.metrics import population_metrics

# Logger
import logging
logging.basicConfig(level=logging.INFO)


# EXPERIMENT CONFIGURATION

In [2]:
MAZE = "Maze7-v0" 
EXPLORE_TRIALS = 1000
EXPLOIT_TRIALS = 200

# The size of ER replay memory buffer
ER_BUFFER_SIZE = 10000
# The minimum number of samples of ER replay memory buffer to start replying samples (warm-up phase)
ER_BUFFER_MIN_SAMPLES = 500
# The number of samples to be replayed druing ER phase
ER_SAMPLES_NUMBER_LIST = [3]


#######

REPEAT_START = 1
REPEAT = 6

EXPERIMENT_NAME = "Maze7_TEST10" # Please edit if running new experiment to do not override saved results.


In [3]:
runner = Runner('MAZE', EXPERIMENT_NAME, MAZE)

## METRICS

In [4]:
def _get_transitions():
    knowledge_env = gym.make(MAZE)
    transitions = knowledge_env.env.get_transitions()
    transitions = list(map(lambda t: [knowledge_env.env.maze.perception(t[0]), t[1], knowledge_env.env.maze.perception(t[2])], transitions))

    return transitions

TRANSITIONS = _get_transitions()
TRANSITIONS_LENGTH = len(TRANSITIONS)

def _maze_knowledge(population) -> float:
    # Take into consideration only reliable classifiers
    reliable_classifiers = [c for c in population if c.is_reliable()]

    # Count how many transitions are anticipated correctly
    nr_correct = 0

    # For all possible destinations from each path cell
    for p0, action, p1 in TRANSITIONS:
        if any([True for cl in reliable_classifiers
                if cl.predicts_successfully(p0, action, p1)]):
            nr_correct += 1

    return nr_correct / TRANSITIONS_LENGTH * 100.0

def _maze_specificity(population) -> float:
    pop_len = len(population)
    if(pop_len) == 0:
        return 0
    return sum(map(lambda c: c.specificity, population)) / pop_len

def _maze_metrics(agent, env):
    pop = agent.population
    metrics = {
        'knowledge': _maze_knowledge(pop),
        "specificity": _maze_specificity(agent.population)
    }
    metrics.update(population_metrics(pop, env))
    
    return metrics

def _weight_func_reward(rm: ReplayMemory, sample: ReplayMemorySample):
    if(sample.reward == 0):
        return 1
    
    return 5

def _weight_func_unique(rm: ReplayMemory, sample: ReplayMemorySample):
    existing_count = sum(1 for s in rm if sample.state == s.state and sample.action == s.action and sample.reward == s.reward and sample.next_state == s.next_state and sample.done == s.done)

    return 1 / (existing_count * 2 + 1)


## EXPERIMENT

In [5]:
def _run_experiment(agent, path):
    runner.run_experiment(agent, gym.make(MAZE), EXPLORE_TRIALS, EXPLOIT_TRIALS, path)

def run_acs2_experiment():
    for i in range(REPEAT_START, REPEAT_START + REPEAT):
        # Create agent 
        cfg = CFG_ACS2(    
            classifier_length=8,
            number_of_possible_actions=8,
            metrics_trial_frequency=1,
            user_metrics_collector_fcn=_maze_metrics)
        agent = ACS2(cfg)

        _run_experiment(agent, f'{i}')

def _run_acs2er_experiment(er_samples_number: int):
    for i in range(REPEAT_START, REPEAT_START + REPEAT):
        # Create agent 
        cfg = CFG_ACS2ER(    
            classifier_length=8,
            number_of_possible_actions=8,
            metrics_trial_frequency=1,
            er_buffer_size=ER_BUFFER_SIZE,
            er_min_samples=ER_BUFFER_MIN_SAMPLES,
            er_samples_number=er_samples_number,
            user_metrics_collector_fcn=_maze_metrics)
        agent = ACS2ER(cfg)

        _run_experiment(agent, os.path.join(f'm_{er_samples_number}', f'{i}'))

def run_acs2er_experiments():
    for er_samples_number in ER_SAMPLES_NUMBER_LIST:
        print(f"START - ACS2ER - {er_samples_number}")
        _run_acs2er_experiment(er_samples_number)
        print(f"END - ACS2ER - {er_samples_number}")


def _run_acs2per_experiment(er_samples_number: int):
    for i in range(REPEAT_START, REPEAT_START + REPEAT):
        # Create agent 
        cfg = CFG_ACS2ER(    
            classifier_length=8,
            number_of_possible_actions=8,
            metrics_trial_frequency=1,
            er_buffer_size=ER_BUFFER_SIZE,
            er_min_samples=ER_BUFFER_MIN_SAMPLES,
            er_samples_number=er_samples_number,
            er_weight_function=_weight_func_reward,
            user_metrics_collector_fcn=_maze_metrics)
        agent = ACS2ER(cfg)

        _run_experiment(agent, os.path.join(f'm_3-pER_reward', f'{i}'))

def run_acs2per_experiments():
    for er_samples_number in ER_SAMPLES_NUMBER_LIST:
        print(f"START - ACS2pER - reward")
        _run_acs2per_experiment(er_samples_number)
        print(f"END - ACS2pER - reward")


def _run_acs2per2_experiment(er_samples_number: int):
    for i in range(REPEAT_START, REPEAT_START + REPEAT):
        # Create agent 
        cfg = CFG_ACS2ER(    
            classifier_length=8,
            number_of_possible_actions=8,
            metrics_trial_frequency=1,
            er_buffer_size=ER_BUFFER_SIZE,
            er_min_samples=ER_BUFFER_MIN_SAMPLES,
            er_samples_number=er_samples_number,
            er_weight_function=_weight_func_unique,
            user_metrics_collector_fcn=_maze_metrics)
        agent = ACS2ER(cfg)

        _run_experiment(agent, os.path.join(f'm_3-pER_unique', f'{i}'))

def run_acs2per2_experiments():
    for er_samples_number in ER_SAMPLES_NUMBER_LIST:
        print(f"START - ACS2pER - unique")
        _run_acs2per2_experiment(er_samples_number)
        print(f"END - ACS2pER - unique")


def _run_acs2rer_experiment(er_samples_number: int):
    for i in range(REPEAT_START, REPEAT_START + REPEAT):
        # Create agent 
        cfg = CFG_ACS2ER(    
            classifier_length=8,
            number_of_possible_actions=8,
            metrics_trial_frequency=1,
            er_buffer_size=ER_BUFFER_SIZE,
            er_min_samples=ER_BUFFER_MIN_SAMPLES,
            er_samples_number=er_samples_number,
            user_metrics_collector_fcn=_maze_metrics)
        agent = ACS2ER(cfg)

        _run_experiment(agent, os.path.join(f'm_3-RER', f'{i}'))

def run_acs2rer_experiments():
    for er_samples_number in ER_SAMPLES_NUMBER_LIST:
        print(f"START - ACS2ER - {er_samples_number}")
        _run_acs2rer_experiment(er_samples_number)
        print(f"END - ACS2ER - {er_samples_number}")

### RUN ACS2 Experiments

In [6]:
run_acs2_experiment()

INFO:lcs.agents.Agent:{'trial': 100, 'steps_in_trial': 5, 'reward': 1000, 'perf_time': 0.016917499999998142, 'knowledge': 26.89655172413793, 'specificity': 0.6373626373626373, 'population': 364, 'numerosity': 364, 'reliable': 72}
INFO:lcs.agents.Agent:{'trial': 200, 'steps_in_trial': 22, 'reward': 1000, 'perf_time': 0.10374110000000059, 'knowledge': 40.0, 'specificity': 0.6585526315789474, 'population': 380, 'numerosity': 380, 'reliable': 139}
INFO:lcs.agents.Agent:{'trial': 300, 'steps_in_trial': 17, 'reward': 1000, 'perf_time': 0.06716259999999608, 'knowledge': 51.03448275862069, 'specificity': 0.6688179347826086, 'population': 368, 'numerosity': 368, 'reliable': 173}
INFO:lcs.agents.Agent:{'trial': 400, 'steps_in_trial': 5, 'reward': 1000, 'perf_time': 0.009782899999990491, 'knowledge': 57.24137931034483, 'specificity': 0.6726519337016574, 'population': 362, 'numerosity': 362, 'reliable': 188}
INFO:lcs.agents.Agent:{'trial': 500, 'steps_in_trial': 13, 'reward': 1000, 'perf_time': 0.

### RUN ACS2ER Experiments

In [7]:
run_acs2er_experiments()

START - ACS2ER - 3


INFO:lcs.agents.Agent:{'trial': 100, 'steps_in_trial': 50, 'reward': 0, 'perf_time': 1.482777699999815, 'knowledge': 72.41379310344827, 'specificity': 0.6491228070175439, 'population': 399, 'numerosity': 399, 'reliable': 232}
INFO:lcs.agents.Agent:{'trial': 200, 'steps_in_trial': 27, 'reward': 1000, 'perf_time': 0.4544025000000147, 'knowledge': 85.51724137931035, 'specificity': 0.6584224598930482, 'population': 374, 'numerosity': 374, 'reliable': 281}
INFO:lcs.agents.Agent:{'trial': 300, 'steps_in_trial': 26, 'reward': 1000, 'perf_time': 0.40663150000000314, 'knowledge': 90.3448275862069, 'specificity': 0.6653899721448467, 'population': 359, 'numerosity': 359, 'reliable': 300}
INFO:lcs.agents.Agent:{'trial': 400, 'steps_in_trial': 4, 'reward': 1000, 'perf_time': 0.07247379999989789, 'knowledge': 93.79310344827586, 'specificity': 0.6670197740112994, 'population': 354, 'numerosity': 354, 'reliable': 313}
INFO:lcs.agents.Agent:{'trial': 500, 'steps_in_trial': 23, 'reward': 1000, 'perf_tim

END - ACS2ER - 3


In [8]:
run_acs2per_experiments()

START - ACS2pER - reward


INFO:lcs.agents.Agent:{'trial': 100, 'steps_in_trial': 8, 'reward': 1000, 'perf_time': 0.10688189999973474, 'knowledge': 91.72413793103448, 'specificity': 0.6799163179916318, 'population': 478, 'numerosity': 478, 'reliable': 393}
INFO:lcs.agents.Agent:{'trial': 200, 'steps_in_trial': 15, 'reward': 1000, 'perf_time': 0.1849976000003153, 'knowledge': 97.93103448275862, 'specificity': 0.687641723356009, 'population': 441, 'numerosity': 441, 'reliable': 420}
INFO:lcs.agents.Agent:{'trial': 300, 'steps_in_trial': 11, 'reward': 1000, 'perf_time': 0.13589619999993374, 'knowledge': 99.3103448275862, 'specificity': 0.6886467889908257, 'population': 436, 'numerosity': 436, 'reliable': 426}
INFO:lcs.agents.Agent:{'trial': 400, 'steps_in_trial': 9, 'reward': 1000, 'perf_time': 0.1097341999998207, 'knowledge': 99.3103448275862, 'specificity': 0.6898148148148148, 'population': 432, 'numerosity': 432, 'reliable': 428}
INFO:lcs.agents.Agent:{'trial': 500, 'steps_in_trial': 16, 'reward': 1000, 'perf_ti

END - ACS2pER - reward


In [9]:
run_acs2per2_experiments()

START - ACS2pER - unique


INFO:lcs.agents.Agent:{'trial': 100, 'steps_in_trial': 50, 'reward': 0, 'perf_time': 1.4063628999992943, 'knowledge': 88.96551724137932, 'specificity': 0.6620911214953271, 'population': 428, 'numerosity': 428, 'reliable': 272}
INFO:lcs.agents.Agent:{'trial': 200, 'steps_in_trial': 17, 'reward': 1000, 'perf_time': 0.6278050999999323, 'knowledge': 97.24137931034483, 'specificity': 0.6778215223097113, 'population': 381, 'numerosity': 381, 'reliable': 346}
INFO:lcs.agents.Agent:{'trial': 300, 'steps_in_trial': 12, 'reward': 1000, 'perf_time': 0.5245281000006798, 'knowledge': 97.93103448275862, 'specificity': 0.6821625344352618, 'population': 363, 'numerosity': 363, 'reliable': 349}
INFO:lcs.agents.Agent:{'trial': 400, 'steps_in_trial': 4, 'reward': 1000, 'perf_time': 0.17781579999973474, 'knowledge': 99.3103448275862, 'specificity': 0.6822916666666666, 'population': 360, 'numerosity': 360, 'reliable': 354}
INFO:lcs.agents.Agent:{'trial': 500, 'steps_in_trial': 15, 'reward': 1000, 'perf_tim

END - ACS2pER - unique


In [10]:
run_acs2rer_experiments()

START - ACS2ER - 3


INFO:lcs.agents.Agent:{'trial': 100, 'steps_in_trial': 13, 'reward': 1000, 'perf_time': 0.15906300000096962, 'knowledge': 72.41379310344827, 'specificity': 0.6632593457943925, 'population': 428, 'numerosity': 428, 'reliable': 265}
INFO:lcs.agents.Agent:{'trial': 200, 'steps_in_trial': 7, 'reward': 1000, 'perf_time': 0.08089049999944109, 'knowledge': 86.89655172413792, 'specificity': 0.6719128329297821, 'population': 413, 'numerosity': 413, 'reliable': 318}
INFO:lcs.agents.Agent:{'trial': 300, 'steps_in_trial': 15, 'reward': 1000, 'perf_time': 0.16330589999961376, 'knowledge': 91.72413793103448, 'specificity': 0.6801282051282052, 'population': 390, 'numerosity': 390, 'reliable': 343}
INFO:lcs.agents.Agent:{'trial': 400, 'steps_in_trial': 17, 'reward': 1000, 'perf_time': 0.18843070000002626, 'knowledge': 96.55172413793103, 'specificity': 0.681135770234987, 'population': 383, 'numerosity': 383, 'reliable': 362}
INFO:lcs.agents.Agent:{'trial': 500, 'steps_in_trial': 7, 'reward': 1000, 'per

END - ACS2ER - 3
