In [35]:
import os
from collections import defaultdict
from utils.run_utils import Runner

import gym
import gym_maze

from lcs.agents import Agent
from lcs.agents.acs2 import ACS2, Configuration as CFG_ACS2, ClassifiersList
from lcs.agents.acs2er import ACS2ER, Configuration as CFG_ACS2ER, ReplayMemory, ReplayMemorySample
from lcs.agents.acs2rer import ACS2RER, Configuration as CFG_ACS2RER
from lcs.metrics import population_metrics

# Logger
import logging
logging.basicConfig(level=logging.INFO)


# EXPERIMENT CONFIGURATION

In [36]:
MAZE = "Maze5-v0" 
EXPLORE_TRIALS = 200
EXPLOIT_TRIALS = 200

# The size of ER replay memory buffer
ER_BUFFER_SIZE = 10000
# The minimum number of samples of ER replay memory buffer to start replying samples (warm-up phase)
ER_BUFFER_MIN_SAMPLES = 100
# The number of samples to be replayed druing ER phase
ER_SAMPLES_NUMBER_LIST = [3]


#######

REPEAT_START = 1
REPEAT = 3

EXPERIMENT_NAME = "Maze5_TEST30" # Please edit if running new experiment to do not override saved results.


In [37]:
runner = Runner('MAZE', EXPERIMENT_NAME, MAZE)

## METRICS

In [38]:
MAZE_PATH = 0
MAZE_REWARD = 9

optimal_paths_env = gym.make(MAZE)
matrix = optimal_paths_env.matrix
X = matrix.shape[1]
Y = matrix.shape[0]

def get_reward_pos():
    for i in range(Y):
        for j in range(X):
            if(matrix[i, j] == MAZE_REWARD):
                return(i, j)

def get_possible_neighbour_cords(pos_y, pos_x):
    n = ((pos_y - 1, pos_x), 4)
    ne = ((pos_y - 1, pos_x + 1), 5)
    e = ((pos_y, pos_x + 1), 6)
    se = ((pos_y + 1, pos_x + 1), 7)
    s = ((pos_y + 1, pos_x), 0)
    sw = ((pos_y + 1, pos_x - 1), 1)
    w = ((pos_y, pos_x - 1), 2)
    nw = ((pos_y - 1, pos_x - 1), 3)

    return [n, ne, e, se, s, sw, w, nw]

    
optimal_actions = []

root_node = get_reward_pos()

def is_included(cords, level):
    return any(op_cords[0] == cords[0] and op_cords[1] == cords[1] and level != op_level for op_cords, _, op_level in optimal_actions)


def get_optimal_actions_to(node, level):
    neighbour_cords = get_possible_neighbour_cords(node[0], node[1])

    next_level_cords = []
    for (pos_y, pos_x), action in neighbour_cords:
        if (not is_included((pos_y, pos_x), level)) and matrix[pos_y, pos_x] == MAZE_PATH:
            optimal_actions.append(((pos_y, pos_x), action, level))
            next_level_cords.append((pos_y, pos_x))

    return next_level_cords

LEVEL = 0
next_level_cords = get_optimal_actions_to(root_node, LEVEL)

while len(next_level_cords) > 0:
    LEVEL += 1
    new_next_level_cords = []
    for nlc in next_level_cords:
        new_next_level_cords += get_optimal_actions_to(nlc, LEVEL)

    next_level_cords = new_next_level_cords

positions_actions = defaultdict(set)
for cords, a, _ in optimal_actions: positions_actions[cords].add(a)

positions_actions = positions_actions.items()
POSITIONS_OPTIMAL_ACTIONS = list(map(lambda pa: (optimal_paths_env.env.maze.perception(pa[0]), list(pa[1])), positions_actions))
POSITIONS_OPTIMAL_ACTIONS_LENGTH = len(POSITIONS_OPTIMAL_ACTIONS)



def _maze_optimal(classifiers) -> float:
    nr_correct = 0

    for p0, optimal_actions_list in POSITIONS_OPTIMAL_ACTIONS:
        match_set = classifiers.form_match_set(p0)
        cl = match_set.get_best_classifier()

        if cl is not None and optimal_actions_list.count(cl.action) > 0:
            nr_correct += 1

    return nr_correct / POSITIONS_OPTIMAL_ACTIONS_LENGTH * 100.0


def _maze_optimal_reliable(classifiers) -> float:
    return _maze_optimal(ClassifiersList(*[c for c in classifiers if c.is_reliable()]))


In [39]:
def _get_transitions():
    knowledge_env = gym.make(MAZE)
    transitions = knowledge_env.env.get_transitions()
    transitions = list(map(lambda t: [knowledge_env.env.maze.perception(t[0]), t[1], knowledge_env.env.maze.perception(t[2])], transitions))

    return transitions

TRANSITIONS = _get_transitions()
TRANSITIONS_LENGTH = len(TRANSITIONS)

def _maze_knowledge(population) -> float:
    # Take into consideration only reliable classifiers
    reliable_classifiers = [c for c in population if c.is_reliable()]

    # Count how many transitions are anticipated correctly
    nr_correct = 0

    # For all possible destinations from each path cell
    for p0, action, p1 in TRANSITIONS:
        if any([True for cl in reliable_classifiers
                if cl.predicts_successfully(p0, action, p1)]):
            nr_correct += 1

    return nr_correct / TRANSITIONS_LENGTH * 100.0

def _maze_specificity(population) -> float:
    pop_len = len(population)
    if(pop_len) == 0:
        return 0
    return sum(map(lambda c: c.specificity, population)) / pop_len

def _maze_metrics(agent, env):
    pop = agent.population
    metrics = {
        'knowledge': _maze_knowledge(pop),
        "specificity": _maze_specificity(agent.population),
        "optimal": _maze_optimal(agent.population),
        "optimal_reliable": _maze_optimal_reliable(agent.population)
    }
    metrics.update(population_metrics(pop, env))
    
    return metrics

def _weight_func_reward(rm: ReplayMemory, sample: ReplayMemorySample):
    if(sample.reward == 0):
        return 1
    
    return 5

def _weight_func_unique(rm: ReplayMemory, sample: ReplayMemorySample):
    existing_count = sum(1 for s in rm if sample.state == s.state and sample.action == s.action and sample.reward == s.reward and sample.next_state == s.next_state and sample.done == s.done)

    return 1 / (existing_count * 2 + 1)

def _weight_func_unique_reward(rm: ReplayMemory, sample: ReplayMemorySample):
    return _weight_func_reward(rm, sample) * _weight_func_unique(rm, sample)


## EXPERIMENT

In [40]:
def _run_experiment(agent, path):
    runner.run_experiment(agent, gym.make(MAZE), EXPLORE_TRIALS, EXPLOIT_TRIALS, path)

def run_acs2_experiment():
    for i in range(REPEAT_START, REPEAT_START + REPEAT):
        # Create agent 
        cfg = CFG_ACS2(    
            classifier_length=8,
            number_of_possible_actions=8,
            metrics_trial_frequency=1,
            user_metrics_collector_fcn=_maze_metrics)
        agent = ACS2(cfg)

        _run_experiment(agent, f'{i}')

def _run_acs2er_experiment(er_samples_number: int):
    for i in range(REPEAT_START, REPEAT_START + REPEAT):
        # Create agent 
        cfg = CFG_ACS2ER(    
            classifier_length=8,
            number_of_possible_actions=8,
            metrics_trial_frequency=1,
            er_buffer_size=ER_BUFFER_SIZE,
            er_min_samples=ER_BUFFER_MIN_SAMPLES,
            er_samples_number=er_samples_number,
            user_metrics_collector_fcn=_maze_metrics)
        agent = ACS2ER(cfg)

        _run_experiment(agent, os.path.join(f'm_3-ER', f'{i}'))

def run_acs2er_experiments():
    for er_samples_number in ER_SAMPLES_NUMBER_LIST:
        print(f"START - ACS2ER - {er_samples_number}")
        _run_acs2er_experiment(er_samples_number)
        print(f"END - ACS2ER - {er_samples_number}")

def _run_acs2per_experiment(er_samples_number: int):
    for i in range(REPEAT_START, REPEAT_START + REPEAT):
        # Create agent 
        cfg = CFG_ACS2ER(    
            classifier_length=8,
            number_of_possible_actions=8,
            metrics_trial_frequency=1,
            er_buffer_size=ER_BUFFER_SIZE,
            er_min_samples=ER_BUFFER_MIN_SAMPLES,
            er_samples_number=er_samples_number,
            er_weight_function=_weight_func_reward,
            user_metrics_collector_fcn=_maze_metrics)
        agent = ACS2ER(cfg)

        _run_experiment(agent, os.path.join(f'm_3-pER_reward', f'{i}'))

def run_acs2per_experiments():
    for er_samples_number in ER_SAMPLES_NUMBER_LIST:
        print(f"START - ACS2pER - reward")
        _run_acs2per_experiment(er_samples_number)
        print(f"END - ACS2pER - reward")


def _run_acs2per2_experiment(er_samples_number: int):
    for i in range(REPEAT_START, REPEAT_START + REPEAT):
        # Create agent 
        cfg = CFG_ACS2ER(    
            classifier_length=8,
            number_of_possible_actions=8,
            metrics_trial_frequency=1,
            er_buffer_size=ER_BUFFER_SIZE,
            er_min_samples=ER_BUFFER_MIN_SAMPLES,
            er_samples_number=er_samples_number,
            er_weight_function=_weight_func_unique,
            user_metrics_collector_fcn=_maze_metrics)
        agent = ACS2ER(cfg)

        _run_experiment(agent, os.path.join(f'm_3-pER_unique', f'{i}'))

def run_acs2per2_experiments():
    for er_samples_number in ER_SAMPLES_NUMBER_LIST:
        print(f"START - ACS2pER - unique")
        _run_acs2per2_experiment(er_samples_number)
        print(f"END - ACS2pER - unique")


def _run_acs2per3_experiment(er_samples_number: int):
    for i in range(REPEAT_START, REPEAT_START + REPEAT):
        # Create agent
        cfg = CFG_ACS2ER(
            classifier_length=8,
            number_of_possible_actions=8,
            metrics_trial_frequency=1,
            er_buffer_size=ER_BUFFER_SIZE,
            er_min_samples=ER_BUFFER_MIN_SAMPLES,
            er_samples_number=er_samples_number,
            er_weight_function=_weight_func_unique_reward,
            user_metrics_collector_fcn=_maze_metrics)
        agent = ACS2ER(cfg)

        _run_experiment(agent, os.path.join(f'm_3-pER_unique_reward', f'{i}'))


def run_acs2per3_experiments():
    for er_samples_number in ER_SAMPLES_NUMBER_LIST:
        print(f"START - ACS2pER - unique + reward")
        _run_acs2per3_experiment(er_samples_number)
        print(f"END - ACS2pER - unique + reward")


def _run_acs2rer_experiment(er_samples_number: int):
    for i in range(REPEAT_START, REPEAT_START + REPEAT):
        # Create agent 
        cfg = CFG_ACS2RER(    
            classifier_length=8,
            number_of_possible_actions=8,
            metrics_trial_frequency=1,
            er_buffer_size=ER_BUFFER_SIZE,
            er_min_samples=ER_BUFFER_MIN_SAMPLES,
            er_samples_number=er_samples_number,
            user_metrics_collector_fcn=_maze_metrics)
        agent = ACS2ER(cfg)

        _run_experiment(agent, os.path.join(f'm_3-RER', f'{i}'))

def run_acs2rer_experiments():
    for er_samples_number in ER_SAMPLES_NUMBER_LIST:
        print(f"START - ACS2ER - {er_samples_number}")
        _run_acs2rer_experiment(er_samples_number)
        print(f"END - ACS2ER - {er_samples_number}")

### RUN ACS2 Experiments

In [41]:
run_acs2_experiment()

INFO:lcs.agents.Agent:{'trial': 20, 'steps_in_trial': 50, 'reward': 0, 'perf_time': 0.41422310000052676, 'knowledge': 3.4246575342465753, 'specificity': 0.5451388888888888, 'optimal': 55.55555555555556, 'optimal_reliable': 5.555555555555555, 'population': 216, 'numerosity': 216, 'reliable': 11}
INFO:lcs.agents.Agent:{'trial': 40, 'steps_in_trial': 50, 'reward': 0, 'perf_time': 0.4689434000028996, 'knowledge': 6.8493150684931505, 'specificity': 0.5840747330960854, 'optimal': 58.333333333333336, 'optimal_reliable': 8.333333333333332, 'population': 281, 'numerosity': 281, 'reliable': 21}
INFO:lcs.agents.Agent:{'trial': 60, 'steps_in_trial': 50, 'reward': 0, 'perf_time': 0.4656929999982822, 'knowledge': 14.383561643835616, 'specificity': 0.6152647975077882, 'optimal': 55.55555555555556, 'optimal_reliable': 16.666666666666664, 'population': 321, 'numerosity': 321, 'reliable': 38}
INFO:lcs.agents.Agent:{'trial': 80, 'steps_in_trial': 4, 'reward': 1000, 'perf_time': 0.039364000000205124, 'kno

### RUN ACS2ER Experiments

In [42]:
run_acs2er_experiments()

START - ACS2ER - 3


INFO:lcs.agents.Agent:{'trial': 20, 'steps_in_trial': 50, 'reward': 0, 'perf_time': 2.6047294000018155, 'knowledge': 10.273972602739725, 'specificity': 0.5547385620915033, 'optimal': 36.11111111111111, 'optimal_reliable': 11.11111111111111, 'population': 306, 'numerosity': 306, 'reliable': 58}
INFO:lcs.agents.Agent:{'trial': 40, 'steps_in_trial': 47, 'reward': 1000, 'perf_time': 2.4568257999999332, 'knowledge': 19.17808219178082, 'specificity': 0.5850785340314136, 'optimal': 47.22222222222222, 'optimal_reliable': 25.0, 'population': 382, 'numerosity': 382, 'reliable': 91}
INFO:lcs.agents.Agent:{'trial': 60, 'steps_in_trial': 28, 'reward': 1000, 'perf_time': 1.5782976999980747, 'knowledge': 30.136986301369863, 'specificity': 0.621264367816092, 'optimal': 58.333333333333336, 'optimal_reliable': 27.77777777777778, 'population': 435, 'numerosity': 435, 'reliable': 123}
INFO:lcs.agents.Agent:{'trial': 80, 'steps_in_trial': 2, 'reward': 1000, 'perf_time': 0.09926170000107959, 'knowledge': 35

END - ACS2ER - 3


In [43]:
run_acs2per_experiments()

START - ACS2pER - reward


INFO:lcs.agents.Agent:{'trial': 20, 'steps_in_trial': 14, 'reward': 1000, 'perf_time': 0.9378385999989405, 'knowledge': 16.43835616438356, 'specificity': 0.5845032397408207, 'optimal': 41.66666666666667, 'optimal_reliable': 16.666666666666664, 'population': 463, 'numerosity': 463, 'reliable': 75}
INFO:lcs.agents.Agent:{'trial': 40, 'steps_in_trial': 9, 'reward': 1000, 'perf_time': 0.6298118000013346, 'knowledge': 37.67123287671233, 'specificity': 0.6331066945606695, 'optimal': 52.77777777777778, 'optimal_reliable': 44.44444444444444, 'population': 478, 'numerosity': 478, 'reliable': 157}
INFO:lcs.agents.Agent:{'trial': 60, 'steps_in_trial': 16, 'reward': 1000, 'perf_time': 1.0394545999988622, 'knowledge': 48.63013698630137, 'specificity': 0.6445630081300813, 'optimal': 66.66666666666666, 'optimal_reliable': 44.44444444444444, 'population': 492, 'numerosity': 492, 'reliable': 208}
INFO:lcs.agents.Agent:{'trial': 80, 'steps_in_trial': 7, 'reward': 1000, 'perf_time': 0.4641914000021643, '

END - ACS2pER - reward


In [44]:
run_acs2per2_experiments()

START - ACS2pER - unique


INFO:lcs.agents.Agent:{'trial': 20, 'steps_in_trial': 50, 'reward': 0, 'perf_time': 4.641331499999069, 'knowledge': 17.123287671232877, 'specificity': 0.5488703339882122, 'optimal': 55.55555555555556, 'optimal_reliable': 25.0, 'population': 509, 'numerosity': 509, 'reliable': 79}
INFO:lcs.agents.Agent:{'trial': 40, 'steps_in_trial': 50, 'reward': 0, 'perf_time': 5.415326800000912, 'knowledge': 36.986301369863014, 'specificity': 0.6115044247787611, 'optimal': 55.55555555555556, 'optimal_reliable': 33.33333333333333, 'population': 565, 'numerosity': 565, 'reliable': 163}
INFO:lcs.agents.Agent:{'trial': 60, 'steps_in_trial': 50, 'reward': 0, 'perf_time': 6.117682399999467, 'knowledge': 63.013698630136986, 'specificity': 0.6335078534031413, 'optimal': 47.22222222222222, 'optimal_reliable': 44.44444444444444, 'population': 573, 'numerosity': 573, 'reliable': 239}
INFO:lcs.agents.Agent:{'trial': 80, 'steps_in_trial': 3, 'reward': 1000, 'perf_time': 0.3789364000003843, 'knowledge': 73.2876712

END - ACS2pER - unique


In [45]:
run_acs2per3_experiments()

START - ACS2pER - unique + reward


INFO:lcs.agents.Agent:{'trial': 20, 'steps_in_trial': 50, 'reward': 0, 'perf_time': 4.028499699998065, 'knowledge': 21.91780821917808, 'specificity': 0.5944798301486199, 'optimal': 38.88888888888889, 'optimal_reliable': 33.33333333333333, 'population': 471, 'numerosity': 471, 'reliable': 70}
INFO:lcs.agents.Agent:{'trial': 40, 'steps_in_trial': 50, 'reward': 0, 'perf_time': 5.277778200001194, 'knowledge': 44.52054794520548, 'specificity': 0.63732741617357, 'optimal': 50.0, 'optimal_reliable': 47.22222222222222, 'population': 507, 'numerosity': 507, 'reliable': 149}
INFO:lcs.agents.Agent:{'trial': 60, 'steps_in_trial': 50, 'reward': 0, 'perf_time': 5.914067000001523, 'knowledge': 62.328767123287676, 'specificity': 0.6483333333333333, 'optimal': 50.0, 'optimal_reliable': 30.555555555555557, 'population': 525, 'numerosity': 525, 'reliable': 211}
INFO:lcs.agents.Agent:{'trial': 80, 'steps_in_trial': 13, 'reward': 1000, 'perf_time': 1.6702160999993794, 'knowledge': 71.23287671232876, 'speci

END - ACS2pER - unique + reward


In [46]:

run_acs2rer_experiments()

START - ACS2ER - 3


INFO:lcs.agents.Agent:{'trial': 20, 'steps_in_trial': 6, 'reward': 1000, 'perf_time': 0.3601178999997501, 'knowledge': 26.027397260273972, 'specificity': 0.5807349665924276, 'optimal': 41.66666666666667, 'optimal_reliable': 27.77777777777778, 'population': 449, 'numerosity': 449, 'reliable': 114}
INFO:lcs.agents.Agent:{'trial': 40, 'steps_in_trial': 43, 'reward': 1000, 'perf_time': 3.2124911999999313, 'knowledge': 41.0958904109589, 'specificity': 0.6257668711656442, 'optimal': 61.111111111111114, 'optimal_reliable': 27.77777777777778, 'population': 489, 'numerosity': 489, 'reliable': 167}
INFO:lcs.agents.Agent:{'trial': 60, 'steps_in_trial': 15, 'reward': 1000, 'perf_time': 0.8224566999997478, 'knowledge': 47.94520547945205, 'specificity': 0.6361111111111111, 'optimal': 61.111111111111114, 'optimal_reliable': 36.11111111111111, 'population': 495, 'numerosity': 495, 'reliable': 186}
INFO:lcs.agents.Agent:{'trial': 80, 'steps_in_trial': 8, 'reward': 1000, 'perf_time': 0.44865260000005946

END - ACS2ER - 3
