In [15]:
import numpy as np
import pandas as pd
import dill
import os

import gym
import gym_maze

from lcs.agents import Agent
from lcs.agents.acs2 import ACS2, Configuration as CFG_ACS2
from lcs.agents.acs2er import ACS2ER, Configuration as CFG_ACS2ER
from lcs.metrics import population_metrics

# Logger: configure the root logger at INFO level so INFO records
# (such as the `INFO:lcs.agents.Agent:...` lines in the cell outputs) are shown.
import logging
logging.basicConfig(level=logging.INFO)


# EXPERIMENT CONFIGURATION

In [16]:
# Identifier of the gym maze environment used by every experiment in this notebook.
MAZE = "Maze5-v0"
# Number of trials passed to agent.explore() / agent.exploit() respectively.
EXPLORE_TRIALS = 5000
EXPLOIT_TRIALS = 1000

# The size of the ER replay memory buffer.
ER_BUFFER_SIZE = 10000
# The minimum number of samples in the ER replay memory buffer required to start replaying samples (warm-up phase).
ER_BUFFER_MIN_SAMPLES = 1000
# The numbers of samples to be replayed during the ER phase; one ACS2ER experiment is run per value.
ER_SAMPLES_NUMBER_LIST = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]



#######

EXPERIMENT_NAME = "10"  # Change this when running a new experiment so saved results are not overwritten.
DATA_BASE_PATH = ""  # Empty string: results are stored relative to the current location.
DATA_PATH = os.path.join(DATA_BASE_PATH, 'MAZE', EXPERIMENT_NAME, MAZE)

# Refuse to continue when a results directory for this experiment/environment already exists.
# FileExistsError is a subclass of Exception, so code catching Exception keeps working.
if os.path.isdir(DATA_PATH):
    raise FileExistsError(f"The experiment with name: '{EXPERIMENT_NAME}' for '{MAZE}' environment was run already.")

## METRICS

In [17]:
def _get_transitions():
    """Return the transitions of a fresh ``MAZE`` environment in perception form.

    Each raw transition reported by the environment's ``get_transitions()`` is
    turned into a three-element list: elements 0 and 2 are passed through
    ``maze.perception`` and element 1 is kept unchanged.
    """
    inner_env = gym.make(MAZE).env

    return [
        [
            inner_env.maze.perception(raw[0]),
            raw[1],
            inner_env.maze.perception(raw[2]),
        ]
        for raw in inner_env.get_transitions()
    ]

# Computed once when this cell runs; _maze_knowledge reads both module-level values
# on every call instead of rebuilding the environment.
TRANSITIONS = _get_transitions()
TRANSITIONS_LENGTH = len(TRANSITIONS)

def _maze_knowledge(population) -> float:
    """Return the percentage of ``TRANSITIONS`` anticipated by reliable classifiers.

    A transition ``(p0, action, p1)`` counts as known when at least one
    classifier for which ``is_reliable()`` is truthy also returns a truthy
    value from ``predicts_successfully(p0, action, p1)``.

    Args:
        population: iterable of classifiers exposing ``is_reliable()`` and
            ``predicts_successfully(p0, action, p1)``.

    Returns:
        ``known / TRANSITIONS_LENGTH * 100.0`` as a float.

    Raises:
        ZeroDivisionError: if ``TRANSITIONS_LENGTH`` is 0.
    """
    # Take into consideration only reliable classifiers
    reliable_classifiers = [c for c in population if c.is_reliable()]

    # Count how many transitions are anticipated correctly.
    # A generator (not a list) is passed to any() so the scan stops at the
    # first classifier that predicts the transition.
    nr_correct = sum(
        1
        for p0, action, p1 in TRANSITIONS
        if any(cl.predicts_successfully(p0, action, p1)
               for cl in reliable_classifiers)
    )

    return nr_correct / TRANSITIONS_LENGTH * 100.0

def _maze_metrics(agent, env):
    """Build the metrics dict for ``agent``: maze knowledge plus population metrics.

    The ``'knowledge'`` entry comes from ``_maze_knowledge``; the entries returned by
    ``population_metrics(population, env)`` are merged in afterwards, so they win on a
    key clash.
    """
    population = agent.population

    return {
        'knowledge': _maze_knowledge(population),
        **population_metrics(population, env),
    }


def _save_data(data, path, file_name):
    """Serialize ``data`` with dill to ``<DATA_PATH>/<path>/<file_name>.dill``.

    Args:
        data: object to serialize.
        path: directory, relative to ``DATA_PATH``, that receives the file.
        file_name: file name without the ``.dill`` extension.

    The target directory is created when it does not exist yet.
    """
    full_dir_path = os.path.join(DATA_PATH, path)
    full_file_path = os.path.join(full_dir_path, f'{file_name}.dill')
    # exist_ok=True replaces the separate isdir() check-then-create sequence.
    os.makedirs(full_dir_path, exist_ok=True)

    # The context manager closes (and flushes) the handle even if dump() raises;
    # the original left the file object open.
    with open(full_file_path, 'wb') as data_file:
        dill.dump(data, data_file)

def _save_agent_data(agent, data, path, file_name):
    """Save ``data`` below a directory named after the class of ``agent``.

    The agent's class name is prepended to ``path`` before delegating to ``_save_data``.
    """
    agent_dir = type(agent).__name__
    _save_data(data, os.path.join(agent_dir, path), file_name)

def _save_metrics(agent, metrics, path, metrics_name):
    """Save ``metrics`` for ``agent`` in a file named ``metrics_<metrics_name>``."""
    file_name = f'metrics_{metrics_name}'
    _save_agent_data(agent, metrics, path, file_name)

def _save_explore_metrics(agent, metrics, path):
    """Save ``metrics`` under the ``EXPLORE`` metrics name."""
    _save_metrics(agent, metrics, path, metrics_name='EXPLORE')

def _save_exploit_metrics(agent, metrics, path):
    """Save ``metrics`` under the ``EXPLOIT`` metrics name."""
    _save_metrics(agent, metrics, path, metrics_name='EXPLOIT')

def _save_population(agent: Agent, path):
    """Save the result of ``agent.get_population()`` in a file named ``population``."""
    population = agent.get_population()
    _save_agent_data(agent, population, path, file_name='population')

def _save_environment(agent, env, path):
    """Save the environment object ``env`` in a file named ``env``."""
    _save_agent_data(agent, env, path, file_name='env')
    
def _save_experiment_data(agent, env, explore_metrics, exploit_metrics, path):
    """Save everything produced by one experiment run.

    Written in this order: explore metrics, exploit metrics, the agent's
    population, and the environment.
    """
    metric_savers = (
        (_save_explore_metrics, explore_metrics),
        (_save_exploit_metrics, exploit_metrics),
    )
    for save_metrics, metrics in metric_savers:
        save_metrics(agent, metrics, path)

    _save_population(agent, path)
    _save_environment(agent, env, path)


## EXPERIMENT

In [18]:

def _run_experiment(agent: Agent, data_path = ''):
    """Run ``agent`` on a fresh ``MAZE`` environment and save the results.

    The agent explores for ``EXPLORE_TRIALS`` trials, then exploits for
    ``EXPLOIT_TRIALS`` trials on the same environment instance. Both metric
    sets, the agent and the environment are handed to ``_save_experiment_data``.

    Args:
        agent: the agent to run.
        data_path: sub-path forwarded to ``_save_experiment_data``.
    """
    env = gym.make(MAZE)

    # Exploration phase first, exploitation phase second.
    metrics_explore = agent.explore(env, EXPLORE_TRIALS)
    metrics_exploit = agent.exploit(env, EXPLOIT_TRIALS)

    _save_experiment_data(
        agent,
        env,
        explore_metrics=metrics_explore,
        exploit_metrics=metrics_exploit,
        path=data_path,
    )

def run_acs2_experiment():
    """Run one experiment with an ACS2 agent, using the default (empty) data path."""
    agent_settings = dict(
        classifier_length=8,
        number_of_possible_actions=8,
        metrics_trial_frequency=1,
        user_metrics_collector_fcn=_maze_metrics,
    )
    acs2_agent = ACS2(CFG_ACS2(**agent_settings))

    _run_experiment(acs2_agent)

def _run_acs2er_experiment(er_samples_number: int):
    """Run one experiment with an ACS2ER agent replaying ``er_samples_number`` samples.

    Results are saved under the sub-path ``m_<er_samples_number>``.
    """
    agent_settings = dict(
        classifier_length=8,
        number_of_possible_actions=8,
        metrics_trial_frequency=1,
        er_buffer_size=ER_BUFFER_SIZE,
        er_min_samples=ER_BUFFER_MIN_SAMPLES,
        er_samples_number=er_samples_number,
        user_metrics_collector_fcn=_maze_metrics,
    )
    acs2er_agent = ACS2ER(CFG_ACS2ER(**agent_settings))

    _run_experiment(acs2er_agent, data_path=f'm_{er_samples_number}')

def run_acs2er_experiments():
    """Run an ACS2ER experiment for each entry of ``ER_SAMPLES_NUMBER_LIST``.

    A START line is printed before and an END line after every run.
    """
    def _run_with_banner(er_samples_number):
        # Wrap a single run in its START/END progress messages.
        print(f"START - ACS2ER - {er_samples_number}")
        _run_acs2er_experiment(er_samples_number)
        print(f"END - ACS2ER - {er_samples_number}")

    for samples in ER_SAMPLES_NUMBER_LIST:
        _run_with_banner(samples)

### RUN ACS2 Experiments

In [19]:
# Run the ACS2 experiment (explore, then exploit) and save its results under DATA_PATH.
run_acs2_experiment()

INFO:lcs.agents.Agent:{'trial': 500, 'steps_in_trial': 11, 'reward': 1000, 'perf_time': 0.02538430000004155, 'knowledge': 54.794520547945204, 'population': 409, 'numerosity': 409, 'reliable': 190}
INFO:lcs.agents.Agent:{'trial': 1000, 'steps_in_trial': 16, 'reward': 1000, 'perf_time': 0.03849259999969945, 'knowledge': 70.54794520547945, 'population': 395, 'numerosity': 395, 'reliable': 240}
INFO:lcs.agents.Agent:{'trial': 1500, 'steps_in_trial': 21, 'reward': 1000, 'perf_time': 0.03763709999930143, 'knowledge': 83.56164383561644, 'population': 378, 'numerosity': 378, 'reliable': 284}
INFO:lcs.agents.Agent:{'trial': 2000, 'steps_in_trial': 11, 'reward': 1000, 'perf_time': 0.034118699999453383, 'knowledge': 89.04109589041096, 'population': 368, 'numerosity': 368, 'reliable': 301}
INFO:lcs.agents.Agent:{'trial': 2500, 'steps_in_trial': 7, 'reward': 1000, 'perf_time': 0.013261600000078033, 'knowledge': 93.15068493150685, 'population': 364, 'numerosity': 364, 'reliable': 308}
INFO:lcs.agent

### RUN ACS2ER Experiments

In [20]:
# Run one ACS2ER experiment per value in ER_SAMPLES_NUMBER_LIST.
run_acs2er_experiments()

START - ACS2ER - 1


INFO:lcs.agents.Agent:{'trial': 500, 'steps_in_trial': 16, 'reward': 1000, 'perf_time': 0.09870299999965937, 'knowledge': 64.38356164383562, 'population': 360, 'numerosity': 360, 'reliable': 199}
INFO:lcs.agents.Agent:{'trial': 1000, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.004257399999914924, 'knowledge': 83.56164383561644, 'population': 345, 'numerosity': 345, 'reliable': 265}
INFO:lcs.agents.Agent:{'trial': 1500, 'steps_in_trial': 11, 'reward': 1000, 'perf_time': 0.05215740000039659, 'knowledge': 91.0958904109589, 'population': 334, 'numerosity': 334, 'reliable': 286}
INFO:lcs.agents.Agent:{'trial': 2000, 'steps_in_trial': 9, 'reward': 1000, 'perf_time': 0.03178589999970427, 'knowledge': 93.83561643835617, 'population': 331, 'numerosity': 331, 'reliable': 297}
INFO:lcs.agents.Agent:{'trial': 2500, 'steps_in_trial': 22, 'reward': 1000, 'perf_time': 0.088679399999819, 'knowledge': 98.63013698630137, 'population': 349, 'numerosity': 349, 'reliable': 308}
INFO:lcs.agents.Agen

END - ACS2ER - 1
START - ACS2ER - 2


INFO:lcs.agents.Agent:{'trial': 500, 'steps_in_trial': 10, 'reward': 1000, 'perf_time': 0.0665034999992713, 'knowledge': 84.93150684931507, 'population': 407, 'numerosity': 407, 'reliable': 298}
INFO:lcs.agents.Agent:{'trial': 1000, 'steps_in_trial': 3, 'reward': 1000, 'perf_time': 0.01820899999984249, 'knowledge': 96.57534246575342, 'population': 375, 'numerosity': 375, 'reliable': 346}
INFO:lcs.agents.Agent:{'trial': 1500, 'steps_in_trial': 10, 'reward': 1000, 'perf_time': 0.06547390000014275, 'knowledge': 98.63013698630137, 'population': 364, 'numerosity': 364, 'reliable': 351}
INFO:lcs.agents.Agent:{'trial': 2000, 'steps_in_trial': 5, 'reward': 1000, 'perf_time': 0.03500700000040524, 'knowledge': 98.63013698630137, 'population': 359, 'numerosity': 359, 'reliable': 351}
INFO:lcs.agents.Agent:{'trial': 2500, 'steps_in_trial': 6, 'reward': 1000, 'perf_time': 0.03574349999962578, 'knowledge': 100.0, 'population': 358, 'numerosity': 358, 'reliable': 354}
INFO:lcs.agents.Agent:{'trial': 

END - ACS2ER - 2
START - ACS2ER - 3


INFO:lcs.agents.Agent:{'trial': 500, 'steps_in_trial': 17, 'reward': 1000, 'perf_time': 0.2118371999986266, 'knowledge': 87.67123287671232, 'population': 403, 'numerosity': 403, 'reliable': 324}
INFO:lcs.agents.Agent:{'trial': 1000, 'steps_in_trial': 2, 'reward': 1000, 'perf_time': 0.024976600001537008, 'knowledge': 97.94520547945206, 'population': 376, 'numerosity': 376, 'reliable': 355}
INFO:lcs.agents.Agent:{'trial': 1500, 'steps_in_trial': 5, 'reward': 1000, 'perf_time': 0.06125380000048608, 'knowledge': 100.0, 'population': 370, 'numerosity': 370, 'reliable': 364}
INFO:lcs.agents.Agent:{'trial': 2000, 'steps_in_trial': 10, 'reward': 1000, 'perf_time': 0.0915421999998216, 'knowledge': 100.0, 'population': 367, 'numerosity': 367, 'reliable': 364}
INFO:lcs.agents.Agent:{'trial': 2500, 'steps_in_trial': 24, 'reward': 1000, 'perf_time': 0.2605751999999484, 'knowledge': 100.0, 'population': 366, 'numerosity': 366, 'reliable': 364}
INFO:lcs.agents.Agent:{'trial': 3000, 'steps_in_trial': 

END - ACS2ER - 3
START - ACS2ER - 4


INFO:lcs.agents.Agent:{'trial': 500, 'steps_in_trial': 2, 'reward': 1000, 'perf_time': 0.043440800000098534, 'knowledge': 95.2054794520548, 'population': 414, 'numerosity': 414, 'reliable': 370}
INFO:lcs.agents.Agent:{'trial': 1000, 'steps_in_trial': 7, 'reward': 1000, 'perf_time': 0.09100990000115416, 'knowledge': 100.0, 'population': 393, 'numerosity': 393, 'reliable': 383}
INFO:lcs.agents.Agent:{'trial': 1500, 'steps_in_trial': 4, 'reward': 1000, 'perf_time': 0.05237430000124732, 'knowledge': 100.0, 'population': 386, 'numerosity': 386, 'reliable': 386}
INFO:lcs.agents.Agent:{'trial': 2000, 'steps_in_trial': 3, 'reward': 1000, 'perf_time': 0.03526659999988624, 'knowledge': 100.0, 'population': 386, 'numerosity': 386, 'reliable': 386}
INFO:lcs.agents.Agent:{'trial': 2500, 'steps_in_trial': 10, 'reward': 1000, 'perf_time': 0.11089619999984279, 'knowledge': 100.0, 'population': 386, 'numerosity': 386, 'reliable': 386}
INFO:lcs.agents.Agent:{'trial': 3000, 'steps_in_trial': 10, 'reward'

END - ACS2ER - 4
START - ACS2ER - 5


INFO:lcs.agents.Agent:{'trial': 500, 'steps_in_trial': 25, 'reward': 1000, 'perf_time': 0.38900810000086494, 'knowledge': 98.63013698630137, 'population': 381, 'numerosity': 381, 'reliable': 344}
INFO:lcs.agents.Agent:{'trial': 1000, 'steps_in_trial': 8, 'reward': 1000, 'perf_time': 0.09702319999996689, 'knowledge': 100.0, 'population': 358, 'numerosity': 358, 'reliable': 354}
INFO:lcs.agents.Agent:{'trial': 1500, 'steps_in_trial': 5, 'reward': 1000, 'perf_time': 0.06449310000061814, 'knowledge': 100.0, 'population': 357, 'numerosity': 357, 'reliable': 354}
INFO:lcs.agents.Agent:{'trial': 2000, 'steps_in_trial': 8, 'reward': 1000, 'perf_time': 0.10459110000010696, 'knowledge': 100.0, 'population': 355, 'numerosity': 355, 'reliable': 354}
INFO:lcs.agents.Agent:{'trial': 2500, 'steps_in_trial': 26, 'reward': 1000, 'perf_time': 0.32029590000092867, 'knowledge': 100.0, 'population': 354, 'numerosity': 354, 'reliable': 354}
INFO:lcs.agents.Agent:{'trial': 3000, 'steps_in_trial': 5, 'reward'

END - ACS2ER - 5
START - ACS2ER - 6


INFO:lcs.agents.Agent:{'trial': 500, 'steps_in_trial': 15, 'reward': 1000, 'perf_time': 0.2415424000009807, 'knowledge': 97.26027397260275, 'population': 386, 'numerosity': 386, 'reliable': 357}
INFO:lcs.agents.Agent:{'trial': 1000, 'steps_in_trial': 7, 'reward': 1000, 'perf_time': 0.10559629999988829, 'knowledge': 99.31506849315068, 'population': 374, 'numerosity': 374, 'reliable': 369}
INFO:lcs.agents.Agent:{'trial': 1500, 'steps_in_trial': 5, 'reward': 1000, 'perf_time': 0.07725260000006529, 'knowledge': 99.31506849315068, 'population': 373, 'numerosity': 373, 'reliable': 369}
INFO:lcs.agents.Agent:{'trial': 2000, 'steps_in_trial': 14, 'reward': 1000, 'perf_time': 0.21602209999946353, 'knowledge': 100.0, 'population': 371, 'numerosity': 371, 'reliable': 370}
INFO:lcs.agents.Agent:{'trial': 2500, 'steps_in_trial': 5, 'reward': 1000, 'perf_time': 0.07816789999924367, 'knowledge': 100.0, 'population': 370, 'numerosity': 370, 'reliable': 370}
INFO:lcs.agents.Agent:{'trial': 3000, 'steps

END - ACS2ER - 6
START - ACS2ER - 7


INFO:lcs.agents.Agent:{'trial': 500, 'steps_in_trial': 12, 'reward': 1000, 'perf_time': 0.2473335999984556, 'knowledge': 97.26027397260275, 'population': 421, 'numerosity': 421, 'reliable': 397}
INFO:lcs.agents.Agent:{'trial': 1000, 'steps_in_trial': 7, 'reward': 1000, 'perf_time': 0.14085439999871596, 'knowledge': 99.31506849315068, 'population': 412, 'numerosity': 412, 'reliable': 404}
INFO:lcs.agents.Agent:{'trial': 1500, 'steps_in_trial': 20, 'reward': 1000, 'perf_time': 0.38232049999896844, 'knowledge': 100.0, 'population': 407, 'numerosity': 407, 'reliable': 405}
INFO:lcs.agents.Agent:{'trial': 2000, 'steps_in_trial': 11, 'reward': 1000, 'perf_time': 0.22081649999927322, 'knowledge': 100.0, 'population': 406, 'numerosity': 406, 'reliable': 405}
INFO:lcs.agents.Agent:{'trial': 2500, 'steps_in_trial': 12, 'reward': 1000, 'perf_time': 0.2385439999998198, 'knowledge': 100.0, 'population': 405, 'numerosity': 405, 'reliable': 405}
INFO:lcs.agents.Agent:{'trial': 3000, 'steps_in_trial':

END - ACS2ER - 7
START - ACS2ER - 8


INFO:lcs.agents.Agent:{'trial': 500, 'steps_in_trial': 11, 'reward': 1000, 'perf_time': 0.2032842000007804, 'knowledge': 96.57534246575342, 'population': 351, 'numerosity': 351, 'reliable': 328}
INFO:lcs.agents.Agent:{'trial': 1000, 'steps_in_trial': 7, 'reward': 1000, 'perf_time': 0.12055680000048596, 'knowledge': 99.31506849315068, 'population': 347, 'numerosity': 347, 'reliable': 334}
INFO:lcs.agents.Agent:{'trial': 1500, 'steps_in_trial': 13, 'reward': 1000, 'perf_time': 0.22410020000097575, 'knowledge': 100.0, 'population': 340, 'numerosity': 340, 'reliable': 335}
INFO:lcs.agents.Agent:{'trial': 2000, 'steps_in_trial': 16, 'reward': 1000, 'perf_time': 0.2685143999988213, 'knowledge': 100.0, 'population': 339, 'numerosity': 339, 'reliable': 339}
INFO:lcs.agents.Agent:{'trial': 2500, 'steps_in_trial': 16, 'reward': 1000, 'perf_time': 0.27250629999980447, 'knowledge': 100.0, 'population': 339, 'numerosity': 339, 'reliable': 339}
INFO:lcs.agents.Agent:{'trial': 3000, 'steps_in_trial':

END - ACS2ER - 8
START - ACS2ER - 9


INFO:lcs.agents.Agent:{'trial': 500, 'steps_in_trial': 7, 'reward': 1000, 'perf_time': 0.15662739999970654, 'knowledge': 100.0, 'population': 383, 'numerosity': 383, 'reliable': 373}
INFO:lcs.agents.Agent:{'trial': 1000, 'steps_in_trial': 13, 'reward': 1000, 'perf_time': 0.28434220000053756, 'knowledge': 100.0, 'population': 375, 'numerosity': 375, 'reliable': 374}
INFO:lcs.agents.Agent:{'trial': 1500, 'steps_in_trial': 7, 'reward': 1000, 'perf_time': 0.16360809999969206, 'knowledge': 100.0, 'population': 374, 'numerosity': 374, 'reliable': 374}
INFO:lcs.agents.Agent:{'trial': 2000, 'steps_in_trial': 18, 'reward': 1000, 'perf_time': 0.4236420999986876, 'knowledge': 100.0, 'population': 374, 'numerosity': 374, 'reliable': 374}
INFO:lcs.agents.Agent:{'trial': 2500, 'steps_in_trial': 2, 'reward': 1000, 'perf_time': 0.04253959999914514, 'knowledge': 100.0, 'population': 374, 'numerosity': 374, 'reliable': 374}
INFO:lcs.agents.Agent:{'trial': 3000, 'steps_in_trial': 10, 'reward': 1000, 'per

END - ACS2ER - 9
START - ACS2ER - 10


INFO:lcs.agents.Agent:{'trial': 500, 'steps_in_trial': 11, 'reward': 1000, 'perf_time': 0.4753884999990987, 'knowledge': 98.63013698630137, 'population': 443, 'numerosity': 443, 'reliable': 431}
INFO:lcs.agents.Agent:{'trial': 1000, 'steps_in_trial': 9, 'reward': 1000, 'perf_time': 0.2823671000005561, 'knowledge': 100.0, 'population': 443, 'numerosity': 443, 'reliable': 436}
INFO:lcs.agents.Agent:{'trial': 1500, 'steps_in_trial': 9, 'reward': 1000, 'perf_time': 0.29014180000012857, 'knowledge': 100.0, 'population': 440, 'numerosity': 440, 'reliable': 439}
INFO:lcs.agents.Agent:{'trial': 2000, 'steps_in_trial': 10, 'reward': 1000, 'perf_time': 0.31535620000067865, 'knowledge': 100.0, 'population': 439, 'numerosity': 439, 'reliable': 439}
INFO:lcs.agents.Agent:{'trial': 2500, 'steps_in_trial': 33, 'reward': 1000, 'perf_time': 1.0682240999994974, 'knowledge': 100.0, 'population': 439, 'numerosity': 439, 'reliable': 439}
INFO:lcs.agents.Agent:{'trial': 3000, 'steps_in_trial': 11, 'reward':

END - ACS2ER - 10
