In [9]:
import numpy as np
import pandas as pd
import dill
import os

import gym
import gym_maze

from lcs.agents import Agent
from lcs.agents.acs2 import ACS2, Configuration as CFG_ACS2
from lcs.agents.acs2er import ACS2ER, Configuration as CFG_ACS2ER
from lcs.metrics import population_metrics

# Logger: emit INFO-level records so the agents' periodic trial metrics show up in the cell output
import logging
logging.basicConfig(level=logging.INFO)


# EXPERIMENT CONFIGURATION

In [10]:
MAZE = "Maze5-v0"
EXPLORE_TRIALS = 5000
EXPLOIT_TRIALS = 1000

# The size of the ER replay memory buffer
ER_BUFFER_SIZE = 10000
# The minimum number of samples in the ER replay memory buffer before samples start being replayed (warm-up phase)
ER_BUFFER_MIN_SAMPLES = 1000
# The numbers of samples to be replayed during the ER phase (one ACS2ER experiment is run per value)
ER_SAMPLES_NUMBER_LIST = [1,2,3,4,5,6,7,8,9,10]



#######

EXPERIMENT_NAME = "10" # Please edit when running a new experiment to avoid overwriting saved results.
DATA_BASE_PATH = "" # CURRENT LOCATION
DATA_PATH = os.path.join(DATA_BASE_PATH, 'MAZE', EXPERIMENT_NAME, MAZE)

# Refuse to run when results for this experiment/environment pair already exist on disk.
# FileExistsError is a subclass of Exception, so code catching Exception keeps working.
if os.path.isdir(DATA_PATH):
    raise FileExistsError(f"The experiment with name: '{EXPERIMENT_NAME}' for '{MAZE}' environment was run already.")

## METRICS

In [11]:
def _get_transitions():
    """Collect the transitions of the MAZE environment in perception form.

    Returns:
        A list with one ``[perception(t[0]), t[1], perception(t[2])]`` entry per
        transition ``t`` reported by the environment's ``get_transitions()``.
    """
    knowledge_env = gym.make(MAZE)
    maze_env = knowledge_env.env

    def _as_perceptions(transition):
        # The first and third elements are converted with the maze's perception();
        # the middle element (the action) is kept unchanged.
        return [
            maze_env.maze.perception(transition[0]),
            transition[1],
            maze_env.maze.perception(transition[2]),
        ]

    return [_as_perceptions(t) for t in maze_env.get_transitions()]

# Computed once when this cell runs; _maze_knowledge() reads both values on every call
# instead of rebuilding the environment each time.
TRANSITIONS = _get_transitions()
TRANSITIONS_LENGTH = len(TRANSITIONS)

def _maze_knowledge(population) -> float:
    """Return the percentage of maze transitions anticipated by reliable classifiers.

    Args:
        population: iterable of classifiers exposing ``is_reliable()`` and
            ``predicts_successfully(p0, action, p1)``.

    Returns:
        The share (0-100) of entries in ``TRANSITIONS`` for which at least one
        reliable classifier's ``predicts_successfully`` returns a truthy value.
    """
    # Take into consideration only reliable classifiers
    reliable_classifiers = [c for c in population if c.is_reliable()]

    # Count how many transitions are anticipated correctly.
    # The generator expression lets any() stop at the first matching classifier
    # instead of evaluating every reliable classifier for every transition.
    nr_correct = sum(
        1
        for p0, action, p1 in TRANSITIONS
        if any(cl.predicts_successfully(p0, action, p1)
               for cl in reliable_classifiers)
    )

    return nr_correct / TRANSITIONS_LENGTH * 100.0

def _maze_metrics(agent, env):
    """Build the per-trial metrics dictionary for ``agent``.

    The result holds a ``'knowledge'`` entry computed by ``_maze_knowledge`` on the
    agent's population, updated with whatever ``population_metrics`` returns for
    that population and ``env``.
    """
    population = agent.population
    collected = dict(knowledge=_maze_knowledge(population))
    collected.update(population_metrics(population, env))
    return collected


def _save_data(data, path, file_name):
    """Serialize ``data`` with dill to ``DATA_PATH/<path>/<file_name>.dill``.

    Args:
        data: object to serialize.
        path: directory, relative to ``DATA_PATH``, to write into; created if missing.
        file_name: file name without the ``.dill`` extension.
    """
    full_dir_path = os.path.join(DATA_PATH, path)
    full_file_path = os.path.join(full_dir_path, f'{file_name}.dill')
    # exist_ok=True replaces the separate isdir() check and avoids the check-then-create race
    os.makedirs(full_dir_path, exist_ok=True)

    # The context manager guarantees the file is flushed and closed, even if dill.dump raises
    with open(full_file_path, 'wb') as output_file:
        dill.dump(data, output_file)

def _save_agent_data(agent, data, path, file_name):
    """Save ``data`` via ``_save_data`` inside a directory named after the agent's class.

    The target directory is ``<agent class name>/<path>``.
    """
    agent_dir_name = type(agent).__name__
    _save_data(data, os.path.join(agent_dir_name, path), file_name)

def _save_metrics(agent, metrics, path, metrics_name):
    """Save ``metrics`` for ``agent`` under the file name ``metrics_<metrics_name>``."""
    file_name = f'metrics_{metrics_name}'
    _save_agent_data(agent, metrics, path, file_name)

def _save_explore_metrics(agent, metrics, path):
    """Save ``metrics`` for ``agent`` under the 'EXPLORE' metrics name."""
    _save_metrics(agent, metrics, path, metrics_name='EXPLORE')

def _save_exploit_metrics(agent, metrics, path):
    """Save ``metrics`` for ``agent`` under the 'EXPLOIT' metrics name."""
    _save_metrics(agent, metrics, path, metrics_name='EXPLOIT')

def _save_population(agent: Agent, path):
    """Save the result of ``agent.get_population()`` under the file name 'population'."""
    population = agent.get_population()
    _save_agent_data(agent, population, path, file_name='population')

def _save_environment(agent, env, path):
    """Save ``env`` under the file name 'env' in the agent's data directory."""
    _save_agent_data(agent, env, path, file_name='env')
    
def _save_experiment_data(agent, env, explore_metrics, exploit_metrics, path):
    """Save all artifacts of one experiment run for ``agent`` under ``path``.

    Four files are written: explore metrics, exploit metrics, the agent's
    population and the environment.
    """
    # Metrics gathered during the two phases
    _save_explore_metrics(agent, metrics=explore_metrics, path=path)
    _save_exploit_metrics(agent, metrics=exploit_metrics, path=path)
    # Agent population and the environment it was run on
    _save_population(agent, path=path)
    _save_environment(agent, env=env, path=path)


## EXPERIMENT

In [12]:

def _run_experiment(agent: Agent, data_path: str = ''):
    """Run ``agent`` on a fresh MAZE environment and save the results.

    Args:
        agent: the agent to run.
        data_path: sub-directory (inside the agent's data directory) in which the
            metrics, population and environment are saved.
    """
    environment = gym.make(MAZE)

    # Explore phase first, then exploit phase, both on the same environment instance
    metrics_explore = agent.explore(environment, EXPLORE_TRIALS)
    metrics_exploit = agent.exploit(environment, EXPLOIT_TRIALS)

    _save_experiment_data(
        agent,
        environment,
        metrics_explore,
        metrics_exploit,
        data_path,
    )

def run_acs2_experiment():
    """Create an ACS2 agent and run the explore/exploit experiment with it."""
    acs2_settings = dict(
        classifier_length=8,
        number_of_possible_actions=8,
        metrics_trial_frequency=1,
        user_metrics_collector_fcn=_maze_metrics,
    )
    agent = ACS2(CFG_ACS2(**acs2_settings))

    # No data sub-path is given, so _run_experiment uses its default ('')
    _run_experiment(agent)

def _run_acs2er_experiment(er_samples_number: int):
    """Create an ACS2ER agent and run the explore/exploit experiment with it.

    Args:
        er_samples_number: value passed to the configuration's ``er_samples_number``;
            it also names the result sub-directory (``m_<er_samples_number>``).
    """
    acs2er_settings = dict(
        classifier_length=8,
        number_of_possible_actions=8,
        metrics_trial_frequency=1,
        er_buffer_size=ER_BUFFER_SIZE,
        er_min_samples=ER_BUFFER_MIN_SAMPLES,
        er_samples_number=er_samples_number,
        user_metrics_collector_fcn=_maze_metrics,
    )
    agent = ACS2ER(CFG_ACS2ER(**acs2er_settings))

    # Each samples-number setting is saved in its own sub-directory
    results_subdir = f'm_{er_samples_number}'
    _run_experiment(agent, results_subdir)

def run_acs2er_experiments():
    """Run one ACS2ER experiment per value in ``ER_SAMPLES_NUMBER_LIST``.

    A START and an END line are printed around every run.
    """
    for er_samples_number in ER_SAMPLES_NUMBER_LIST:
        print(f"START - ACS2ER - {er_samples_number}")

        _run_acs2er_experiment(er_samples_number)

        print(f"END - ACS2ER - {er_samples_number}")

### RUN ACS2 Experiments

In [13]:
# Baseline run: ACS2 agent, EXPLORE_TRIALS explore trials followed by EXPLOIT_TRIALS exploit trials; results are saved under DATA_PATH
run_acs2_experiment()

INFO:lcs.agents.Agent:{'trial': 20, 'steps_in_trial': 28, 'reward': 1000, 'perf_time': 0.04979279999997743, 'knowledge': 2.054794520547945, 'population': 283, 'numerosity': 283, 'reliable': 9}
INFO:lcs.agents.Agent:{'trial': 40, 'steps_in_trial': 40, 'reward': 1000, 'perf_time': 0.09285050000005413, 'knowledge': 10.273972602739725, 'population': 399, 'numerosity': 399, 'reliable': 50}
INFO:lcs.agents.Agent:{'trial': 60, 'steps_in_trial': 50, 'reward': 0, 'perf_time': 0.13909540000008747, 'knowledge': 17.123287671232877, 'population': 452, 'numerosity': 452, 'reliable': 73}
INFO:lcs.agents.Agent:{'trial': 80, 'steps_in_trial': 12, 'reward': 1000, 'perf_time': 0.02307189999999082, 'knowledge': 19.863013698630137, 'population': 460, 'numerosity': 460, 'reliable': 94}
INFO:lcs.agents.Agent:{'trial': 100, 'steps_in_trial': 18, 'reward': 1000, 'perf_time': 0.07547030000000632, 'knowledge': 21.91780821917808, 'population': 464, 'numerosity': 464, 'reliable': 102}
INFO:lcs.agents.Agent:{'trial

### RUN ACS2ER Experiments

In [14]:
# ACS2ER runs: one full explore/exploit experiment per value in ER_SAMPLES_NUMBER_LIST; each is saved in its own m_<n> sub-directory
run_acs2er_experiments()

START - ACS2ER - 1


INFO:lcs.agents.Agent:{'trial': 20, 'steps_in_trial': 43, 'reward': 1000, 'perf_time': 0.2539623999999776, 'knowledge': 1.36986301369863, 'population': 377, 'numerosity': 377, 'reliable': 15}
INFO:lcs.agents.Agent:{'trial': 40, 'steps_in_trial': 4, 'reward': 1000, 'perf_time': 0.02624219999995603, 'knowledge': 7.534246575342466, 'population': 501, 'numerosity': 501, 'reliable': 47}
INFO:lcs.agents.Agent:{'trial': 60, 'steps_in_trial': 50, 'reward': 0, 'perf_time': 0.3538303000000269, 'knowledge': 10.95890410958904, 'population': 527, 'numerosity': 527, 'reliable': 63}
INFO:lcs.agents.Agent:{'trial': 80, 'steps_in_trial': 44, 'reward': 1000, 'perf_time': 0.3653868000000102, 'knowledge': 15.068493150684931, 'population': 541, 'numerosity': 541, 'reliable': 81}
INFO:lcs.agents.Agent:{'trial': 100, 'steps_in_trial': 14, 'reward': 1000, 'perf_time': 0.37970229999996263, 'knowledge': 15.068493150684931, 'population': 543, 'numerosity': 543, 'reliable': 89}
INFO:lcs.agents.Agent:{'trial': 120

END - ACS2ER - 1
START - ACS2ER - 2


INFO:lcs.agents.Agent:{'trial': 20, 'steps_in_trial': 50, 'reward': 0, 'perf_time': 0.4795110999999679, 'knowledge': 14.383561643835616, 'population': 436, 'numerosity': 436, 'reliable': 63}
INFO:lcs.agents.Agent:{'trial': 40, 'steps_in_trial': 19, 'reward': 1000, 'perf_time': 0.19519119999995382, 'knowledge': 24.65753424657534, 'population': 470, 'numerosity': 470, 'reliable': 135}
INFO:lcs.agents.Agent:{'trial': 60, 'steps_in_trial': 34, 'reward': 1000, 'perf_time': 0.35846489999994446, 'knowledge': 36.3013698630137, 'population': 482, 'numerosity': 482, 'reliable': 170}
INFO:lcs.agents.Agent:{'trial': 80, 'steps_in_trial': 20, 'reward': 1000, 'perf_time': 0.21196450000002187, 'knowledge': 41.78082191780822, 'population': 485, 'numerosity': 485, 'reliable': 189}
INFO:lcs.agents.Agent:{'trial': 100, 'steps_in_trial': 4, 'reward': 1000, 'perf_time': 0.04099959999996372, 'knowledge': 48.63013698630137, 'population': 484, 'numerosity': 484, 'reliable': 217}
INFO:lcs.agents.Agent:{'trial'

END - ACS2ER - 2
START - ACS2ER - 3


INFO:lcs.agents.Agent:{'trial': 20, 'steps_in_trial': 50, 'reward': 0, 'perf_time': 1.3603936000000658, 'knowledge': 23.28767123287671, 'population': 654, 'numerosity': 654, 'reliable': 108}
INFO:lcs.agents.Agent:{'trial': 40, 'steps_in_trial': 3, 'reward': 1000, 'perf_time': 0.05389879999995628, 'knowledge': 45.89041095890411, 'population': 667, 'numerosity': 667, 'reliable': 201}
INFO:lcs.agents.Agent:{'trial': 60, 'steps_in_trial': 50, 'reward': 0, 'perf_time': 1.2795202999999447, 'knowledge': 63.013698630136986, 'population': 654, 'numerosity': 654, 'reliable': 286}
INFO:lcs.agents.Agent:{'trial': 80, 'steps_in_trial': 13, 'reward': 1000, 'perf_time': 0.22067970000000514, 'knowledge': 71.23287671232876, 'population': 629, 'numerosity': 629, 'reliable': 326}
INFO:lcs.agents.Agent:{'trial': 100, 'steps_in_trial': 8, 'reward': 1000, 'perf_time': 0.1827358999998978, 'knowledge': 78.08219178082192, 'population': 615, 'numerosity': 615, 'reliable': 371}
INFO:lcs.agents.Agent:{'trial': 12

END - ACS2ER - 3
