In [1]:
import numpy as np
import pandas as pd
import dill
import os
import itertools

import gym
import gym_corridor

from lcs import Perception
from lcs.agents import Agent
from lcs.agents.acs2 import ACS2, Configuration as CFG_ACS2
from lcs.agents.acs2er import ACS2ER, Configuration as CFG_ACS2ER
from lcs.metrics import population_metrics

# Logger
import logging
logging.basicConfig(level=logging.INFO)


  dependencies=[google_dot_protobuf_dot_descriptor__pb2.DESCRIPTOR,])
  serialized_options=None, file=DESCRIPTOR)
  serialized_end=144,
  dependencies=[google_dot_protobuf_dot_descriptor__pb2.DESCRIPTOR,scalapb_dot_scalapb__pb2.DESCRIPTOR,])
  type=None),
  serialized_end=815,
  serialized_options=None, file=DESCRIPTOR)
  serialized_end=291,
  dependencies=[scalapb_dot_scalapb__pb2.DESCRIPTOR,databricks__pb2.DESCRIPTOR,])
  type=None),
  serialized_end=4400,
  serialized_options=None, file=DESCRIPTOR),
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  binary = (7, np.dtype("bytes"), "BinaryType", np.object)


# EXPERIMENT CONFIGURATION

In [2]:
# Length of the corridor; it is interpolated into the gym environment id below.
CORRIDOR_LENGTH = 100  # alternatives: 20 | 40 | 100
# Gym environment id passed to gym.make().
COR = f'corridor-{CORRIDOR_LENGTH}-v0'
# Number of trials passed to agent.explore() / agent.exploit().
EXPLORE_TRIALS = 500
EXPLOIT_TRIALS = 100
# Passed to the agent configuration as metrics_trial_frequency.
METRICS_FREQUENCY = 1

# The size of the ER replay memory buffer
ER_BUFFER_SIZE = 1000
# The minimum number of samples in the ER replay memory buffer before replaying samples starts (warm-up phase)
ER_BUFFER_MIN_SAMPLES = 100
# The numbers of samples to be replayed during the ER phase; one ACS2ER experiment is run per value
ER_SAMPLES_NUMBER_LIST = [1,2,3,5,8,13]



#######

# Repetition indices run over range(REPEAT_START, REPEAT_START + REPEAT);
# each index is also used as the name of the output sub-directory.
REPEAT_START = 1
REPEAT = 1

EXPERIMENT_NAME = "99" # Please change this when running a new experiment so that saved results are not overwritten.
DATA_BASE_PATH = "" # Empty string: results are written relative to the current working directory.
DATA_PATH = os.path.join(DATA_BASE_PATH, 'COR', EXPERIMENT_NAME, COR)

# Disabled guard against overwriting the results of an earlier run.
# NOTE(review): the original commented-out message referenced `MAZE`, which is not defined
# in the cells shown here; `COR` is used below so the guard works if it is re-enabled.
# if os.path.isdir(DATA_PATH):
#   raise Exception(f"The experiment with name: '{EXPERIMENT_NAME}' for '{COR}' environment was run already.")

## METRICS

In [3]:
def get_transitions(grid_size):
    """Enumerate the (state, action, next_state) triples used by the knowledge metric.

    States run from 1 up to ``grid_size - 1`` inclusive. The first state
    contributes only a move to the right; every later state contributes a
    move to the left followed by a move to the right.

    Args:
        grid_size: exclusive upper bound of the enumerated states.

    Returns:
        A list of ``(state, action, next_state)`` tuples where ``action`` is
        0 for left and 1 for right. Empty when ``grid_size <= 1``.
    """
    first_state = 1
    go_left, go_right = 0, 1

    moves = []
    for position in range(first_state, grid_size):
        # The first state has no left neighbour in this enumeration.
        if position != first_state:
            moves.append((position, go_left, position - 1))
        moves.append((position, go_right, position + 1))

    return moves

# Transition list for the configured corridor length, computed once at cell execution;
# _corridor_knowledge reads it as a module-level constant.
TRANSITIONS = get_transitions(CORRIDOR_LENGTH)

def _corridor_knowledge(population, environment):
    """Return the percentage of TRANSITIONS covered by reliable classifiers.

    A transition counts as covered when at least one classifier for which
    ``is_reliable()`` is true reports ``predicts_successfully(p0, action, p1)``
    for the perceptions built from the transition's start and end states.

    Args:
        population: iterable of classifiers exposing ``is_reliable()`` and
            ``predicts_successfully(p0, action, p1)``.
        environment: unused; kept so the signature stays compatible with
            existing callers.

    Returns:
        A float in the range 0.0-100.0.

    Raises:
        ZeroDivisionError: if the module-level ``TRANSITIONS`` list is empty.
    """
    reliable = [cl for cl in population if cl.is_reliable()]

    def _is_covered(start, action, end):
        # States are wrapped as single-element perceptions holding the
        # string form of the state number.
        p0 = Perception((str(start),))
        p1 = Perception((str(end),))
        # A generator lets any() stop at the first matching classifier
        # instead of building a full list for every transition.
        return any(cl.predicts_successfully(p0, action, p1) for cl in reliable)

    nr_correct = sum(
        1 for start, action, end in TRANSITIONS
        if _is_covered(start, action, end))

    return nr_correct / len(TRANSITIONS) * 100.0
    
def corridor_metrics(agent, env):
    """Collect the per-trial metrics for an agent in the corridor environment.

    The result holds the corridor-specific ``'knowledge'`` value merged with
    whatever ``population_metrics`` returns for the agent's population; keys
    coming from ``population_metrics`` win on collision.
    """
    classifiers = agent.population
    collected = {'knowledge': _corridor_knowledge(classifiers, env)}
    generic = population_metrics(classifiers, env)
    collected.update(generic)
    return collected

class CorridorObservationWrapper(gym.ObservationWrapper):
    """Observation wrapper that packs each raw observation into a 1-tuple."""

    def observation(self, observation):
        """Return ``observation`` wrapped in a single-element tuple."""
        wrapped = (observation,)
        return wrapped
        
def _save_data(data, path, file_name):
    """Serialize ``data`` with dill to ``DATA_PATH/<path>/<file_name>.dill``.

    Args:
        data: object to serialize.
        path: directory, relative to ``DATA_PATH``, that receives the file;
            it is created (including parents) when missing.
        file_name: file name without the ``.dill`` extension.
    """
    full_dir_path = os.path.join(DATA_PATH, path)
    full_file_path = os.path.join(full_dir_path, f'{file_name}.dill')
    # exist_ok avoids the separate isdir() check and the race between
    # checking and creating the directory.
    os.makedirs(full_dir_path, exist_ok=True)

    # The context manager guarantees the file is flushed and closed even if
    # serialization fails; the original left the handle open.
    with open(full_file_path, 'wb') as output_file:
        dill.dump(data, output_file)

def _save_agent_data(agent, data, path, file_name):
    """Save ``data`` under a directory named after the agent's class.

    The target directory is ``<agent class name>/<path>``, resolved by
    ``_save_data``.
    """
    agent_dir = type(agent).__name__
    _save_data(data, os.path.join(agent_dir, path), file_name)

def _save_metrics(agent, metrics, path, metrics_name):
    """Save ``metrics`` for ``agent`` in a file named ``metrics_<metrics_name>``."""
    file_name = f'metrics_{metrics_name}'
    _save_agent_data(agent, metrics, path, file_name)

def _save_explore_metrics(agent, metrics, path):
    """Save the metrics gathered during the explore phase."""
    phase = 'EXPLORE'
    _save_metrics(agent, metrics, path, phase)

def _save_exploit_metrics(agent, metrics, path):
    """Save the metrics gathered during the exploit phase."""
    phase = 'EXPLOIT'
    _save_metrics(agent, metrics, path, phase)

def _save_population(agent: Agent, path):
    """Save the value returned by ``agent.get_population()`` as ``population``."""
    classifiers = agent.get_population()
    _save_agent_data(agent, classifiers, path, 'population')

def _save_environment(agent, env, path):
    """Save the environment object next to the agent's other results as ``env``."""
    file_name = 'env'
    _save_agent_data(agent, env, path, file_name)
    
def _save_experiment_data(agent, env, explore_metrics, exploit_metrics, path):
    """Save everything produced by one experiment run.

    Written in this order: explore metrics, exploit metrics, the agent's
    population, and the environment.
    """
    metric_savers = (
        (_save_explore_metrics, explore_metrics),
        (_save_exploit_metrics, exploit_metrics),
    )
    for save_metrics, metrics in metric_savers:
        save_metrics(agent, metrics, path)

    _save_population(agent, path)
    _save_environment(agent, env, path)


## EXPERIMENT

In [4]:

def _run_experiment(agent: Agent, data_path = ''):
    """Run one explore-then-exploit experiment for ``agent`` and save the results.

    A fresh corridor environment (id ``COR``) is created and wrapped in
    ``CorridorObservationWrapper``. The agent explores it for
    ``EXPLORE_TRIALS`` trials and then exploits it for ``EXPLOIT_TRIALS``
    trials; both metric collections, the population and the environment are
    saved under ``data_path``.
    """
    environment = CorridorObservationWrapper(gym.make(COR))

    metrics_from_explore = agent.explore(environment, EXPLORE_TRIALS)
    metrics_from_exploit = agent.exploit(environment, EXPLOIT_TRIALS)

    _save_experiment_data(
        agent,
        environment,
        metrics_from_explore,
        metrics_from_exploit,
        data_path)

def run_acs2_experiment():
    """Run the ACS2 experiment ``REPEAT`` times, starting at index ``REPEAT_START``.

    Every repetition builds a new ACS2 agent and saves its results in a
    directory named after the repetition index.
    """
    stop = REPEAT_START + REPEAT
    for i in range(REPEAT_START, stop):
        # A fresh configuration and agent per repetition.
        settings = dict(
            classifier_length=1,
            number_of_possible_actions=2,
            metrics_trial_frequency=METRICS_FREQUENCY,
            user_metrics_collector_fcn=corridor_metrics)
        agent = ACS2(CFG_ACS2(**settings))

        run_dir = f'{i}'
        _run_experiment(agent, run_dir)

def _run_acs2er_experiment(er_samples_number: int):
    """Run the ACS2ER experiment ``REPEAT`` times for one replay sample count.

    Args:
        er_samples_number: value passed to the configuration as
            ``er_samples_number``; it is also used in the output directory
            name ``m_<er_samples_number>/<repetition index>``.
    """
    stop = REPEAT_START + REPEAT
    for i in range(REPEAT_START, stop):
        # A fresh configuration and agent per repetition.
        settings = dict(
            classifier_length=1,
            number_of_possible_actions=2,
            metrics_trial_frequency=METRICS_FREQUENCY,
            er_buffer_size=ER_BUFFER_SIZE,
            er_min_samples=ER_BUFFER_MIN_SAMPLES,
            er_samples_number=er_samples_number,
            user_metrics_collector_fcn=corridor_metrics)
        agent = ACS2ER(CFG_ACS2ER(**settings))

        run_dir = os.path.join(f'm_{er_samples_number}', f'{i}')
        _run_experiment(agent, run_dir)

def run_acs2er_experiments():
    """Run the ACS2ER experiment once per entry of ``ER_SAMPLES_NUMBER_LIST``.

    A START line is printed before and an END line after each entry.
    """
    for er_samples_number in ER_SAMPLES_NUMBER_LIST:
        start_banner = f"START - ACS2ER - {er_samples_number}"
        end_banner = f"END - ACS2ER - {er_samples_number}"

        print(start_banner)
        _run_acs2er_experiment(er_samples_number)
        print(end_banner)

### RUN ACS2 Experiments

In [5]:
# Run the baseline ACS2 experiment (REPEAT repetitions); results are saved under DATA_PATH.
run_acs2_experiment()

INFO:lcs.agents.Agent:{'trial': 50, 'steps_in_trial': 200, 'reward': 0, 'perf_time': 0.09730620000000023, 'knowledge': 64.46700507614213, 'population': 198, 'numerosity': 198, 'reliable': 130}
INFO:lcs.agents.Agent:{'trial': 100, 'steps_in_trial': 200, 'reward': 0, 'perf_time': 0.10157609999999906, 'knowledge': 84.26395939086294, 'population': 198, 'numerosity': 198, 'reliable': 169}
INFO:lcs.agents.Agent:{'trial': 150, 'steps_in_trial': 200, 'reward': 0, 'perf_time': 0.1182963000000008, 'knowledge': 90.35532994923858, 'population': 198, 'numerosity': 198, 'reliable': 181}
INFO:lcs.agents.Agent:{'trial': 200, 'steps_in_trial': 90, 'reward': 1000, 'perf_time': 0.043411200000001315, 'knowledge': 92.38578680203045, 'population': 198, 'numerosity': 198, 'reliable': 185}
INFO:lcs.agents.Agent:{'trial': 250, 'steps_in_trial': 200, 'reward': 0, 'perf_time': 0.10157019999999761, 'knowledge': 96.44670050761421, 'population': 198, 'numerosity': 198, 'reliable': 193}
INFO:lcs.agents.Agent:{'trial

### RUN ACS2ER Experiments

In [6]:
# Run the ACS2ER experiments, one per value in ER_SAMPLES_NUMBER_LIST; results are saved under DATA_PATH.
run_acs2er_experiments()

START - ACS2ER - 1


INFO:lcs.agents.Agent:{'trial': 50, 'steps_in_trial': 200, 'reward': 0, 'perf_time': 0.30056260000000634, 'knowledge': 74.11167512690355, 'population': 196, 'numerosity': 196, 'reliable': 146}
INFO:lcs.agents.Agent:{'trial': 100, 'steps_in_trial': 200, 'reward': 0, 'perf_time': 0.28493380000000457, 'knowledge': 97.46192893401016, 'population': 198, 'numerosity': 198, 'reliable': 194}
INFO:lcs.agents.Agent:{'trial': 150, 'steps_in_trial': 49, 'reward': 1000, 'perf_time': 0.06890410000002589, 'knowledge': 98.98477157360406, 'population': 198, 'numerosity': 198, 'reliable': 197}
INFO:lcs.agents.Agent:{'trial': 200, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.0013096000000132335, 'knowledge': 98.98477157360406, 'population': 198, 'numerosity': 198, 'reliable': 197}
INFO:lcs.agents.Agent:{'trial': 250, 'steps_in_trial': 53, 'reward': 1000, 'perf_time': 0.1029696999999885, 'knowledge': 98.98477157360406, 'population': 198, 'numerosity': 198, 'reliable': 198}
INFO:lcs.agents.Agent:{'t

END - ACS2ER - 1
START - ACS2ER - 2


INFO:lcs.agents.Agent:{'trial': 50, 'steps_in_trial': 200, 'reward': 0, 'perf_time': 0.4389361000000065, 'knowledge': 86.29441624365482, 'population': 196, 'numerosity': 196, 'reliable': 170}
INFO:lcs.agents.Agent:{'trial': 100, 'steps_in_trial': 61, 'reward': 1000, 'perf_time': 0.15387520000001587, 'knowledge': 97.46192893401016, 'population': 198, 'numerosity': 198, 'reliable': 195}
INFO:lcs.agents.Agent:{'trial': 150, 'steps_in_trial': 56, 'reward': 1000, 'perf_time': 0.12782329999998865, 'knowledge': 98.98477157360406, 'population': 198, 'numerosity': 198, 'reliable': 198}
INFO:lcs.agents.Agent:{'trial': 200, 'steps_in_trial': 115, 'reward': 1000, 'perf_time': 0.24370310000000472, 'knowledge': 98.98477157360406, 'population': 198, 'numerosity': 198, 'reliable': 198}
INFO:lcs.agents.Agent:{'trial': 250, 'steps_in_trial': 192, 'reward': 1000, 'perf_time': 0.4598642000000268, 'knowledge': 98.98477157360406, 'population': 198, 'numerosity': 198, 'reliable': 198}
INFO:lcs.agents.Agent:{

END - ACS2ER - 2
START - ACS2ER - 3


INFO:lcs.agents.Agent:{'trial': 50, 'steps_in_trial': 2, 'reward': 1000, 'perf_time': 0.006306499999993775, 'knowledge': 86.80203045685279, 'population': 197, 'numerosity': 197, 'reliable': 174}
INFO:lcs.agents.Agent:{'trial': 100, 'steps_in_trial': 44, 'reward': 1000, 'perf_time': 0.15392029999998158, 'knowledge': 98.47715736040608, 'population': 198, 'numerosity': 198, 'reliable': 197}
INFO:lcs.agents.Agent:{'trial': 150, 'steps_in_trial': 114, 'reward': 1000, 'perf_time': 0.37771049999997786, 'knowledge': 98.98477157360406, 'population': 198, 'numerosity': 198, 'reliable': 198}
INFO:lcs.agents.Agent:{'trial': 200, 'steps_in_trial': 166, 'reward': 1000, 'perf_time': 0.6095821000000115, 'knowledge': 98.98477157360406, 'population': 198, 'numerosity': 198, 'reliable': 198}
INFO:lcs.agents.Agent:{'trial': 250, 'steps_in_trial': 157, 'reward': 1000, 'perf_time': 0.5278289000000314, 'knowledge': 98.98477157360406, 'population': 198, 'numerosity': 198, 'reliable': 198}
INFO:lcs.agents.Agen

END - ACS2ER - 3
START - ACS2ER - 5


INFO:lcs.agents.Agent:{'trial': 50, 'steps_in_trial': 200, 'reward': 0, 'perf_time': 0.9641570999999658, 'knowledge': 93.90862944162437, 'population': 198, 'numerosity': 198, 'reliable': 188}
INFO:lcs.agents.Agent:{'trial': 100, 'steps_in_trial': 137, 'reward': 1000, 'perf_time': 0.6563877999999477, 'knowledge': 98.98477157360406, 'population': 198, 'numerosity': 198, 'reliable': 198}
INFO:lcs.agents.Agent:{'trial': 150, 'steps_in_trial': 72, 'reward': 1000, 'perf_time': 0.3439040999999179, 'knowledge': 98.98477157360406, 'population': 198, 'numerosity': 198, 'reliable': 198}
INFO:lcs.agents.Agent:{'trial': 200, 'steps_in_trial': 64, 'reward': 1000, 'perf_time': 0.32799529999999777, 'knowledge': 98.98477157360406, 'population': 198, 'numerosity': 198, 'reliable': 198}
INFO:lcs.agents.Agent:{'trial': 250, 'steps_in_trial': 144, 'reward': 1000, 'perf_time': 0.716229799999951, 'knowledge': 98.98477157360406, 'population': 198, 'numerosity': 198, 'reliable': 198}
INFO:lcs.agents.Agent:{'tr

END - ACS2ER - 5
START - ACS2ER - 8


INFO:lcs.agents.Agent:{'trial': 50, 'steps_in_trial': 41, 'reward': 1000, 'perf_time': 0.33936949999997523, 'knowledge': 95.93908629441624, 'population': 198, 'numerosity': 198, 'reliable': 192}
INFO:lcs.agents.Agent:{'trial': 100, 'steps_in_trial': 200, 'reward': 1000, 'perf_time': 1.7617961000000832, 'knowledge': 98.98477157360406, 'population': 198, 'numerosity': 198, 'reliable': 198}
INFO:lcs.agents.Agent:{'trial': 150, 'steps_in_trial': 64, 'reward': 1000, 'perf_time': 0.5623895000001085, 'knowledge': 98.98477157360406, 'population': 198, 'numerosity': 198, 'reliable': 198}
INFO:lcs.agents.Agent:{'trial': 200, 'steps_in_trial': 108, 'reward': 1000, 'perf_time': 0.9002302999999756, 'knowledge': 98.98477157360406, 'population': 198, 'numerosity': 198, 'reliable': 198}
INFO:lcs.agents.Agent:{'trial': 250, 'steps_in_trial': 200, 'reward': 0, 'perf_time': 1.7107396999999764, 'knowledge': 98.98477157360406, 'population': 198, 'numerosity': 198, 'reliable': 198}
INFO:lcs.agents.Agent:{'t

END - ACS2ER - 8
START - ACS2ER - 13


INFO:lcs.agents.Agent:{'trial': 50, 'steps_in_trial': 200, 'reward': 0, 'perf_time': 2.779816199999914, 'knowledge': 98.47715736040608, 'population': 198, 'numerosity': 198, 'reliable': 197}
INFO:lcs.agents.Agent:{'trial': 100, 'steps_in_trial': 13, 'reward': 1000, 'perf_time': 0.16285070000003543, 'knowledge': 98.98477157360406, 'population': 198, 'numerosity': 198, 'reliable': 198}
INFO:lcs.agents.Agent:{'trial': 150, 'steps_in_trial': 36, 'reward': 1000, 'perf_time': 0.44709740000007514, 'knowledge': 98.98477157360406, 'population': 198, 'numerosity': 198, 'reliable': 198}
INFO:lcs.agents.Agent:{'trial': 200, 'steps_in_trial': 148, 'reward': 1000, 'perf_time': 1.9356239999999616, 'knowledge': 98.98477157360406, 'population': 198, 'numerosity': 198, 'reliable': 198}
INFO:lcs.agents.Agent:{'trial': 250, 'steps_in_trial': 1, 'reward': 1000, 'perf_time': 0.011807399999952395, 'knowledge': 98.98477157360406, 'population': 198, 'numerosity': 198, 'reliable': 198}
INFO:lcs.agents.Agent:{'t

END - ACS2ER - 13
