In [1]:
import os
import itertools
import numpy as np
from utils.run_utils import Runner

import gym
import gym_multiplexer

from lcs import Perception
from lcs.agents.racs import RACS, Configuration as CFG_RACS
from lcs.agents.racser import RACSER, Configuration as CFG_RACSER
from lcs.representations.RealValueEncoder import RealValueEncoder
from lcs.metrics import population_metrics

# Logger
import logging
logging.basicConfig(level=logging.INFO)




# EXPERIMENT CONFIGURATION

In [2]:
CP = 'CartPole-v0'
EXPLORE_TRIALS = 200
EXPLOIT_TRIALS = 100
METRICS_FREQUENCY = 1

# The size of ER replay memory buffer
ER_BUFFER_SIZE = 10000
# The minimum number of samples of ER replay memory buffer to start replying samples (warm-up phase)
ER_BUFFER_MIN_SAMPLES = 1000
# The number of samples to be replayed druing ER phase
ER_SAMPLES_NUMBER_LIST = [3]



#######

REPEAT_START = 1
REPEAT = 1

EXPERIMENT_NAME = "CP_TEST5" # Please edit if running new experiment to do not override saved results.

In [3]:
runner = Runner('CP', EXPERIMENT_NAME, CP)

## METRICS

In [4]:
"""    
    | Num | Observation           | Min                  | Max                |
    |-----|-----------------------|----------------------|--------------------|
    | 0   | Cart Position         | -4.8                 | 4.8                |
    | 1   | Cart Velocity         | -Inf                 | Inf                |
    | 2   | Pole Angle            | ~ -0.418 rad (-24°)  | ~ 0.418 rad (24°)  |
    | 3   | Pole Angular Velocity | -Inf                 | Inf                |
"""

norm_env = gym.make(CP)
steps = 20000

obs_arr = np.zeros((steps, 4))

for i in range(steps):
    norm_env.reset()
    done = False
    while not done:
        obs, reward, done, _ = norm_env.step(norm_env.action_space.sample())
        obs_arr[i, :] = obs

norm_env.close()

high = obs_arr.max(axis=0)
low = obs_arr.min(axis=0)

print(high)
print(low)


class CartPoleObservationWrapper(gym.ObservationWrapper):
    def __init__(self, env, low, high):
        super().__init__(env)
        self.low = low
        self.high = high

    def observation(self, observation):
        new_obs = []

        for i, v in enumerate(observation):
            new_value = (v - self.low[i]) / (self.high[i] - self.low[i])
            new_obs.append(max(0, min(1, new_value)))

        return new_obs
        

def cp_metrics(agent, env):
    metrics = {
    }
    metrics.update(population_metrics(agent.population, env))

    # x = list(agent.population)
    return metrics


  f"The environment {path} is out of date. You should consider "


[2.10836363 2.71976781 0.2704449  3.46813893]
[-2.20633292 -3.20225215 -0.26554677 -3.5714047 ]


## EXPERIMENT

In [5]:
def _run_experiment(agent, path):
    runner.run_experiment(agent, CartPoleObservationWrapper(gym.make(CP), low, high), EXPLORE_TRIALS, EXPLOIT_TRIALS, path)
    
def run_racs_experiment():
    for i in range(REPEAT_START, REPEAT_START + REPEAT):
        # Create agent 
        cfg = CFG_RACS(
            encoder=RealValueEncoder(3),
            classifier_length=4,
            number_of_possible_actions=2,
            do_ga=False,
            metrics_trial_frequency=METRICS_FREQUENCY,
            user_metrics_collector_fcn=cp_metrics)
        agent = RACS(cfg)

        _run_experiment(agent, f'{i}')

def _run_racser_experiment(er_samples_number: int):
    for i in range(REPEAT_START, REPEAT_START + REPEAT):
        # Create agent 
        cfg = CFG_RACSER(    
            encoder=RealValueEncoder(3),
            classifier_length=4,
            number_of_possible_actions=2,
            do_ga=False,
            metrics_trial_frequency=METRICS_FREQUENCY,
            er_buffer_size=ER_BUFFER_SIZE,
            er_min_samples=ER_BUFFER_MIN_SAMPLES,
            er_samples_number=er_samples_number,
            user_metrics_collector_fcn=cp_metrics)
        agent = RACSER(cfg)

        _run_experiment(agent, os.path.join(f'm_{er_samples_number}', f'{i}'))

def run_racser_experiments():
    for er_samples_number in ER_SAMPLES_NUMBER_LIST:
        print(f"START - ACS2ER - {er_samples_number}")
        _run_racser_experiment(er_samples_number)
        print(f"END - ACS2ER - {er_samples_number}")

### RUN ACS2 Experiments

In [6]:
run_racs_experiment()

INFO:lcs.agents.Agent:{'trial': 20, 'steps_in_trial': 21, 'reward': 1.0, 'perf_time': 1.637636299999997, 'population': 622, 'numerosity': 622, 'reliable': 0}
INFO:lcs.agents.Agent:{'trial': 40, 'steps_in_trial': 12, 'reward': 1.0, 'perf_time': 1.6585140000000038, 'population': 1156, 'numerosity': 1156, 'reliable': 3}
INFO:lcs.agents.Agent:{'trial': 60, 'steps_in_trial': 15, 'reward': 1.0, 'perf_time': 2.8204795999999988, 'population': 1999, 'numerosity': 1999, 'reliable': 5}
INFO:lcs.agents.Agent:{'trial': 80, 'steps_in_trial': 26, 'reward': 1.0, 'perf_time': 5.176598100000007, 'population': 2368, 'numerosity': 2368, 'reliable': 7}
INFO:lcs.agents.Agent:{'trial': 100, 'steps_in_trial': 18, 'reward': 1.0, 'perf_time': 4.3749275000000125, 'population': 2886, 'numerosity': 2886, 'reliable': 11}
INFO:lcs.agents.Agent:{'trial': 120, 'steps_in_trial': 14, 'reward': 1.0, 'perf_time': 3.832345500000031, 'population': 3331, 'numerosity': 3331, 'reliable': 10}
INFO:lcs.agents.Agent:{'trial': 140

### RUN ACS2ER Experiments

In [7]:
run_racser_experiments()

INFO:lcs.agents.Agent:{'trial': 20, 'steps_in_trial': 28, 'reward': 1.0, 'perf_time': 0.002755499999921085, 'population': 0, 'numerosity': 0, 'reliable': 0}
INFO:lcs.agents.Agent:{'trial': 40, 'steps_in_trial': 12, 'reward': 1.0, 'perf_time': 0.0008466000001590146, 'population': 0, 'numerosity': 0, 'reliable': 0}


START - ACS2ER - 3


INFO:lcs.agents.Agent:{'trial': 60, 'steps_in_trial': 33, 'reward': 1.0, 'perf_time': 13.630840299999818, 'population': 793, 'numerosity': 793, 'reliable': 0}
INFO:lcs.agents.Agent:{'trial': 80, 'steps_in_trial': 16, 'reward': 1.0, 'perf_time': 18.571592899999814, 'population': 2245, 'numerosity': 2245, 'reliable': 4}
INFO:lcs.agents.Agent:{'trial': 100, 'steps_in_trial': 19, 'reward': 1.0, 'perf_time': 31.553978200000074, 'population': 3475, 'numerosity': 3475, 'reliable': 12}
INFO:lcs.agents.Agent:{'trial': 120, 'steps_in_trial': 19, 'reward': 1.0, 'perf_time': 39.94625790000009, 'population': 4698, 'numerosity': 4698, 'reliable': 20}
INFO:lcs.agents.Agent:{'trial': 140, 'steps_in_trial': 24, 'reward': 1.0, 'perf_time': 57.799874600000294, 'population': 5411, 'numerosity': 5411, 'reliable': 26}
INFO:lcs.agents.Agent:{'trial': 160, 'steps_in_trial': 12, 'reward': 1.0, 'perf_time': 29.693351100000655, 'population': 5724, 'numerosity': 5724, 'reliable': 27}
INFO:lcs.agents.Agent:{'trial

END - ACS2ER - 3
