In [10]:
import logging
logging.basicConfig(level=logging.WARN)

import math
from copy import copy

import gym

from lcs import Perception
from lcs.agents import EnvironmentAdapter
from lcs.metrics import population_metrics
from lcs.agents.acs2 import ACS2, Configuration
from lcs.strategies.action_selection import BestAction, EpsilonGreedy, KnowledgeArray, ActionDelay

from utils.experiments import avg_experiments
from utils.plotting import COLORS

import numpy as np

In [2]:
# Number of discretisation buckets per observation attribute; consumed by
# CartPoleAdapter.to_genotype. An attribute with a single bucket always maps
# to '0', i.e. it is effectively ignored.
# Presumably ordered as (cart position, cart velocity, pole angle,
# pole angular velocity) per Gym's CartPole observation layout - TODO confirm.
buckets = (1, 1, 6, 6)  # orig: (1, 1, 3, 6)

In [4]:
# Shared environment instance; used below both to read the observation-space
# bounds and to run the ACS2 agent.
env = gym.make('CartPole-v0')

In [5]:
# Per-attribute upper/lower bounds used to normalise observations before
# bucketing. Indices 0 and 2 come from the environment's observation space;
# indices 1 and 3 are hand-picked finite values (presumably because Gym
# reports the velocity components as unbounded - TODO confirm).
# NOTE(review): the +/-3500 range for index 3 is very wide; in the captured
# classifier output the fourth attribute only ever takes the values 2 and 3,
# so the 6 buckets seem to act as a sign indicator - confirm this is intended.
_high = [env.observation_space.high[0], 0.5, env.observation_space.high[2], 3500]
_low = [env.observation_space.low[0], -0.5, env.observation_space.low[2], -3500]

# Display both bound lists as the cell output.
_high, _low

([4.8, 0.5, 0.41887903, 3500], [-4.8, -0.5, -0.41887903, -3500])

In [6]:
class CartPoleAdapter(EnvironmentAdapter):
    """Discretise raw observations into bucket indices.

    Relies on the module-level ``buckets``, ``_low`` and ``_high`` sequences,
    which must provide one entry for every attribute of the observation.
    """

    @classmethod
    def to_genotype(cls, obs):
        """Map each attribute of ``obs`` to the string form of its bucket index.

        Args:
            obs: indexable sequence of numeric attribute values.

        Returns:
            list of str: one bucket index per attribute, each in the range
            ``0 .. buckets[i] - 1``. Values outside ``[_low[i], _high[i]]``
            are clamped to the first or last bucket.
        """
        genotype = []
        for i, value in enumerate(obs):
            last_bucket = buckets[i] - 1

            # Min-max normalisation. Subtracting the lower bound (rather than
            # adding abs(low)) gives the same result for non-positive lower
            # bounds and stays correct if a bound is ever made positive.
            ratio = (value - _low[i]) / (_high[i] - _low[i])

            # Scale to the bucket range, then clamp out-of-range observations.
            bucket = int(round(last_bucket * ratio))
            bucket = min(last_bucket, max(0, bucket))

            genotype.append(str(bucket))
        return genotype

In [7]:
def cp_metrics(pop, env):
    """Collect per-trial metrics for the given population and environment.

    Currently this is just a fresh dict holding whatever
    ``population_metrics(pop, env)`` reports.
    """
    return dict(population_metrics(pop, env))

In [11]:
# Keyword arguments for the ACS2 Configuration, kept in a dict so that they
# can be reused or overridden when building other configurations.
base_params = dict(
    # Problem shape: the adapter emits one symbol per observation attribute.
    classifier_length=4,
    number_of_possible_actions=2,
    # Learning parameters.
    epsilon=0.9,
    beta=0.01,
    gamma=0.995,
    initial_q=0.5,
    theta_exp=50,
    # Genetic algorithm settings.
    theta_ga=50,
    do_ga=True,
    chi=0.0,  # do not cross-over
    mu=0.03,
    # Metrics collection and environment glue.
    metrics_trial_frequency=2,
    environment_adapter=CartPoleAdapter,
    user_metrics_collector_fcn=cp_metrics,
)

cfg = Configuration(**base_params)

In [18]:
# Build an ACS2 agent from the configuration above and run its explore phase
# on the CartPole environment. The second argument (500) is presumably the
# number of trials - confirm against the lcs API.
# NOTE(review): no random seed is set in the visible cells, so the resulting
# population and metrics will likely differ between runs.
explorer = ACS2(cfg)
population_explore, metrics_explore = explorer.explore(env, 500)

[##12 1 ####             (empty)               q: 0.996 r: 1.384  ir: 0.9489 f: 1.379  exp: 286 tga: 13575 talp: 13577 tav: 49.8   num: 19,
 ##32 1 ####             (00##)                q: 0.947 r: 3.327  ir: 0.9999 f: 3.15   exp: 933 tga: 13586 talp: 13618 tav: 15.2   num: 1,
 ##22 1 ####             (00##)                q: 0.968 r: 2.78   ir: 1.0    f: 2.692  exp: 1939 tga: 13550 talp: 13623 tav: 8.44   num: 1,
 ##43 0 ####             (empty)               q: 0.997 r: 1.341  ir: 0.9505 f: 1.337  exp: 251 tga: 13546 talp: 13547 tav: 53.6   num: 19,
 ##12 0 ####             (00##)                q: 0.97  r: 1.439  ir: 0.9836 f: 1.395  exp: 350 tga: 13577 talp: 13623 tav: 41.5   num: 1,
 ##23 1 ###2             (00##)                q: 0.9   r: 3.045  ir: 1.0    f: 2.742  exp: 1016 tga: 13593 talp: 13596 tav: 11.4   num: 1,
 ##23 0 ####             (00##)                q: 0.924 r: 3.334  ir: 0.9999 f: 3.08   exp: 881 tga: 13595 talp: 13597 tav: 16.4   num: 2,
 ##33 0 ####           

In [21]:
# Keep only the classifiers that report themselves as reliable, then show
# them ranked from highest to lowest fitness (ties keep population order).
reliable = list(filter(lambda cl: cl.is_reliable(), population_explore))
sorted(reliable, key=lambda cl: cl.fitness, reverse=True)

[##32 1 ####             (00##)                q: 0.947 r: 3.327  ir: 0.9999 f: 3.15   exp: 933 tga: 13586 talp: 13618 tav: 15.2   num: 1,
 ##23 0 ####             (00##)                q: 0.924 r: 3.334  ir: 0.9999 f: 3.08   exp: 881 tga: 13595 talp: 13597 tav: 16.4   num: 2,
 ##23 1 ###2             (00##)                q: 0.9   r: 3.045  ir: 1.0    f: 2.742  exp: 1016 tga: 13593 talp: 13596 tav: 11.4   num: 1,
 ##22 1 ####             (00##)                q: 0.968 r: 2.78   ir: 1.0    f: 2.692  exp: 1939 tga: 13550 talp: 13623 tav: 8.44   num: 1,
 ##33 0 ####             (00##)                q: 0.988 r: 2.676  ir: 1.0    f: 2.645  exp: 1815 tga: 13582 talp: 13604 tav: 7.58   num: 1,
 ##43 1 ####             (00##)                q: 0.952 r: 1.469  ir: 0.9902 f: 1.399  exp: 277 tga: 13547 talp: 13547 tav: 45.0   num: 1,
 ##12 0 ####             (00##)                q: 0.97  r: 1.439  ir: 0.9836 f: 1.395  exp: 350 tga: 13577 talp: 13623 tav: 41.5   num: 1,
 ##12 1 ####            