# Woods1 and ACS2

In [1]:
import pandas as pd
from lcs.metrics import population_metrics
from lcs.agents import EnvironmentAdapter



def common_metrics(agent, env):
    metrics = {}

    pop = agent.get_population()
    agent_name = agent.__class__.__name__

    if hasattr(agent, 'rho'):
        metrics['rho'] = agent.rho
        agent_name += "_v" + agent.cfg.rho_update_version
    else:
        metrics['rho'] = 0

    metrics['agent'] = agent_name
    metrics['reliable'] = len([cl for cl in pop if cl.is_reliable()])

    metrics.update(population_metrics(pop, env))

    return metrics

## Woods1

In [2]:
import gym
import gym_corridor
import gym_woods

In [3]:
env = gym.make('Woods1-v0')

In [4]:
env.reset()
env.render()


[37m□[0m [37m□[0m [37m□[0m [37m□[0m [37m□[0m
[37m□[0m [30m■[0m [30m■[0m [33m$[0m [37m□[0m
[37m□[0m [30m■[0m [30m■[0m [30m■[0m [37m□[0m
[37m□[0m [30m■[0m [30m■[0m [30m■[0m [37m□[0m
[37m□[0m [37m□[0m [36mX[0m [37m□[0m [37m□[0m


In [5]:
def to_df(m):
    df = pd.DataFrame(m)
    df.set_index('trial', inplace=True)
    return df

In [6]:
class CorridorAdapter(EnvironmentAdapter):
    @staticmethod
    def to_genotype(phenotype):
        return phenotype,

## ACS2

In [7]:
import lcs.agents.acs2 as acs2

acs2_cfg = acs2.Configuration(8, 8,
                              epsilon=0.99,
                              metrics_trial_frequency=1)

## Experiments

In [8]:
acs2_agent = acs2.ACS2(cfg=acs2_cfg)

In [9]:
%%time

# explore
pop_acs2_explr, m_acs2_explr = acs2_agent.explore(env, 1000)

CPU times: user 16.8 s, sys: 7.22 ms, total: 16.8 s
Wall time: 16.8 s


In [10]:
len(pop_acs2_explr)

180

In [11]:
reliable = [cl for cl in pop_acs2_explr if cl.is_reliable()]

for cl in sorted(reliable, key=lambda cl: -cl.fitness)[:15]:
    print(f'{cl.condition}-{cl.action}-{cl.effect}\tq: {cl.q:.2}\tr: {cl.r:06.2f}')

####.F.#-5-####OOO#	q: 1.0	r: 999.95
####F#.#-4-####O#O#	q: 1.0	r: 999.90
###F##.#-3-###.##O#	q: 1.0	r: 999.86
####.##F-7-####O##.	q: 1.0	r: 999.89
###FO#.#-3-###.##O#	q: 1.0	r: 999.72
##...#F#-6-####O#O#	q: 1.0	r: 998.81
####.#F#-6-####O#O#	q: 1.0	r: 998.85
###..#F#-6-####O#O#	q: 1.0	r: 998.81
##.O..##-6-###.#F##	q: 1.0	r: 949.36
#..O..##-6-###.#F##	q: 1.0	r: 949.36
.O.##.##-5-#.###F##	q: 1.0	r: 949.32
.O..#.#.-5-#.###F##	q: 1.0	r: 949.32
.O.##.#.-5-#.###F##	q: 1.0	r: 949.32
OO#...#.-3-..#FOO##	q: 1.0	r: 949.16
#.OO#.##-7-##..#F##	q: 1.0	r: 948.95


In [12]:
df_explr = to_df(m_explr)
avg_steps = df_explr['steps_in_trial'].mean()
print(f'Average steps to reward {avg_steps:.2f}')

NameError: name 'm_explr' is not defined

In [None]:
%%time

# exploit
acs2_agent_exploit = acs2.ACS2(population=pop_acs2_explr, cfg=acs2_cfg)
pop_explt, m_explt = agent.exploit(env, 15)

In [None]:
df_explt = to_df(m_explt)
avg_steps = df_explt['steps_in_trial'].mean()
print(f'Average steps to reward {avg_steps:.2f}')

## AACS2

In [None]:
import lcs.agents.aacs2 as aacs2

aacs2_cfg = aacs2.Configuration(1, 2,
                                epsilon=0.2,
                                rho_update_version='2',
                                user_metrics_collector_fcn=common_metrics,
                                environment_adapter=CorridorAdapter,
                                metrics_trial_frequency=1)

aacs2_agent = aacs2.AACS2(cfg=aacs2_cfg)

In [None]:
%%time

# explore
pop_explr, m_explr = aacs2_agent.explore(env, 10_000)

In [None]:
df_aacs2_explr = to_df(m_explr)
aacs2_avg_steps = df_aacs2_explr['steps_in_trial'].mean()
print(f'Average steps to reward {aacs2_avg_steps:.2f}')

In [None]:
df_aacs2_explr

In [None]:
%%time

# exploit
aacs2_agent_exploit = aacs2.AACS2(population=pop_explr, cfg=aacs2_cfg)
pop_aacs2_explt, m_aacs2_explt = aacs2_agent_exploit.exploit(env, 15)

In [None]:
df_aacs2_explt = to_df(m_aacs2_explt)

In [None]:
avg_steps = df_aacs2_explt['steps_in_trial'].mean()
print(f'Average steps to reward {avg_steps:.2f}')