
    This Source Code Form is subject to the terms of the Mozilla Public
    License, v. 2.0. If a copy of the MPL was not distributed with this
    file, You can obtain one at http://mozilla.org/MPL/2.0/.


In [None]:
# Enable automatic module reload
%load_ext autoreload
%autoreload 2

# To ease the loading of modules
import os
import sys
# Absolute path of the parent directory (presumably where the `agents` and
# `my_examples` packages live); register it once on the import search path.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

# Load BEACS module
from agents.beacs import BEACS, BEACSConfiguration

# Load particular Environment Adapter
from my_examples.adapter.CartPoleEnvironmentAdapter import CartPoleEnvironmentAdapter

# Load Metrics
from my_examples.metrics.CartPoleMetrics import _cartpole_metrics, _mean_reliable_classifier_specificity

# Load Plotting Wrappers
from my_examples.metrics.CartPolePlottingWrapper import \
    parse_metrics_to_df, \
    plot_cartpole_performance, \
    plot_average_cartpole_performance

# Load environments
import gym

# Agent - BEACS

## Common parameters

In [None]:
# Environmental set-up
# CLASSIFIER_LENGTH is passed to both configurations as `classifier_length` and `u_max`.
# NOTE(review): presumably the number of values in a CartPole observation — confirm against the adapter.
CLASSIFIER_LENGTH = 4
# Passed to both configurations as `number_of_possible_actions`.
NUMBER_OF_POSSIBLE_ACTIONS = 2
# Gym environment id handed to `gym.make` and to the plotting wrapper.
CARTPOLE = "CartPole-v0" # CartPole-v1 allows up to 500 steps per episode (v0 stops at 200)

# Exploration set-up
# Number of trials given to `agent_explore.explore`.
NUMBER_OF_EXPLORE_TRIALS = 500
# Passed as `metrics_trial_frequency` of the exploration configuration.
METRICS_TRIAL_FREQUENCY_EXPLORE = 1
# Passed as `epsilon` of the exploration configuration (the exploitation configuration uses 0.0 instead).
EPSILON = 0.8
# Passed as `beta_alp` to both configurations.
BETA_ALP = 0.05

# Exploitation set-up
# Number of trials given to `agent_exploit_rl.exploit`.
NUMBER_OF_EXPLOIT_TRIALS_RL = 500
# Passed as `beta_rl` of the exploitation configuration.
BETA_EXPLOIT_RL = 0.05

# RL set-up
# Passed as `gamma` to both configurations.
GAMMA = 0.95
# Passed as `beta_rl` of the exploration configuration.
BETA_RL = 0.05

# GA set-up (only the exploration configuration receives these)
# Passed as `chi`.
CROSSOVER = 0.8
# Passed as `mu`.
MUTATION = 0.3

# BEACS set-up
# When True, `agent_explore.apply_CRACS()` is called after exploration and the population is re-read.
APPLY_CRACS = True
# Passed as `do_ep` to both configurations.
ENABLE_EP = True
# Passed as `bs_max` to both configurations.
# NOTE(review): 0 presumably disables behavioral sequences — confirm in agents.beacs.
LENGTH_OF_BEHAVIORAL_SEQUENCES = 0

## Environment - CartPole

In [None]:
# Initialize the gym environment identified by CARTPOLE
CartPole = gym.make(CARTPOLE)
# Reset it, which puts the agent into a random starting position.
# The returned value is kept in `state` but is not read again in the visible cells.
state = CartPole.reset()

## Training of BEACS - Exploration

In [None]:
%%time

# Keyword arguments of the exploration-phase configuration, gathered in one
# mapping so the full set of settings can be inspected before the agent is built.
explore_settings = {
    "classifier_length": CLASSIFIER_LENGTH,
    "number_of_possible_actions": NUMBER_OF_POSSIBLE_ACTIONS,
    "environment_adapter": CartPoleEnvironmentAdapter,
    "user_metrics_collector_fcn": _cartpole_metrics,
    "metrics_trial_frequency": METRICS_TRIAL_FREQUENCY_EXPLORE,
    "do_ep": ENABLE_EP,
    "beta_alp": BETA_ALP,
    "beta_rl": BETA_RL,
    "gamma": GAMMA,
    "epsilon": EPSILON,
    "u_max": CLASSIFIER_LENGTH,
    "mu": MUTATION,
    "chi": CROSSOVER,
    "bs_max": LENGTH_OF_BEHAVIORAL_SEQUENCES,
}
cfg_explore = BEACSConfiguration(**explore_settings)

# Build the agent and run the exploration trials; `explore` hands back the
# resulting population together with the metrics collected along the way.
agent_explore = BEACS(cfg_explore)
population_explore, metrics_explore = agent_explore.explore(
    CartPole, NUMBER_OF_EXPLORE_TRIALS
)

### Performance

In [None]:
# Sampling interval of the exploration metrics, read back from the configuration
metrics_trial_frequency_explore = cfg_explore.metrics_trial_frequency

# Only exploration data exists at this point, so the exploitation metrics and
# their frequency are passed as None.
metrics_df = parse_metrics_to_df(
    metrics_explore,
    metrics_trial_frequency_explore,
    None,
    None,
)

# The trailing None stands in for the exploitation trial counts, which are not
# available yet either.
plot_cartpole_performance(
    agent_explore,
    CartPole,
    metrics_df,
    cfg_explore,
    CARTPOLE,
    metrics_trial_frequency_explore,
    None,
)

In [None]:
def _report_population(population):
    """Print the CartPole metrics and the mean reliable specificity of `population`."""
    print(_cartpole_metrics(population, CartPole))
    print("Mean reliable population specifity is ",
          _mean_reliable_classifier_specificity(population, CartPole))


# Metrics of the population exactly as exploration left it
_report_population(population_explore)

# Optionally run CRACS on the agent and fetch the resulting population
if APPLY_CRACS:
    agent_explore.apply_CRACS()
    population_explore = agent_explore.get_population()

# Same metrics again, so the effect of CRACS (if applied) can be compared
_report_population(population_explore)

In [None]:
# Order the population by decreasing fitness
population_explore.sort(key=lambda classifier: -classifier.fitness)

# Snapshot the ordered classifiers into a plain list, then print its size
# followed by every classifier, each separated by blank lines.
population_explore_to_display = list(population_explore)
print(len(population_explore_to_display))
print("\n")
for cl in population_explore_to_display:
    print(cl)
    print("\n")

In [None]:
# Show the agent's PAI states memory as the cell output.
# NOTE(review): presumably the states the agent flagged as perceptually aliased
# during exploration — confirm in agents.beacs.
agent_explore.get_pai_states_memory()

## Training of BEACS - Exploitation

In [None]:
%%time

# Keyword arguments of the exploitation-phase configuration. Compared with the
# exploration settings: epsilon is 0.0, beta_rl comes from BETA_EXPLOIT_RL, the
# metrics frequency is fixed to 1, and mu/chi are not passed (they are left at
# whatever BEACSConfiguration defaults to).
exploit_rl_settings = {
    "classifier_length": CLASSIFIER_LENGTH,
    "number_of_possible_actions": NUMBER_OF_POSSIBLE_ACTIONS,
    "environment_adapter": CartPoleEnvironmentAdapter,
    "user_metrics_collector_fcn": _cartpole_metrics,
    "metrics_trial_frequency": 1,
    "do_ep": ENABLE_EP,
    "beta_alp": BETA_ALP,
    "beta_rl": BETA_EXPLOIT_RL,
    "gamma": GAMMA,
    "epsilon": 0.0,
    "u_max": CLASSIFIER_LENGTH,
    "bs_max": LENGTH_OF_BEHAVIORAL_SEQUENCES,
}
cfg_exploit_rl = BEACSConfiguration(**exploit_rl_settings)

# Seed a new agent with the population obtained from exploration and run the
# exploitation trials; `exploit` returns the population and the metrics.
agent_exploit_rl = BEACS(cfg_exploit_rl, population_explore)
population_exploit_rl, metrics_exploit_rl = agent_exploit_rl.exploit(
    CartPole, NUMBER_OF_EXPLOIT_TRIALS_RL
)

### Performance

In [None]:
# Sampling intervals of both phases, each read back from the configuration that
# produced the metrics so they cannot drift from the values actually used.
metrics_trial_frequency_explore = cfg_explore.metrics_trial_frequency
metrics_trial_frequency_exploit = cfg_exploit_rl.metrics_trial_frequency

# Merge exploration and exploitation metrics into a single data frame
metrics_df = parse_metrics_to_df(
    metrics_explore,
    metrics_trial_frequency_explore,
    metrics_exploit_rl,
    metrics_trial_frequency_exploit,
)

# Plot the whole run; the last argument lists the exploitation trial counts
plot_cartpole_performance(
    agent_exploit_rl,
    CartPole,
    metrics_df,
    cfg_exploit_rl,
    CARTPOLE,
    metrics_trial_frequency_explore,
    [NUMBER_OF_EXPLOIT_TRIALS_RL],
)

In [None]:
def _mean_steps(metrics):
    """Return the mean ``steps_in_trial`` over the metric records.

    The average is taken over the records actually present rather than over
    the configured trial count, so it stays correct when metrics are sampled
    less often than every trial. An empty collection yields 0.0 instead of
    raising ``ZeroDivisionError``.
    """
    steps = [record['steps_in_trial'] for record in metrics]
    return sum(steps) / len(steps) if steps else 0.0


# Step counts of every recorded trial, exploration first and exploitation
# after; this list is what the averaged performance plot consumes.
trials = [record['steps_in_trial'] for record in metrics_explore]

avg_step_explore = _mean_steps(metrics_explore)
print("Average number of steps to solve the cartpole is ",avg_step_explore,
      " for a total of ", NUMBER_OF_EXPLORE_TRIALS, " trials in EXPLORATION")

# Exploitation is only reported when exploitation trials were requested
if NUMBER_OF_EXPLOIT_TRIALS_RL:
    trials.extend(record['steps_in_trial'] for record in metrics_exploit_rl)
    avg_step_exploit_rl = _mean_steps(metrics_exploit_rl)
    print("Average number of steps to solve the cartpole is ",avg_step_exploit_rl,
          " for a total of ", NUMBER_OF_EXPLOIT_TRIALS_RL, " trials in EXPLOITATION with Reinforcement Module")

# Population-level metrics after each phase, for side-by-side comparison
print(_cartpole_metrics(population_explore, CartPole))
print(_cartpole_metrics(population_exploit_rl, CartPole))

In [None]:
# Plot the averaged performance from `trials`, the per-trial step counts
# gathered from the exploration and exploitation metrics.
plot_average_cartpole_performance(trials)

In [None]:
# Optional: uncomment to order the population by decreasing fitness first.
# population_exploit_rl.sort(key=lambda cl: -cl.fitness)

def _has_sequence_or_is_enhanced(classifier):
    """Tell whether a classifier has a behavioral sequence or reports itself as enhanced."""
    return classifier.behavioral_sequence or classifier.is_enhanced()


# Keep only the classifiers that satisfy the predicate above
population_exploit_rl_to_display = list(
    filter(_has_sequence_or_is_enhanced, population_exploit_rl)
)
print(len(population_exploit_rl_to_display))
print("\n")
# Print at most the first five matches, each followed by blank lines
for cl in population_exploit_rl_to_display[:5]:
    print(cl)
    print("\n")