
    This Source Code Form is subject to the terms of the Mozilla Public
    License, v. 2.0. If a copy of the MPL was not distributed with this
    file, You can obtain one at http://mozilla.org/MPL/2.0/.


In [None]:
# Enable automatic module reload
%load_ext autoreload
%autoreload 2

# Load BACS module
from bacs.agents.bacs import BACS, Configuration

from bacs.agents.bacs.utils.Plotting_wrapper import \
    parse_metrics_to_df, \
    plot_performance

from bacs.metrics import \
    _maze_metrics, \
    _how_many_pees_match_non_aliased_states, \
    _mean_reliable_classifier_specificity, \
    _when_full_knowledge_is_achieved, \
    _state_of_population

# Load environments
import gym
import my_mazes

# Agent - BACS

## Common parameters

In [None]:
# Base classifier length; RANDOM_ATTRIBUTES is added to it when the agent
# configuration is built, and it is also used on its own as u_max.
CLASSIFIER_LENGTH = 8
# Modify the perception in maze.py of the my_mazes environment
# (presumably required whenever this value is changed — TODO confirm).
RANDOM_ATTRIBUTES = 0
NUMBER_OF_POSSIBLE_ACTIONS = 8

# Gym environment id and the value handed to set_prob_slippery().
MAZE = "Woods100-v0"
PROB_SLIPPERY = 0.

# Exploration phase: number of trials, metrics collection frequency, and the
# beta / epsilon values passed to the agent configuration.
NUMBER_OF_EXPLORE_TRIALS = 5000
METRICS_TRIAL_FREQUENCY_EXPLORE = 1
BETA_EXPLORE = 0.05
EPSILON = 0.8

# Passed to the agent configuration as do_ga and do_pee respectively.
DO_GA = True
ENABLE_PEE = True

## Environment - Maze

In [None]:
# Initialize the environment registered under the MAZE id
maze = gym.make(MAZE)
# Apply the configured slip probability on the object exposed as maze.env
maze.env.set_prob_slippery(PROB_SLIPPERY)
# Reset it, putting the agent into a random position, and keep the value
# returned by reset() in `situation`
situation = maze.reset()
# Render the state in ASCII using the 'aliasing_human' render mode
maze.render('aliasing_human')

## Training of BACS - Exploration

In [None]:
%%time

cfg_explore = Configuration(
    classifier_length=CLASSIFIER_LENGTH+RANDOM_ATTRIBUTES,
    number_of_possible_actions=NUMBER_OF_POSSIBLE_ACTIONS,
    user_metrics_collector_fcn=_maze_metrics,
    metrics_trial_frequency=METRICS_TRIAL_FREQUENCY_EXPLORE,
    do_pee=ENABLE_PEE,
    do_ga=DO_GA,
    beta=BETA_EXPLORE,
    gamma=0.95,
    theta_i=0.1,
    theta_r=0.9,
    epsilon=EPSILON,
    u_max=CLASSIFIER_LENGTH,
    theta_exp=20,
    theta_ga=100,
    theta_as=20,
    mu=0.3,
    chi=0.8,
    bs_max=0
)

agent_explore = BACS(cfg_explore)
population_explore, metrics_explore = agent_explore.explore(maze, NUMBER_OF_EXPLORE_TRIALS)

### Performance

In [None]:
# Reuse the collection frequency stored in the exploration configuration so
# the parsing and plotting helpers both receive the same value.
metrics_trial_frequency_explore = cfg_explore.metrics_trial_frequency

# Convert the exploration metrics for plotting (presumably into a DataFrame,
# judging by the helper's name — confirm in Plotting_wrapper).
# NOTE(review): the two trailing None arguments are positional; their meaning
# is defined by parse_metrics_to_df and is not visible here.
metrics_df = parse_metrics_to_df(metrics_explore, metrics_trial_frequency_explore, None, None)
# Plot the agent's performance; the last positional argument is likewise None.
plot_performance(agent_explore, maze, metrics_df, cfg_explore, MAZE, metrics_trial_frequency_explore, None)

In [None]:
# Metrics computed on the final population.
final_metrics = _maze_metrics(population_explore, maze)
print(final_metrics)

# Trial at which full knowledge was first achieved and trial at which it
# was stable, as reported by the metrics helper.
first_trial, stable_trial = _when_full_knowledge_is_achieved(metrics_explore)
print("Full knowledge was achieved at trials ", first_trial, " and was stable at ", stable_trial)

# NOTE(review): the messages below contain typos ("classifer", "specifity");
# they are kept verbatim so the printed output does not change.
matching_pees = _how_many_pees_match_non_aliased_states(population_explore, maze)
print("There are ", matching_pees,
      " enhanced classifer(s) that match a non-aliased states.")

mean_specificity = _mean_reliable_classifier_specificity(population_explore, maze)
print("Mean reliable population specifity is ",
      mean_specificity)

# Population state looked up for the trial where full knowledge first appeared.
population_state = _state_of_population(metrics_explore, first_trial)
print(population_state)

In [None]:
# Order the population in place, highest fitness first.
population_explore.sort(key=lambda classifier: -classifier.fitness)

# Keep only the classifiers that report themselves as reliable.
population_explore_to_display = list(
    filter(lambda classifier: classifier.is_reliable(), population_explore)
)

# Print each retained classifier, followed by blank lines as a separator.
for cl in population_explore_to_display:
    print(cl)
    print("\n")

In [None]:
# Size of the whole population, then how many of its classifiers are reliable.
print(len(population_explore))
print(sum(1 for cl in population_explore if cl.is_reliable()))

# De-duplicate once: dict.fromkeys keeps one key per group of classifiers
# that hash and compare equal, in first-seen order. The resulting dict
# supports both len() and iteration, so no intermediate list is needed.
unique_population = dict.fromkeys(population_explore)

# The same two counts, restricted to the distinct classifiers.
print(len(unique_population))
print(sum(1 for cl in unique_population if cl.is_reliable()))