In [2]:
# Logger
import logging
logging.basicConfig(level=logging.INFO)

from time import sleep

import ipywidgets as widgets

import pandas as pd

import numpy as np
from matplotlib import pyplot as plt

# Import local paths
import sys, os
sys.path.append(os.path.abspath('../'))
sys.path.append(os.path.abspath('../openai-envs'))

# Enable automatic module reload
%load_ext autoreload
%autoreload 2

# Load PyALCS module
from lcs.metrics import population_metrics
from lcs.agents import EnvironmentAdapter
from lcs.agents.acs2 import ACS2, Configuration, ClassifiersList

# Load environments
import gym
import gym_corridor

# Load utils
from utils.corridor import print_cl, plot_performance

%matplotlib inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Parameter selection

In [3]:
# Parameters list
# https://ipywidgets.readthedocs.io/en/latest/examples/Widget%20List.html
#
# Every widget below is a module-level object; `start_experiment` reads their
# current `.value` each time the simulation is run.

# Environment parameters
# Each option is a (label shown in the dropdown, gym environment id) pair.
env_select = widgets.Dropdown(
    options=[
        ('Corridor 20', 'corridor-20-v0'),
        ('Corridor 40', 'corridor-40-v0'),
        ('Corridor 100', 'corridor-100-v0'),
    ],
    description='Environment: ')

env_params = widgets.VBox(children=[env_select])

# Agent parameters
# NOTE: `epsilon_slider` is an HBox of (Label, FloatSlider); the slider itself
# is `epsilon_slider.children[1]`.
epsilon_slider = widgets.HBox([
    # Raw string: `\e` is not a valid escape sequence in a normal string literal.
    widgets.Label(value=r'Exploration rate $\epsilon$: '),
    widgets.FloatSlider(value=1.0, min=0.0, max=1.0, step=0.05)
])

beta_slider = widgets.FloatSlider(
    value=0.05, min=0.0, max=1.0, step=0.05,
    description='Beta (learning rate): ')
gamma_slider = widgets.FloatSlider(
    value=0.95, min=0.0, max=1.0, step=0.05,
    description='Gamma: ')
do_ga_chbkx = widgets.Checkbox(value=True, description='Genetic Generalization')
agent_params = widgets.VBox(
    children=[epsilon_slider, beta_slider, gamma_slider, do_ga_chbkx])

# Experiment parameters
explore_trials = widgets.IntSlider(
    value=300, min=10, max=10_000, step=10,
    description='Explore trials: ')
exploit_trials = widgets.IntSlider(
    value=100, min=0, max=1000, step=10,
    description='Exploit trials: ')
# The frequency must be a positive integer: a minimum of 1 rules out a
# "collect every 0 trials" setting, and step=1 keeps the default of 5 on the
# slider's grid.
metrics_freq = widgets.IntSlider(
    value=5, min=1, max=100, step=1,
    description='Metric freq: ')

# Layout: both trial sliders side by side, metrics frequency underneath.
exp_params = widgets.VBox(children=[
    widgets.HBox(children=[explore_trials, exploit_trials]),
    metrics_freq
])

# Rendering final tab
tabs = widgets.Tab(children=[env_params, agent_params, exp_params])
for tab_index, tab_title in enumerate(('Environment', 'Agent', 'Experiment')):
    tabs.set_title(tab_index, tab_title)

display(tabs)

Tab(children=(VBox(children=(Dropdown(description='Environment: ', options=(('Corridor 20', 'corridor-20-v0'),…

In [4]:
def start_experiment():
    """Run one explore/exploit ACS2 experiment with the current widget values.

    Reads the environment id, agent parameters and trial counts from the
    module-level widgets, runs the explore phase, then runs the exploit phase
    with a second agent initialised from the explored population, and finally
    plots the outcome with ``plot_performance``.

    Returns:
        None. The configuration is printed and the plot is drawn as side
        effects.
    """
    def corridor_metrics(pop, env):
        """Return the metrics dict recorded for the given population and env."""
        return dict(population_metrics(pop, env))

    class CorridorAdapter(EnvironmentAdapter):
        """Adapter that wraps the raw environment state in a one-element tuple."""

        @staticmethod
        def to_genotype(phenotype):
            return (phenotype,)

    # Build agent configuration
    cfg = Configuration(
        classifier_length=1,
        number_of_possible_actions=2,
        # `epsilon_slider` is an HBox(Label, FloatSlider); take the slider.
        epsilon=epsilon_slider.children[1].value,
        beta=beta_slider.value,
        gamma=gamma_slider.value,
        theta_exp=50,
        theta_ga=50,
        do_ga=do_ga_chbkx.value,
        mu=0.03,
        u_max=1,
        # Guard against a zero frequency coming from the slider.
        metrics_trial_frequency=max(1, metrics_freq.value),
        environment_adapter=CorridorAdapter,
        user_metrics_collector_fcn=corridor_metrics)

    print(cfg)
    # Short pause kept from the original; presumably lets the printed
    # configuration render before the long-running simulation - TODO confirm.
    sleep(1)

    # Trial counts are stored under distinct local names so that the
    # module-level slider widgets are not shadowed.
    n_explore = explore_trials.value
    n_exploit = exploit_trials.value

    # Prepare the environment and perform environment interaction
    env = gym.make(env_select.value)
    try:
        explorer = ACS2(cfg)
        population_explore, metrics_explore = explorer.explore(env, n_explore)

        # The exploit phase runs on the agent seeded with the explored
        # population (previously `exploiter` was created but never used).
        exploiter = ACS2(cfg, population_explore)
        population_exploit, metrics_exploit = exploiter.exploit(env, n_exploit)
    finally:
        # Release the environment even when a phase fails.
        env.close()

    # Parse results into DataFrame
    # NOTE(review): `parse_experiments_results` is not defined or imported in
    # the visible cells - it must already exist in the kernel namespace.
    df = parse_experiments_results(
        metrics_explore, metrics_exploit, cfg.metrics_trial_frequency)

    # Plot results
    plot_performance(df, population_exploit)

In [5]:
# Build button for running simulation
out = widgets.Output()


# `clear_output=True` wipes the previous run's output each time the handler
# is invoked, so no manual `out.clear_output()` call is needed.
@out.capture(clear_output=True)
def btn_clicked(btn):
    """Run the experiment and render its output inside the `out` widget.

    Args:
        btn: the Button instance that was clicked (passed by `on_click`).
    """
    # Disable the button for the duration of the run so that repeated clicks
    # do not queue additional experiments.
    btn.disabled = True
    try:
        start_experiment()
        plt.show()
    finally:
        btn.disabled = False


b = widgets.Button(
    description="Run simulation",
    button_style='info',  # 'success', 'info', 'warning', 'danger' or ''
    icon="check")

b.on_click(btn_clicked)

## Simulation

In [6]:
# Show the run button followed by the output area that captures each run.
display(b, out)

Button(button_style='info', description='Run simulation', icon='check', style=ButtonStyle())

Output()