# OSP model

Changes:
- Absolute path
- Flexible zero-error-radius
- Examine output by teaching signal and slots

## Import libraries

In [None]:
%load_ext lab_black
import h5py, pickle, os
import tensorflow as tf
import numpy as np
import pandas as pd

import meta, data_wrangling, modeling, evaluate
from IPython.display import clear_output

# Cap the GPU memory this process may use so that several runs can share a GPU.
# NOTE(review): the unit of 2048 is presumably MB — confirm in meta.gpu_mem_cap.
meta.gpu_mem_cap(2048)  # Put memory cap to allow parallel runs
# NOTE(review): presumably reports or validates GPU availability — see meta.check_gpu.
meta.check_gpu()

## Parameters block for Papermill
- Instead of using model_cfg directly, this extra step is needed for batch run using Papermill
- Consider carefully the variable type in each cfg setting (Probably automatically check it later...)
    - Do not use integer (e.g., 1, 2, 3, 0) in variables that can be float32 (e.g., w_oh_noise, tau...)
    - Use integer with a dot instead (e.g., 1., 2., 3., 0.)
- To use an attractor, two params must be configured:
    - 1) embed_attractor_cfg --> json cfg file of the pretrain attractor 
    - 2) embed_attractor_h5 --> h5 file of the exact weight (e.g. ep0500.h5 #epoch or c90.h5 #correct rate)

In [None]:
# Papermill parameters cell: every name below is a plain module-level
# assignment so it can be overridden per run. The config-construction cell
# collects these names from globals() into the model configuration.

# Run identifier; cfg.code_name is also used for the TensorBoard log directory.
code_name = "large1"

sample_name = "jay"
rng_seed = 53797  # cfg.rng_seed is handed to tf.random.set_seed
use_semantic = False  # build_model adds the semantic/teaching inputs when True

# Model architecture
input_dim = 119
output_dim = 250
hidden_units = 100
cleanup_units = 20

pretrain_attractor = False  # Load pretrained or not

# Only needed when pretrain_attractor is True (see the "Arming attractor" cell):
# embed_attractor_cfg = 'models/Attractor_{0:02d}/model_config.json'.format(
#     cleanup_units
# )
# embed_attractor_h5 = 'c00.h5'

rnn_activation = "sigmoid"
regularizer_const = None
w_initializer = 0.1  # range of uniform random
# Radius given to modeling.CustomBCE in build_model; None makes build_model
# fall back to plain "binary_crossentropy".
zero_error_radius = 0.1


p_noise = 0.0
tau = 1 / 3  # float on purpose (see the note above about int vs. float settings)
max_unit_time = 4.0
output_ticks = 2

# Training
optimizer = "adam"  # build_model handles "adam" and "sgd"
n_mil_sample = 1.0
batch_size = 128
learning_rate = 0.001
save_freq = 10

bq_dataset = None
batch_unique_setting_string = None

## Construct model configuration

In [None]:
# Gather the notebook-level parameters named by meta.model_cfg into one dict
# and build the configuration object from it.
d = {}

# Required settings: a missing name is a configuration error, so the KeyError
# raised by the lookup is allowed to propagate.
for v in meta.model_cfg.minimal_cfgs:
    d[v] = globals()[v]

# Optional settings: copy only the ones that were actually defined. An explicit
# membership test replaces the former bare ``except: pass``, which would also
# have hidden unrelated errors (including KeyboardInterrupt).
for v in meta.model_cfg.aux_cfgs:
    if v in globals():
        d[v] = globals()[v]

cfg = meta.model_cfg(**d)

# Seed TensorFlow before the data object and the model are created.
tf.random.set_seed(cfg.rng_seed)
data = data_wrangling.my_data(cfg)

# Modeling

### Custom metrics (in development)

In [None]:
class Out0(tf.keras.metrics.Metric):
    """Mean predicted output over the units whose target is 0.

    The stored value is overwritten (not accumulated) on every call to
    ``update_state``, so ``result`` reflects only the most recent batch.
    """

    def __init__(self, name="output0", **kwargs):
        super().__init__(name=name, **kwargs)
        self.out = self.add_weight(name="out0", initializer="zeros")

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Store the mean of ``y_pred`` where ``y_true == 0``.

        ``sample_weight`` is accepted for API compatibility and ignored.
        NOTE(review): a batch without any target equal to 0 yields the mean of
        an empty selection (NaN in TensorFlow) — confirm this cannot happen.
        """
        self.out.assign(tf.reduce_mean(y_pred[y_true == 0]))

    def result(self):
        """Return the value stored by the latest ``update_state`` call."""
        return self.out

    def reset_state(self):
        """Reset the stored value to 0 (current Keras spelling)."""
        self.out.assign(0.0)

    def reset_states(self):
        """Deprecated spelling kept so older TensorFlow releases still work."""
        self.reset_state()


class Out1(tf.keras.metrics.Metric):
    """Mean predicted output over the units whose target is 1.

    The stored value is overwritten (not accumulated) on every call to
    ``update_state``, so ``result`` reflects only the most recent batch.
    """

    def __init__(self, name="output1", **kwargs):
        super().__init__(name=name, **kwargs)
        self.out = self.add_weight(name="out1", initializer="zeros")

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Store the mean of ``y_pred`` where ``y_true == 1``.

        ``sample_weight`` is accepted for API compatibility and ignored.
        NOTE(review): a batch without any target equal to 1 yields the mean of
        an empty selection (NaN in TensorFlow) — confirm this cannot happen.
        """
        self.out.assign(tf.reduce_mean(y_pred[y_true == 1]))

    def result(self):
        """Return the value stored by the latest ``update_state`` call."""
        return self.out

    def reset_state(self):
        """Reset the stored value to 0 (current Keras spelling)."""
        self.out.assign(0.0)

    def reset_states(self):
        """Deprecated spelling kept so older TensorFlow releases still work."""
        self.reset_state()

In [None]:
class MeanOutputSlot10(tf.keras.metrics.Metric):
    """Mean predicted output of the 10th (last) of 10 equal output slots.

    ``y_pred`` is split into ``N_SLOTS`` equal parts along its last axis and
    the mean of part ``SLOT_INDEX`` is stored. The value is overwritten on
    every ``update_state`` call, so ``result`` reflects only the latest batch.
    """

    N_SLOTS = 10  # number of equal-width slots along the last axis
    SLOT_INDEX = 9  # zero-based index of the reported slot (the 10th)

    def __init__(self, name="mean_output_slot10", **kwargs):
        super().__init__(name=name, **kwargs)
        self.out10 = self.add_weight(name="out10", initializer="zeros")

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Store the mean of the selected slot; targets and weights are unused."""
        y_pred_slots = tf.split(y_pred, self.N_SLOTS, axis=-1)
        self.out10.assign(tf.reduce_mean(y_pred_slots[self.SLOT_INDEX]))

    def result(self):
        """Return the value stored by the latest ``update_state`` call."""
        return self.out10

    def reset_state(self):
        """Reset the stored value to 0 (current Keras spelling)."""
        self.out10.assign(0.0)

    def reset_states(self):
        """Deprecated spelling kept so older TensorFlow releases still work."""
        self.reset_state()


class MeanOutputSlot4(tf.keras.metrics.Metric):
    """Mean predicted output of the 4th of 10 equal output slots.

    ``y_pred`` is split into ``N_SLOTS`` equal parts along its last axis and
    the mean of part ``SLOT_INDEX`` is stored. The value is overwritten on
    every ``update_state`` call, so ``result`` reflects only the latest batch.
    """

    N_SLOTS = 10  # number of equal-width slots along the last axis
    SLOT_INDEX = 3  # zero-based index of the reported slot (the 4th)

    def __init__(self, name="mean_output_slot4", **kwargs):
        super().__init__(name=name, **kwargs)
        self.out4 = self.add_weight(name="out4", initializer="zeros")

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Store the mean of the selected slot; targets and weights are unused."""
        y_pred_slots = tf.split(y_pred, self.N_SLOTS, axis=-1)
        self.out4.assign(tf.reduce_mean(y_pred_slots[self.SLOT_INDEX]))

    def result(self):
        """Return the value stored by the latest ``update_state`` call."""
        return self.out4

    def reset_state(self):
        """Reset the stored value to 0 (current Keras spelling)."""
        self.out4.assign(0.0)

    def reset_states(self):
        """Deprecated spelling kept so older TensorFlow releases still work."""
        self.reset_state()

In [None]:
from tensorflow.keras import Model
from tensorflow.keras.layers import Layer, Input, concatenate, multiply, RepeatVector
from tensorflow.keras.optimizers import Adam, SGD


def build_model(training=True):
    """
    Create and compile the Keras model described by the module-level ``cfg``.

    Note that:
    For structural things, such as repeat vector, should build within the model
    For Static calculation of input, it is easier to modify, should build within sample generator

    Args:
        training: when True ``cfg.noise_on()`` is called before building,
            otherwise ``cfg.noise_off()``.

    Returns:
        The compiled ``tf.keras.Model`` (its summary is printed as a side effect).

    Raises:
        ValueError: if ``cfg.optimizer`` is neither ``"adam"`` nor ``"sgd"``.
    """

    if training:
        cfg.noise_on()
    else:
        cfg.noise_off()

    # The orthographic input is static, so it is repeated over all time steps.
    input_o = Input(shape=(cfg.input_dim,), name="Input_O")
    input_o_t = RepeatVector(cfg.n_timesteps, name="Input_Ot")(input_o)

    # Construct semantic input
    # The explicit comparison is kept so that only a value equal to True
    # enables this path (behavior unchanged for other truthy values).
    if cfg.use_semantic == True:  # noqa: E712
        raw_s_t = Input(shape=(cfg.n_timesteps, cfg.output_dim), name="Plaut_St")

        input_p = Input(shape=(cfg.output_dim,), name="input_P")
        input_p_t = RepeatVector(cfg.n_timesteps, name="Teaching_Pt")(input_p)

        input_s_t = multiply([raw_s_t, input_p_t], name="Input_St")

        combined = concatenate([input_o_t, input_s_t], name="Combined_input")
        # Fixed: the bare name ``rnn`` is not defined in this notebook; the
        # layer lives in the ``modeling`` module, as used in the branch below.
        rnn_model = modeling.rnn(cfg)(combined)
        model = Model([input_o, raw_s_t, input_p], rnn_model)

    else:
        rnn_model = modeling.rnn(cfg)(input_o_t)
        model = Model(input_o, rnn_model)

    # Select optimizer
    if cfg.optimizer == "adam":
        op = Adam(
            learning_rate=cfg.learning_rate, beta_1=0.0, beta_2=0.999, amsgrad=False
        )

    elif cfg.optimizer == "sgd":
        op = SGD(cfg.learning_rate)

    else:
        # Previously an unknown name left ``op`` unbound and failed later with
        # an unrelated-looking error at compile time.
        raise ValueError(
            f"Unsupported optimizer {cfg.optimizer!r}; expected 'adam' or 'sgd'"
        )

    me = [
        "BinaryAccuracy",
        "mse",
        Out0(),
        Out1(),
        MeanOutputSlot4(),
        MeanOutputSlot10(),
    ]

    # Select the loss: a zero-error-radius loss (modeling.CustomBCE) when a
    # radius is configured, plain binary cross-entropy otherwise.
    if cfg.zero_error_radius is not None:
        # Fixed: report the value held by cfg, not the notebook-level global.
        print(f"Using zero-error-radius of {cfg.zero_error_radius}")
        model.compile(
            loss=modeling.CustomBCE(radius=cfg.zero_error_radius),
            optimizer=op,
            metrics=me,
        )

    else:
        print("No zero-error-radius")
        model.compile(loss="binary_crossentropy", optimizer=op, metrics=me)

    model.summary()
    return model


# Build the training-mode model (build_model calls cfg.noise_on() for training=True).
model = build_model(training=True)

## Arming attractor

In [None]:
# Optionally attach a pretrained attractor to the freshly built model.
# Only the literal value True enables arming.
if cfg.pretrain_attractor is not True:
    print("Config indicates no attractor, I have do nothing.")
else:
    print("Found attractor info in config (cfg), arming attractor...")

    # Load the attractor's own configuration, then the attractor with the
    # weight file named in cfg.
    attractor_cfg = meta.model_cfg(cfg.embed_attractor_cfg, bypass_chk=True)
    attractor_obj = modeling.attractor(attractor_cfg, cfg.embed_attractor_h5)

    model = modeling.arm_attractor(model, attractor_obj)
    evaluate.plot_variables(model)

## Training
- ball (id = 96)

In [None]:
# Display the training-set row with label 96 (the item named in the heading above).
data.df_train.loc[
    96,
]

In [None]:
# Input and target of training item 96 as float32 arrays.
# np.tile with reps [1, 1] returns an array with at least two dimensions, so a
# 1-D row becomes a batch of one — assumes x_train/y_train rows are 1-D.
one_input = np.tile(data.x_train[96,], [1, 1]).astype("float32")
one_target = np.tile(data.y_train[96,], [1, 1]).astype("float32")

In [None]:
# TensorBoard callback writing under batch_log/<code_name>, with histograms
# computed every epoch.
tboard = tf.keras.callbacks.TensorBoard(
    histogram_freq=1,
    log_dir=f"batch_log/{cfg.code_name}",
)

# First short training run (10 epochs) on the single item.
history = model.fit(
    x=one_input,
    y=one_target,
    epochs=10,
    verbose=1,
    callbacks=[tboard],
)

# Drop the per-epoch progress output and leave only a completion message.
clear_output()
print("Training done")

In [None]:
# Continue training the same model for 50 more epochs on the single item;
# the returned History object is not stored here.
model.fit(x=one_input, y=one_target, epochs=50, verbose=1, callbacks=[tboard])

In [None]:
class PrintAllIO:
    """A convenient class for printing out all input and activation in layer rnn"""

    def __init__(self, model):
        """Remember the model whose layer named "rnn" will be inspected."""
        self.model = model

    def get_rnn(self, name):
        """Return attribute ``name`` of the "rnn" layer as a squeezed NumPy array.

        The attribute must be an iterable of objects exposing ``.numpy()``;
        the results are stacked with ``np.asarray`` and singleton axes removed.
        """
        # Fixed: use the model given to this instance rather than whatever
        # global happens to be called ``model``.
        rnn_layer = self.model.get_layer("rnn")
        return np.asarray([x.numpy() for x in getattr(rnn_layer, name)]).squeeze()

    def print_all(self):
        """Print the recorded input and activation lists for units h, c and p."""
        for position, unit in enumerate(("h", "c", "p")):
            if position:
                # Blank separator between consecutive unit groups.
                print("\n")

            print(f"Input {unit}:")
            print(self.get_rnn(f"input_{unit}_list"))
            print(f"Activation {unit}:")
            print(self.get_rnn(f"act_{unit}_list"))

In [None]:
# Run one forward pass on the single item and display the output.
# NOTE(review): presumably this call is what fills the rnn layer's *_list
# attributes read by PrintAllIO below — confirm in modeling.rnn.
model(one_input)

In [None]:
# Helper used by the cells below to read the rnn layer's recorded lists.
chk0 = PrintAllIO(model)

### Last 2 timesteps activation p

### When target is 1

In [None]:
# Last row of the recorded "act_p_list" array.
# NOTE(review): assumes rows index time steps, so this is the final step — confirm.
last1 = chk0.get_rnn("act_p_list")[-1, :]
# Show the values at the positions where the target is 1.
last1[one_target[0] == 1]

In [None]:
# Second-to-last row of the recorded "act_p_list" array.
# NOTE(review): assumes rows index time steps — confirm.
last2 = chk0.get_rnn("act_p_list")[-2, :]
# Show the values at the positions where the target is 1.
last2[one_target[0] == 1]

### When target is 0

In [None]:
# For target == 0 positions, flag the values in last1 that exceed 0.1.
# NOTE(review): 0.1 presumably mirrors zero_error_radius from the parameters
# cell; it is hard-coded here and will not follow a different setting.
last1[one_target[0] == 0] > 0.1

In [None]:
# For target == 0 positions, flag the values in last2 that exceed 0.1.
# NOTE(review): 0.1 presumably mirrors zero_error_radius from the parameters
# cell; it is hard-coded here and will not follow a different setting.
last2[one_target[0] == 0] > 0.1

- None of the target == 1 nodes has injected error
- Many target == 0 nodes still have injected error
- Perhaps sparsity explains the punching through in learning ... same as the last expectation