# OSP model



In [None]:
%load_ext lab_black
import h5py, pickle, os
import numpy as np
import pandas as pd
from IPython.display import clear_output

import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Layer, Input, concatenate, multiply, RepeatVector
from tensorflow.keras.optimizers import Adam, SGD

from meta import gpu_mem_cap, model_cfg
from data_wrangling import sample_generator, my_data
from modeling import attractor, arm_attractor, ModelCheckpoint_custom, rnn
from evaluate import training_history, strain_eval, grain_eval, plot_variables, vis

# Put a cap on this process's GPU memory to allow parallel runs.
# NOTE(review): 2048 is presumably in MB — confirm against meta.gpu_mem_cap.
gpu_mem_cap(2048)

### Parameters block for Papermill
- Instead of using model_cfg directly, this extra step is needed for batch runs with Papermill
- Consider carefully the variable type of each cfg setting (this will probably be checked automatically later...)
    - Do not use integers (e.g., 1, 2, 3, 0) for variables that can be float32 (e.g., w_oh_noise, tau...)
    - Use an integer with a trailing dot instead (e.g., 1., 2., 3., 0.)
- To use an attractor, two params must be configured:
    - 1) embed_attractor_cfg --> JSON cfg file of the pretrained attractor
    - 2) embed_attractor_h5 --> h5 file of the exact weights (e.g. ep0500.h5 #epoch or c90.h5 #correct rate)

In [None]:
# Identifier of this run; the reporting section reloads
# 'models/<code_name>/model_config.json'.
# NOTE(review): the name contains 'SGD128' while batch_size below is 32 —
# confirm which one is intended.
code_name = 'O2P_SGD128_p0_h100_c10_lr008_50M'

sample_name = 'jay'
rng_seed = 999  # Fed to tf.random.set_seed through cfg.rng_seed
use_semantic = False  # When True, build_model adds the semantic inputs

# Model architecture
input_dim = 119  # Width of the 'Input_O' layer in build_model
hidden_units = 100
output_dim = 250  # Width of the 'Plaut_St' / 'input_P' inputs in build_model
cleanup_units = 10

use_attractor = False  # Load pretrained or not

# Only needed when use_attractor is True (see the markdown notes above)
# embed_attractor_cfg = 'models/Attractor_{0:02d}/model_config.json'.format(
#     cleanup_units
# )
# embed_attractor_h5 = 'c00.h5'

rnn_activation = 'sigmoid'
regularizer_const = None
w_initializer = 'glorot_uniform'

# Float-valued settings: keep the trailing dot so the type stays float
p_noise = 0.
tau = 0.2
max_unit_time = 4.

# Training
optimizer = 'sgd'  # build_model handles 'adam' and 'sgd'
n_mil_sample = 50.  # presumably millions of training samples — TODO confirm
batch_size = 32
learning_rate = 0.008  # Passed to the optimizer in build_model
save_freq = 5  # Passed as `period` to the checkpoint callback
bq_dataset = None

### Construct model configuration

In [None]:
# Gather the notebook-level (Papermill) parameters into keyword arguments
# for model_cfg.
d = {}

# Minimal settings are mandatory: a missing one raises KeyError right here.
for v in model_cfg.minimal_cfgs:
    d[v] = globals()[v]

# Auxiliary settings are optional: copy only the ones defined in this
# notebook. An explicit membership test replaces the former bare
# `except: pass`, which also hid unrelated errors and KeyboardInterrupt.
for v in model_cfg.aux_cfgs:
    if v in globals():
        d[v] = globals()[v]

cfg = model_cfg(**d)

# Seed TensorFlow before any data or model object is created.
tf.random.set_seed(cfg.rng_seed)
data = my_data(cfg)

# Modeling

## Building

In [None]:
def build_model(training=True):
    """
    Create and compile the Keras model described by the global `cfg`.

    Note that:
    Structural things, such as the repeat vector, should be built within the model
    Static calculation of input is easier to modify, so it should be built
    within the sample generator

    Args:
        training: when exactly True, cfg.noise_on() is called before the
            model is built; any other value calls cfg.noise_off().

    Returns:
        The compiled tf.keras Model (binary cross-entropy loss with the
        'BinaryAccuracy' and 'mse' metrics). Its summary is printed.

    Raises:
        ValueError: if cfg.optimizer is neither 'adam' nor 'sgd'.
    """

    if training is True:
        cfg.noise_on()
    else:
        cfg.noise_off()

    # The static input is repeated over every time step inside the model
    input_o = Input(shape=(cfg.input_dim, ), name="Input_O")
    input_o_t = RepeatVector(cfg.n_timesteps, name='Input_Ot')(input_o)

    if cfg.use_semantic == True:
        raw_s_t = Input(
            shape=(cfg.n_timesteps, cfg.output_dim), name='Plaut_St'
        )

        input_p = Input(shape=(cfg.output_dim, ), name='input_P')
        input_p_t = RepeatVector(cfg.n_timesteps, name='Teaching_Pt')(input_p)

        # Element-wise product of the raw semantic input and the repeated P input
        input_s_t = multiply([raw_s_t, input_p_t], name='Input_St')

        combined = concatenate([input_o_t, input_s_t], name='Combined_input')
        rnn_model = rnn(cfg)(combined)
        model = Model([input_o, raw_s_t, input_p], rnn_model)

    else:
        rnn_model = rnn(cfg)(input_o_t)
        model = Model(input_o, rnn_model)

    if cfg.optimizer == 'adam':
        op = Adam(
            learning_rate=cfg.learning_rate,
            beta_1=0.9,
            beta_2=0.999,
            amsgrad=False
        )

    elif cfg.optimizer == 'sgd':
        op = SGD(cfg.learning_rate)

    else:
        # Previously `op` stayed unbound here and compile() failed with an
        # unrelated-looking UnboundLocalError.
        raise ValueError(
            "Unsupported optimizer {!r}: expected 'adam' or 'sgd'".format(
                cfg.optimizer
            )
        )

    model.compile(
        loss='binary_crossentropy',
        optimizer=op,
        metrics=['BinaryAccuracy', 'mse']
    )

    model.summary()
    return model


model = build_model(training=True)

## Arming attractor

In [None]:
# Replace the freshly built model with an attractor-armed one, but only when
# the config explicitly enables it.
if cfg.use_attractor is not True:
    print('Config indicates no attractor, I have do nothing.')
else:
    print('Found attractor info in config (cfg), arming attractor...')
    # Attractor config is read from the path in cfg, with the check bypassed
    attractor_cfg = model_cfg(cfg.embed_attractor_cfg, bypass_chk=True)
    attractor_obj = attractor(attractor_cfg, cfg.embed_attractor_h5)
    model = arm_attractor(model, attractor_obj)
    plot_variables(model)

## Training

In [None]:
# Weights-only checkpoint callback, configured with period=cfg.save_freq
checkpoint = ModelCheckpoint_custom(
    cfg.path_weights_checkpoint,
    save_weights_only=True,
    period=cfg.save_freq,
)

history = model.fit(
    sample_generator(cfg, data),
    steps_per_epoch=cfg.steps_per_epoch,
    epochs=cfg.nEpo,
    verbose=0,
    callbacks=[checkpoint]
)

# Save the training history (the weights are handled by the checkpoint
# callback above). The context manager closes the file even if
# pickle.dump raises.
with open(cfg.path_history_pickle, "wb") as pickle_out:
    pickle.dump(history.history, pickle_out)

clear_output()
print('Training done')

# Reporting

In [None]:
# Rebuild cfg from the JSON stored under models/<code_name>/ and recreate the
# data object from it.
# NOTE(review): presumably this lets the reporting cells run without
# retraining — confirm.
cfg_json_path = f'models/{code_name}/model_config.json'
cfg = model_cfg(cfg_json_path)
data = my_data(cfg)

### Training history

In [None]:
# Load the pickled training history and write its plots to history.html
# inside the plot folder.
hist = training_history(cfg.path_history_pickle)
hist.plot_all(cfg.path_plot_folder + 'history.html')

### Parse item level stats

In [None]:
# Must turn training mode off before evaluation
# (build_model(training=False) calls cfg.noise_off()).
# NOTE(review): this returns a newly built model; presumably the *_eval
# classes load the saved checkpoint weights themselves — confirm. An attractor
# armed earlier in the notebook is not re-armed here.
model = build_model(training=False)

# Strain full model
# NOTE(review): test_use_semantic=True is passed regardless of
# cfg.use_semantic — confirm this is intended when use_semantic is False.
strain = strain_eval(cfg, data, model)
strain.start_evaluate(
    test_use_semantic=True,
    output=cfg.path_model_folder + 'result_strain_item.csv'
)

# Semantic lesion in Strain
# if cfg.use_semantic == True:
#     strain_ns = strain_eval(cfg, data, model)
#     strain_ns.start_evaluate(
#         test_use_semantic=False,
#         output=cfg.path_model_folder + 'result_strain_ns_item.csv'
#     )

# Grain
grain = grain_eval(cfg, data, model)
grain.start_evaluate(
    test_use_semantic=False,
    output=cfg.path_model_folder + 'result_grain_item.csv'
)

### Strain plots

In [None]:
# vis_ns = vis(
#     cfg.path_model_folder, 'result_strain_ns_item.csv', 'result_grain_item.csv'
# )
# vis_ns.parse_cond_df()
# lesion = vis_ns.plot_dev('acc').properties(title='Semantic lesion')
# strain_plot = full | lesion
# strain_plot

# The instance is stored under the name `vis` (later cells rely on it), which
# shadows the `vis` class imported at the top of the notebook. Import the
# class under a private alias so that re-running this cell keeps working
# instead of trying to call the previous instance.
from evaluate import vis as _vis_class

vis = _vis_class(
    cfg.path_model_folder, 'result_strain_item.csv', 'result_grain_item.csv'
)

vis.parse_cond_df()

# Interactive accuracy plot, saved to the plot folder and displayed inline
full = vis.plot_dev_interactive('acc').properties(
    title='Accuracy in all conditions'
)
full.save(cfg.path_plot_folder + 'development_all.html')
full

### Lesion development deep dive

In [None]:
# dev_inter = vis.plot_dev_interactive('acc')
# dev_inter.save(cfg.path_plot_folder + 'interactive_strain_dev_full.html')
# dev_inter

# dev_inter = vis_ns.plot_dev_interactive('acc')
# dev_inter.save(cfg.path_plot_folder + 'interactive_strain_dev_lesion.html')
# dev_inter

### Lesion time plot deep dive

In [None]:
# time_inter = vis.plot_time_interactive('acc')
# time_inter.save(cfg.path_plot_folder + 'interactive_strain_time.html')
# time_inter

### Grain plots

In [None]:
# Interactive plots for the two grain accuracy measures, using exp='grain'.
small = vis.plot_dev_interactive('acc_small_grain', exp='grain')
large = vis.plot_dev_interactive('acc_large_grain', exp='grain')
# NOTE(review): `|` presumably concatenates the two charts side by side —
# confirm against the chart type returned by plot_dev_interactive.
grain_plot = small | large
grain_plot.save(cfg.path_plot_folder + 'grain_by_response.html')
grain_plot

### Words vs. Nonwords

In [None]:
# Word vs. nonword plot for the three listed conditions, saved to the plot
# folder and displayed inline.
# NOTE(review): the condition labels presumably match values in the result
# CSVs — confirm.
wnw_plot = vis.plot_wnw(['INC_HF', 'ambiguous', 'unambiguous'])
wnw_plot.save(cfg.path_plot_folder + 'word_vs_nonword.html')
wnw_plot

### Model weights and biases

In [None]:
# Plot the variables of the current `model`, passing the target PNG path.
plot_variables(model, cfg.path_plot_folder + 'variables.png')

In [None]:
# !jupyter nbconvert --output-dir=$cfg.path_model_folder --to html OSP_master.ipynb

In [None]:
# WARNING: powers off the machine as soon as this cell runs.
# NOTE(review): presumably meant to stop a cloud VM after a batch run —
# remove or skip this cell when running interactively.
!sudo poweroff