# O2S model



In [None]:
%load_ext lab_black

import tensorflow as tf
import numpy as np
import pandas as pd

### Parameters block for Papermill
- Instead of using model_cfg directly, this extra step is needed for batch runs with Papermill

In [None]:
# Run parameters: plain assignments that are packed into model_cfg further down.
code_name = 'O2S_v0000'

embedding = 'tasa'  # 'tasa' or 'bert'; selects the data file names and sem_units
sample_name = 'hal'  # HAL log frequency with clipping, like HS04 (did not use it for control...)
sample_rng_seed = 1234
tf_rng_seed = 4321

# Model architecture
o_input_dim = 119
hidden_units = 100
cleanup_units = 50
rnn_activation = 'sigmoid'
regularizer_const = 0.

p_noise = 0.  # i.e. w_pp, w_pc, and w_cp noise
tau = 1.
max_unit_time = 2.

# Training
n_mil_sample = 1.
batch_size = 128
learning_rate = 0.0001
save_freq = 5

In [None]:
# Training data files are named after the embedding they were built from.
x_name = 'x_train_{}.npz'.format(embedding)
y_name = 'y_train_{}.npz'.format(embedding)
csv_name = 'df_train_{}.csv'.format(embedding)

# Width of the semantic target vector for each supported embedding.
sem_units_by_embedding = {'tasa': 300, 'bert': 1024}

# Fail here with a clear message; otherwise an unsupported embedding leaves
# sem_units undefined and only surfaces later as a NameError.
if embedding not in sem_units_by_embedding:
    raise ValueError(
        'Unknown embedding {!r}; expected one of {}'.format(
            embedding, sorted(sem_units_by_embedding)
        )
    )
sem_units = sem_units_by_embedding[embedding]

### Packing parameters into model_cfg

In [None]:
from meta import model_cfg

# Keyword arguments for model_cfg, grouped by concern and merged in one call.
run_kwargs = dict(
    code_name=code_name,
    x_name=x_name,
    y_name=y_name,
    csv_name=csv_name,
    sample_name=sample_name,
    sample_rng_seed=sample_rng_seed,
    tf_rng_seed=tf_rng_seed,
)

# Semantic input is switched off and every sem_param_* is set to 0.
semantic_kwargs = {'use_semantic': False}
semantic_kwargs.update(
    ('sem_param_' + suffix, 0) for suffix in ('gf', 'gi', 'kf', 'ki', 'hf', 'hi')
)

architecture_kwargs = dict(
    o_input_dim=o_input_dim,
    hidden_units=hidden_units,
    pho_units=sem_units,  # Output becomes the semantic embedding vector
    cleanup_units=cleanup_units,
    embed_attractor_cfg=None,
    embed_attractor_h5=None,
)

noise_kwargs = dict(
    w_oh_noise=0.,
    w_hp_noise=0.,
    w_pp_noise=p_noise,
    w_pc_noise=p_noise,
    w_cp_noise=p_noise,
)

training_kwargs = dict(
    tau=tau,
    max_unit_time=max_unit_time,
    n_mil_sample=n_mil_sample,
    batch_size=batch_size,
    rnn_activation=rnn_activation,
    regularizer_const=regularizer_const,
    learning_rate=learning_rate,
    save_freq=save_freq,
)

cfg = model_cfg(
    **run_kwargs,
    **semantic_kwargs,
    **architecture_kwargs,
    **noise_kwargs,
    **training_kwargs,
    bq_dataset=None,
)

# Seed TensorFlow's RNG. Sampling happens outside TF, so change
# sample_rng_seed to alter the sampling instead.
tf.random.set_seed(cfg.tf_rng_seed)

# Preload data
from data_wrangling import sample_generator, my_data

data = my_data(cfg)

# Modeling

## Building

In [None]:
def build_model(training=True):
    """Build and compile the orthography-to-semantics feed-forward model.

    Reads the notebook-level ``cfg`` and ``sem_units``.

    Args:
        training: when truthy, ``cfg.noise_on()`` is called; otherwise
            ``cfg.noise_off()`` is called.

    Returns:
        A compiled Keras ``Model`` whose input has width ``cfg.o_input_dim``
        and whose output has width ``sem_units``. Every call creates new
        layers; no saved weights are loaded here.
    """
    # Organization principle:
    # Structural things, such as a repeat vector, should be built within the model
    # Static calculation of input --> Easier to modify --> build within sample generator

    from tensorflow.keras import Model
    from tensorflow.keras.layers import Input, Dense
    from tensorflow.keras.optimizers import Adam

    # Train/test mode checking
    if training:
        cfg.noise_on()
    else:
        cfg.noise_off()

    # NOTE(review): no activation is passed to either Dense layer, so Keras
    # applies none (linear); cfg.rnn_activation is not used here -- confirm
    # this is intended.
    input_o = Input(shape=(cfg.o_input_dim, ), name="Orthography")
    hidden = Dense(cfg.hidden_units, name="Hidden")(input_o)
    output = Dense(sem_units, name="Semantics")(hidden)
    model = Model(input_o, output)

    # NOTE(review): 'accuracy' is tracked alongside an MSE loss on a
    # real-valued target -- confirm that metric is meaningful here.
    model.compile(
        loss='mse',
        optimizer=Adam(
            learning_rate=cfg.learning_rate,
            beta_1=0.9,
            beta_2=0.999,
            amsgrad=False
        ),
        metrics=['accuracy', 'mse']
    )

    model.summary()
    return model


model = build_model(training=True)

## Training

In [None]:
import h5py
import os
import pickle
from tensorflow.keras.callbacks import ModelCheckpoint
from data_wrangling import sample_generator
from IPython.display import clear_output

# Weights-only checkpoints are written to cfg.path_weights_checkpoint.
checkpoint = ModelCheckpoint(
    cfg.path_weights_checkpoint,
    verbose=1,
    save_freq=cfg.save_freq_sample,
    save_weights_only=True
)

history = model.fit(
    sample_generator(cfg, data),
    steps_per_epoch=cfg.steps_per_epoch,
    epochs=cfg.nEpo,
    verbose=2,
    callbacks=[checkpoint],
)

# Saving history (the context manager closes the file even if dump fails)
with open(cfg.path_history_pickle, "wb") as pickle_out:
    pickle.dump(history.history, pickle_out)

clear_output()
print('Training done')


# Reporting

### Training history

In [None]:
from evaluate import training_history

# Load the history pickle written after model.fit and call its MSE plot.
hist = training_history(cfg.path_history_pickle)
hist.plot_mse()

### Parse item level stats

In [None]:
# Must turn training mode off before evaluation.
# Only the cfg noise switch is flipped: build_model() creates new layers and
# loads no weights, so calling it again here would evaluate an untrained network.
cfg.noise_off()

model.evaluate(data.x_train, data.y_train)

In [None]:
# Model output for data.x_train; compared against data.y_train in the cells below.
y_pred = model.predict(data.x_train)

In [None]:
import altair as alt

# NOTE(review): df is created empty and is not used in the visible cells -- confirm it is still needed.
df = pd.DataFrame()
y_pred.shape  # last expression, so the notebook displays the prediction shape

In [None]:
# Mean squared error between predictions and targets, converted to a NumPy array.
# NOTE(review): tf.metrics.mse is documented as (y_true, y_pred) but the arguments
# are passed the other way round here; the squared error is symmetric -- confirm
# the resulting dtype handling is as intended.
mse_words = tf.metrics.mse(y_pred, data.y_train).numpy()

In [None]:
# Store the MSE values in the training dataframe. mse_words (computed in the
# preceding cell) already holds exactly this result, so it is reused rather
# than recomputed.
data.df_train['model_mse'] = mse_words

In [None]:
import altair as alt




In [None]:
# Semantic lesion in Strain
# strain_eval must be imported in this cell: its only other import sits in a
# later cell, so a top-to-bottom run would hit a NameError here.
from evaluate import strain_eval

strain_ns = strain_eval(cfg, data, model)
strain_ns.start_evaluate(
    test_use_semantic=False,
    output=cfg.path_model_folder + 'result_strain_ns_item.csv'
)

In [None]:
# Grain
from evaluate import strain_eval, grain_eval

# Rebuild the network with training mode off before handing it to the evaluator.
# NOTE(review): build_model() loads no weights -- presumably grain_eval restores
# them from the checkpoints itself; verify.
model = build_model(training=False)

grain = grain_eval(cfg, data, model)
grain.start_evaluate(
    output=cfg.path_model_folder + 'result_grain_item.csv',
    test_use_semantic=False,
)

### Strain plots

In [None]:
# The class is imported under an alias so that binding an instance to the name
# `vis` (which later cells rely on) no longer shadows the class itself.
from evaluate import vis as vis_cls

# Results of the semantic-lesion Strain evaluation.
vis_ns = vis_cls(
    cfg.path_model_folder, 'result_strain_ns_item.csv', 'result_grain_item.csv'
)

# NOTE(review): 'result_strain_item.csv' is not written by any visible cell
# (only the '_ns' variant is) -- confirm the file exists before running this.
vis = vis_cls(
    cfg.path_model_folder, 'result_strain_item.csv', 'result_grain_item.csv'
)

vis_ns.parse_cond_df()
vis.parse_cond_df()

full = vis.plot_dev('acc').properties(title='Full input')
lesion = vis_ns.plot_dev('acc').properties(title='Semantic lesion')

# Side-by-side chart, saved as HTML and left as the last expression for display.
strain_plot = full | lesion
strain_plot.save(cfg.path_plot_folder + 'strain.html')
strain_plot

### Lesion development deep dive

In [None]:
# Interactive 'acc' development plot from the lesion results; saved as HTML
# and left as the last expression so the notebook displays it.
dev_inter = vis_ns.plot_dev_interactive('acc')
dev_inter.save(cfg.path_plot_folder + 'interactive_strain_dev.html')
dev_inter

### Lesion time plot deep dive

In [None]:
# Interactive 'acc' time plot from the lesion results; saved as HTML and left
# as the last expression so the notebook displays it.
time_inter = vis_ns.plot_time_interactive('acc')
time_inter.save(cfg.path_plot_folder + 'interactive_strain_time.html')
time_inter

### Grain plots

In [None]:
# Small- and large-grain accuracy plots (exp='grain') combined side by side,
# saved as HTML and left as the last expression for display.
small = vis.plot_dev('acc_small_grain', exp='grain')
large = vis.plot_dev('acc_large_grain', exp='grain')
grain_plot = small | large
grain_plot.save(cfg.path_plot_folder + 'grain.html')
grain_plot

### Imageability effect

In [None]:
# vis.parse_cond_df(cond_strain='cond_img')
# vis.plot_dev('acc', exp='strain')

### Frequency effect

In [None]:
# vis.parse_cond_df(cond_strain='cond_wf')
# vis.plot_dev('acc', exp='strain')

### Phonological regularity effect

In [None]:
# vis.parse_cond_df(cond_strain='cond_pho')
# vis.plot_dev('acc', exp='strain')

### Model weights and biases

In [None]:
from evaluate import plot_variables
# Pass the current model and the target image path in the plot folder to plot_variables.
plot_variables(model, cfg.path_plot_folder + 'variables.png')

# Saving results

### Write notebook to html (Must save notebook first)

In [None]:
# # Only works for manual runs
# !jupyter nbconvert --to html --ExecutePreprocessor.store_widget_state=True --output-dir=$cfg.path_model_folder basicOSP_master.ipynb

### Push results to GCP-BQ

In [None]:
# if cfg.bq_dataset is not None:
#     from meta import write_all_to_bq

#     for attempt in range(10):
#         try:
#             write_all_to_bq(cfg, strain.i_hist, grain.i_hist)
#             print('Results pushed to BQ')
#         except:
#             from time import sleep
#             sleep(10)
#         else:
#             break