# TF model v4.0

HS04 model incorporating non-stationary environment

- *50 sem_cleanup*
- *50 pho_cleanup*
- *500 sem_pho_hidden_units*
- *500 pho_sem_hidden_units*
- *4 output_ticks* 
- *No auto-connection lock*
- *Attractor clamped for 8 steps, free for last 4 steps*
- Implemented only in modeling.py, not in the generator; the extra generated time ticks are simply dropped
- can do both phase 1 (oral) and 2 (reading) in this notebook

In [None]:
%load_ext lab_black
import pickle, os, time
import tensorflow as tf
import numpy as np
import pandas as pd
from IPython.display import clear_output
import meta, data_wrangling, modeling, metrics, evaluate

# meta.limit_gpu_memory_use(7000)

# Parameters block (for papermill)

In [None]:
# Run parameters. This is the papermill parameters cell, so every value here is
# a default that may be overridden per run. A later cell reads these variables
# from globals() by name (see meta.CORE_CONFIGS / meta.OPTIONAL_CONFIGS), so
# renaming a variable here changes what ends up in the ModelConfig.
code_name = "tmp"
tf_root = "/home/jupyter/tf"

# Model architecture
# NOTE(review): presumably ort/pho/sem = orthography/phonology/semantics layer sizes — confirm in modeling.py
ort_units = 119
pho_units = 250
sem_units = 2446

# NOTE(review): presumably hidden layer sizes per pathway (os, op, ps, sp) — confirm in modeling.py
hidden_os_units = 500
hidden_op_units = 100
hidden_ps_units = 500
hidden_sp_units = 500

pho_cleanup_units = 50
sem_cleanup_units = 50

pho_noise_level = 0.0
sem_noise_level = 0.0

activation = "sigmoid"
tau = 1 / 3
max_unit_time = 4.0
# NOTE(review): max_unit_time / tau = 12, which equals output_ticks here; whether
# that relation is required is not visible in this notebook — confirm.
output_ticks = 12
inject_error_ticks = 11

# Training
sample_name = "flexi_rank"
wf_low_clip = 0
wf_high_clip = 100_000_000_000
wf_compression = 'log'
sampling_plateau = 900_000

rng_seed = 2021
learning_rate = 0.005
n_mil_sample = 1.0
zero_error_radius = 0.1
batch_size = 100
save_freq = 10

batch_name = None

# Alternative task mix kept for reference (presumably the oral phase — confirm):
# tasks = ["pho_sem", "sem_pho", "pho_pho", "sem_sem"]
# tasks_probability = [0.4, 0.4, 0.1, 0.1]

# Tasks to train and the probability of drawing each one at every training step;
# tasks_probability must have one entry per task.
tasks = ["ort_pho"]
tasks_probability = [1.0]




In [None]:
# cfg = meta.ModelConfig.from_json(os.path.join(tf_root, 'models', code_name, 'model_config.json'))

In [None]:
# Load global cfg variables into a dictionary for feeding into ModelConfig()

# Core settings are mandatory: fail early, naming every variable that is missing
# from the notebook globals (still a KeyError, as a plain globals()[name] lookup
# would raise).
missing_core = [name for name in meta.CORE_CONFIGS if name not in globals()]
if missing_core:
    raise KeyError(f"Missing core config variable(s): {missing_core}")

config_dict = {name: globals()[name] for name in meta.CORE_CONFIGS}

# Optional settings are copied only when they are defined; an explicit presence
# check replaces the former bare `except: pass`, which hid unrelated errors too.
config_dict.update(
    {name: globals()[name] for name in meta.OPTIONAL_CONFIGS if name in globals()}
)

# Construct ModelConfig object
cfg = meta.ModelConfig(**config_dict)
cfg.save()
del config_dict

# Build model and all supporting components

In [None]:
# Seed TensorFlow's global random generator from the run config before the model is created.
# NOTE(review): NumPy's RNG (used by np.random.choice for task sampling in the
# training cell) is not seeded here — confirm whether FastSampling seeds it.
tf.random.set_seed(cfg.rng_seed)
# Data container, model and batch sampler shared by all cells below.
data = data_wrangling.MyData()
model = modeling.HS04Model(cfg)
model.build()
sampler = data_wrangling.FastSampling(cfg, data)

In [None]:
# Full set of task specific components

# Tasks that own an optimizer, a loss function and a mean-loss tracker
_TASKS = ("ort_pho", "pho_pho", "sem_sem", "pho_sem", "sem_pho", "triangle")

# Input / output representation(s) sampled for each task
_generator_io = {
    "ort_pho": ("ort", "pho"),
    "pho_sem": ("pho", "sem"),
    "sem_pho": ("sem", "pho"),
    "pho_pho": ("pho", "pho"),
    "sem_sem": ("sem", "sem"),
    "triangle": ("ort", ["pho", "sem"]),
}

generators = {
    name: sampler.sample_generator(x=source, y=target)
    for name, (source, target) in _generator_io.items()
}

# Instantiate optimizer for each task (one independent Adam instance per task)
optimizers = {
    name: tf.keras.optimizers.Adam(learning_rate=cfg.learning_rate)
    for name in _TASKS
}

# Instantiate loss_fn for each task
loss_fns = {name: metrics.CustomBCE(radius=cfg.zero_error_radius) for name in _TASKS}

# Mean loss (for TensorBoard)
train_losses = {
    name: tf.keras.metrics.Mean(f"train_loss_{name}", dtype=tf.float32)
    for name in _TASKS
}

# Train metrics: the triangle task has one accuracy metric per output
_accuracy_classes = {
    "ort_pho": metrics.PhoAccuracy,
    "pho_pho": metrics.PhoAccuracy,
    "sem_sem": metrics.RightSideAccuracy,
    "pho_sem": metrics.RightSideAccuracy,
    "sem_pho": metrics.PhoAccuracy,
    "triangle_pho": metrics.PhoAccuracy,
    "triangle_sem": metrics.RightSideAccuracy,
}

train_acc = {
    name: metric_cls(f"acc_{name}") for name, metric_cls in _accuracy_classes.items()
}

## Train step for each task

In [None]:
# Since each sub-task has its own states, it must be trained with a separate optimizer,
# instead of sharing the same optimizer instance (https://github.com/tensorflow/tensorflow/issues/27120)


def get_train_step_one_output():
    """Create a fresh ``tf.function`` train step for a task with a single output.

    Each call returns a new, independently traced function, so every task can
    keep its own graph and its own optimizer.

    Returns:
        The ``train_step`` callable described below.
    """

    @tf.function
    def train_step(
        x,
        y,
        model,
        task,
        loss_fn,
        optimizer,
        train_metric,
        train_losses,
        sample_weights=None,
    ):
        """Run one optimization step and update the running loss and metric.

        Args:
            x: Input batch passed to ``model``.
            y: Target batch; ``y[-1]`` is used as the target of the last time tick.
            model: Model called as ``model(x, training=True)``.
            task: Key into ``modeling.WEIGHTS_AND_BIASES`` that selects which
                weights are updated.
            loss_fn: Callable invoked as ``loss_fn(y, y_pred)``.
            optimizer: Optimizer that applies the gradients.
            train_metric: Metric updated with the last time tick only.
            train_losses: Mean-loss tracker updated with the batch loss.
            sample_weights: Optional weights forwarded to ``loss_fn``.
        """
        # Only the weights that belong to the active task are trained.
        trainable_names = {f"{name}:0" for name in modeling.WEIGHTS_AND_BIASES[task]}
        train_weights = [w for w in model.weights if w.name in trainable_names]

        with tf.GradientTape() as tape:
            y_pred = model(x, training=True)

            if sample_weights is None:
                loss_value = loss_fn(y, y_pred)
            else:
                # The original line here read `loss_value = sample`, an undefined
                # name that raised NameError whenever sample weights were given.
                # NOTE(review): assumes loss_fn follows the tf.keras.losses.Loss
                # call signature (y_true, y_pred, sample_weight) — confirm for
                # metrics.CustomBCE.
                loss_value = loss_fn(y, y_pred, sample_weight=sample_weights)

        grads = tape.gradient(loss_value, train_weights)
        optimizer.apply_gradients(zip(grads, train_weights))

        # Mean loss for Tensorboard
        train_losses.update_state(loss_value)

        # Metric for last time step (output first dimension is time ticks, from -cfg.output_ticks to end) for live results
        train_metric.update_state(tf.cast(y[-1], tf.float32), y_pred[-1])

    return train_step


def get_train_step_triangle():
    """Create a fresh ``tf.function`` train step for the triangle phase (2 outputs).

    Returns:
        The ``train_step_triangle`` callable described below. (The factory
        previously had no ``return`` statement and therefore produced ``None``.)
    """

    @tf.function
    def train_step_triangle(
        x,
        y,
        model,
        task,
        loss_fn,
        optimizer,
        train_metric_pho,
        train_metric_sem,
        train_losses,
    ):
        """Run one optimization step on the summed pho + sem loss.

        Args:
            x: Input batch passed to ``model``.
            y: Pair of targets; ``y[0]`` is scored against the first model
                output (pho) and ``y[1]`` against the second (sem).
            model: Model returning ``(pho_pred, sem_pred)`` when called.
            task: Key into ``modeling.WEIGHTS_AND_BIASES`` that selects which
                weights are updated.
            loss_fn: Callable invoked as ``loss_fn(y_true, y_pred)`` per output.
            optimizer: Optimizer that applies the gradients.
            train_metric_pho: Metric updated with the last time tick of pho.
            train_metric_sem: Metric updated with the last time tick of sem.
            train_losses: Mean-loss tracker updated with the summed loss.
        """
        # Only the weights that belong to the active task are trained.
        trainable_names = {f"{name}:0" for name in modeling.WEIGHTS_AND_BIASES[task]}
        train_weights = [w for w in model.weights if w.name in trainable_names]

        with tf.GradientTape() as tape:
            pho_pred, sem_pred = model(x, training=True)
            loss_value_pho = loss_fn(y[0], pho_pred)
            loss_value_sem = loss_fn(y[1], sem_pred)
            loss_value = loss_value_pho + loss_value_sem

        grads = tape.gradient(loss_value, train_weights)
        optimizer.apply_gradients(zip(grads, train_weights))

        # Mean loss for Tensorboard
        train_losses.update_state(loss_value)

        # Metric for last time step (output first dimension is time ticks, from -cfg.output_ticks to end) for live results
        train_metric_pho.update_state(tf.cast(y[0][-1], tf.float32), pho_pred[-1])
        train_metric_sem.update_state(tf.cast(y[1][-1], tf.float32), sem_pred[-1])

    return train_step_triangle


# One independently created train step per task: single-output tasks share the
# same factory, the triangle task uses its dedicated two-output factory.
train_steps = {
    task_name: get_train_step_one_output()
    for task_name in ("ort_pho", "pho_pho", "pho_sem", "sem_sem", "sem_pho")
}
train_steps["triangle"] = get_train_step_triangle()

# Train model

In [None]:
# TensorBoard writer
train_summary_writer = tf.summary.create_file_writer(cfg.path["tensorboard_folder"])

for epoch in range(cfg.total_number_of_epoch):
    start_time = time.time()

    for step in range(cfg.steps_per_epoch):
        # Intermix tasks (Draw a new task in each step)
        task = np.random.choice(cfg.tasks, p=cfg.tasks_probability)
        x_batch_train, y_batch_train = next(generators[task])
        model.set_active_task(task)  # task switching must be done outside trainstep...

        # The triangle step takes one accuracy metric per output (pho, sem) and
        # train_acc has no plain "triangle" key; every other task has one metric.
        if task == "triangle":
            task_metrics = (train_acc["triangle_pho"], train_acc["triangle_sem"])
        else:
            task_metrics = (train_acc[task],)

        train_steps[task](
            x_batch_train,
            y_batch_train,
            model,
            task,
            loss_fns[task],
            optimizers[task],
            *task_metrics,
            train_losses[task],
        )

    # End of epoch operations

    ## Write log to tensorboard
    with train_summary_writer.as_default():
        ### Losses
        for name, mean_loss in train_losses.items():
            tf.summary.scalar(f"loss_{name}", mean_loss.result(), step=epoch)

        ### Metrics
        for name, accuracy in train_acc.items():
            tf.summary.scalar(f"acc_{name}", accuracy.result(), step=epoch)

        ### Weight histogram
        for weight in model.weights:
            tf.summary.histogram(f"{weight.name}", weight, step=epoch)

    ## Print status
    compute_time = time.time() - start_time
    print(f"Epoch {epoch + 1} trained for {compute_time:.0f}s")
    print(
        "Losses:",
        [f"{x}: {train_losses[x].result().numpy()}" for x in cfg.tasks],
    )
    # wait=True defers clearing until the next output arrives, so the status
    # printed above stays visible during the following epoch.
    clear_output(wait=True)

    ## Save weights (every one of the first 10 epochs, then every cfg.save_freq epochs)
    if (epoch < 10) or ((epoch + 1) % cfg.save_freq == 0):
        weight_path = cfg.path["weights_checkpoint_fstring"].format(epoch=epoch + 1)
        model.save_weights(weight_path, overwrite=True, save_format="tf")

    ## Reset metric and loss so the next epoch's averages start from scratch
    for mean_loss in train_losses.values():
        mean_loss.reset_states()
    for accuracy in train_acc.values():
        accuracy.reset_states()


# End of training ops
print("Done")

# Evaluate model

In [None]:
# test = evaluate.EvalOral(cfg, model, data)
# test.eval('strain')

In [None]:
# OP eval: evaluate the ort_pho task on the strain test set
ts_strain = data.load_testset(
    os.path.join(tf_root, "dataset", "testsets", "strain.pkl.gz")
)

test = evaluate.TestSet(
    name="strain",
    cfg=cfg,
    model=model,
    task="ort_pho",
    testitems=ts_strain["item"],
    x_test=ts_strain["ort"],
    y_test=ts_strain["pho"],
)

test.eval_all()

# post-processing
# Make sure the output folder exists before writing the CSV files.
eval_folder = os.path.join(cfg.path["model_folder"], "eval")
os.makedirs(eval_folder, exist_ok=True)

# Attach item-level properties and build the condition label "<frequency>_<pho_consistency>"
merge_data = data.df_strain[
    ["word", "frequency", "pho_consistency", "imageability"]
].copy()
merge_data["cond"] = merge_data.frequency + "_" + merge_data.pho_consistency
item_df = test.result.merge(merge_data, how="left", left_on="item", right_on="word")
item_df.to_csv(os.path.join(eval_folder, "strain_item_df.csv"))

# Condition means: numeric_only=True keeps text columns (e.g. item / word) out of
# the average; pandas >= 2.0 raises TypeError on them instead of dropping them.
mean_df = (
    item_df.groupby(["epoch", "timetick", "cond", "frequency", "pho_consistency"])
    .mean(numeric_only=True)
    .reset_index()
)
mean_df.to_csv(os.path.join(eval_folder, "strain_mean_df.csv"))
