# HS04 model

## Phase 2

> The weights that were obtained at the end of the Phase 1 model were frozen and embedded in the larger reading model. Thus, only the connections from orthography to other units were trained in Phase 2. Freezing the weights is not strictly necessary; earlier work (Harm & Seidenberg, 1997) used a process of intermixing in which comprehension trials were used along with reading trials. Weight freezing has the same effect but is simpler and less computationally burdensome to implement. Intermixing is effective and realistic but adds substantially to network training time.

- *Pretraining is necessary, and the pretrained weights are frozen in Phase 2*

> One set of 500 hidden units mediated the mapping from these orthographic units to semantics...

- *500 sem_hidden_units*

> ...a second set of 100 hidden units mediated the orth-phon pathway.

- *100 pho_hidden_units*

> To computationally instantiate the principle that the reading system is under pressure to perform rapidly as well as accurately, we injected error into the semantic and phonological representations early, from time samples 2 to 12.
- *11 output_ticks*



In [11]:
%reload_ext lab_black
import pickle, os, time
import tensorflow as tf
import numpy as np
import pandas as pd
import altair as alt
from IPython.display import clear_output

import meta

import data_wrangling, modeling, metrics, evaluate

# Parameters block (for papermill)

In [12]:
# Run identity: the config written later in this notebook is saved under
# <tf_root>/models/<code_name>/ (see the "Saved config json" output below).
code_name = "tri5"
tf_root = "/home/jupyter/tf"

# Model architecture
# Sizes of the orthographic, phonological and semantic layers.
ort_units = 119
pho_units = 250
sem_units = 2446

# Hidden layers between representations. The two "P2" values follow the
# Phase 2 description quoted above: 500 units for orth->sem, 100 for orth->pho.
hidden_os_units = 500  # P2
hidden_op_units = 100  # P2
hidden_ps_units = 500
hidden_sp_units = 500

pho_cleanup_units = 20
sem_cleanup_units = 50

# Noise is switched off in this run.
pho_noise_level = 0.0  # P3
sem_noise_level = 0.0  # P3

activation = "sigmoid"
# NOTE(review): presumably max_unit_time / tau gives the number of time ticks
# (cfg.n_timesteps is used by the plots below) - confirm in meta.ModelConfig.
tau = 1 / 3
max_unit_time = 4.0
# Error is injected from time samples 2 to 12 (see quote above), i.e. 11 ticks.
output_ticks = 11

# Pretraining
# Checkpoint passed to model.load_weights() before Phase 2 training starts.
pretrained_checkpoint = "/home/jupyter/tf/models/tri1_pretrain/weights/ep0200"

# Training
sample_name = "jay"

rng_seed = 2021  # passed to tf.random.set_seed()
learning_rate = 0.01  # Adam learning rate
# NOTE(review): presumably the training budget in millions of samples - confirm.
n_mil_sample = 1.5
zero_error_radius = 0.1  # passed as `radius` to modeling.CustomBCE
batch_size = 100
save_freq = 10  # after the first 10 epochs, weights are saved every save_freq epochs

In [13]:
# cfg = meta.ModelConfig.from_json(os.path.join("models", code_name, "model_config.json"))

Loading config from models/tri5/model_config.json


In [4]:
# Gather the notebook-level parameters into keyword arguments for ModelConfig.
config_dict = {}

# Core settings are mandatory: fail with a KeyError that names the missing
# variable (the same exception type the bare global lookup would raise).
for v in meta.CORE_CONFIGS:
    if v not in globals():
        raise KeyError(f"Required config variable '{v}' is not defined in this notebook")
    config_dict[v] = globals()[v]

# Optional settings are copied only when the notebook defines them. An explicit
# membership test replaces the previous bare `except: pass`, which would also
# have swallowed unrelated errors such as KeyboardInterrupt.
for v in meta.OPTIONAL_CONFIGS:
    if v in globals():
        config_dict[v] = globals()[v]

# Construct ModelConfig object and persist it; the temporary dict is no longer needed.
cfg = meta.ModelConfig(**config_dict)
cfg.save()
del config_dict

init from scratch
Saved config json to /home/jupyter/tf/models/tri5/model_config.json


# Build model and all supporting components

In [5]:
# Seed TensorFlow's global RNG before any other object is constructed.
tf.random.set_seed(cfg.rng_seed)

data = data_wrangling.MyData()
model = modeling.HS04Model(cfg)
sampler = data_wrangling.FastSampling(cfg, data)

# Every registry below is keyed by task name; this notebook trains only "triangle"
# (orthography in, phonology and semantics out).
generators = dict(triangle=sampler.sample_generator(x="ort", y=["pho", "sem"]))
optimizers = dict(
    triangle=tf.keras.optimizers.Adam(learning_rate=cfg.learning_rate)
)
loss_fns = dict(triangle=modeling.CustomBCE(radius=cfg.zero_error_radius))

# Running mean of the loss (reported to TensorBoard)
train_losses = dict(
    triangle=tf.keras.metrics.Mean("train_loss_triangle", dtype=tf.float32)
)

# Training accuracy, tracked separately for each output
train_acc = dict(
    triangle_pho=metrics.PhoAccuracy("acc_triangle_pho"),
    triangle_sem=metrics.RightSideAccuracy("acc_triangle_sem"),
)

# Train step for triangle model 

In [6]:
@tf.function
def train_step_triangle(
    x,
    y,
    model,
    task,
    loss_fn,
    optimizer,
    train_metric_pho,
    train_metric_sem,
    train_losses,
):
    """Run one optimisation step of the triangle task.

    The model is run forward on ``x``, the phonological and semantic losses
    are summed, and gradients are applied only to the model variables whose
    names appear in ``modeling.WEIGHTS_AND_BIASES[task]``. The running loss
    and both accuracy metrics are then updated.

    Args:
        x: Input batch passed to the model.
        y: Targets; ``y[0]`` is scored against the phonological output and
            ``y[1]`` against the semantic output.
        model: Callable returning ``(pho_pred, sem_pred)``.
        task: Key into ``modeling.WEIGHTS_AND_BIASES`` selecting the variables to train.
        loss_fn: Loss applied to each output separately.
        optimizer: Optimizer that applies the gradients.
        train_metric_pho: Metric updated with the last phonological slice.
        train_metric_sem: Metric updated with the last semantic slice.
        train_losses: Running-mean metric updated with the summed loss.
    """
    # Variable names carry a ":0" suffix, so add it before matching.
    trainable_names = {name + ":0" for name in modeling.WEIGHTS_AND_BIASES[task]}
    trainable_vars = [var for var in model.weights if var.name in trainable_names]

    pho_target = y[0]
    sem_target = y[1]

    with tf.GradientTape() as tape:
        pho_pred, sem_pred = model(x, training=True)
        total_loss = loss_fn(pho_target, pho_pred) + loss_fn(sem_target, sem_pred)

    gradients = tape.gradient(total_loss, trainable_vars)
    optimizer.apply_gradients(zip(gradients, trainable_vars))

    # Mean loss for TensorBoard
    train_losses.update_state(total_loss)

    # Accuracy is scored on the last element along the first axis (the final
    # time tick, per the original note that the first output dimension is time).
    train_metric_pho.update_state(tf.cast(pho_target[-1], tf.float32), pho_pred[-1])
    train_metric_sem.update_state(tf.cast(sem_target[-1], tf.float32), sem_pred[-1])


# Task name -> train-step function, mirroring the other per-task registries.
train_steps = dict(triangle=train_step_triangle)

# Train model

In [7]:
# Start from the pretrained checkpoint, then activate the triangle task.
model.build()
model.load_weights(pretrained_checkpoint)
task = "triangle"
model.set_active_task(task)


# TensorBoard writer
train_summary_writer = tf.summary.create_file_writer(cfg.path["tensorboard_folder"])

for epoch in range(cfg.total_number_of_epoch):
    start_time = time.time()
    epoch_number = epoch + 1  # 1-based, used for printing and checkpoint names

    for step in range(cfg.steps_per_epoch):

        x_batch_train, y_batch_train = next(generators[task])

        train_steps[task](
            x_batch_train,
            y_batch_train,
            model,
            task,
            loss_fns[task],
            optimizers[task],
            train_acc["triangle_pho"],
            train_acc["triangle_sem"],
            train_losses[task],
        )

    # End of epoch operations

    ## Log scalars (losses and accuracies) and histograms (weights and biases) to TensorBoard
    with train_summary_writer.as_default():
        for name, tracker in train_losses.items():
            tf.summary.scalar(f"loss_{name}", tracker.result(), step=epoch)

        for name, tracker in train_acc.items():
            tf.summary.scalar(f"acc_{name}", tracker.result(), step=epoch)

        for weight in model.weights:
            tf.summary.histogram(f"{weight.name}", weight, step=epoch)

    ## Print status (cleared when the next output arrives, so only the latest epoch shows)
    compute_time = time.time() - start_time
    print(f"Epoch {epoch_number} trained for {compute_time:.0f}s")
    print(f"Losses: {train_losses[task].result().numpy()}")
    clear_output(wait=True)

    ## Save weights: each of the first 10 epochs, then every cfg.save_freq epochs
    if epoch < 10 or epoch_number % cfg.save_freq == 0:
        weight_path = cfg.path["weights_checkpoint_fstring"].format(epoch=epoch_number)
        model.save_weights(weight_path, overwrite=True, save_format="tf")

    ## Reset losses first, then accuracies, so each epoch is reported on its own
    for tracker in (*train_losses.values(), *train_acc.values()):
        tracker.reset_states()

# End of training ops
# model.save(cfg.path["save_model_folder"])
print("Done")

Done


# Evaluate model

In [34]:
# from importlib import reload

# reload(data_wrangling)
# Rebuild the data and model objects for the evaluation section.
# NOTE(review): no checkpoint is loaded in this cell - presumably EvalReading
# restores the saved weights itself; confirm in evaluate.py.
data = data_wrangling.MyData()
model = modeling.HS04Model(cfg)
model.build()
model.set_active_task("triangle")

In [35]:
test = evaluate.EvalReading(cfg, model, data)
# test.eval('train')

# Evaluate each benchmark test set in turn.
for testset_name in ("cortese", "strain", "grain", "taraban"):
    test.eval(testset_name)

Evaluation results found, loaded from file.
Evaluation results found, loaded from file.
Evaluation results found, loaded from file.
Evaluation results found, loaded from file.


In [None]:
# Temporary workaround for the pandas float64 dtype error: re-create the
# evaluator so the results are read back from disk.
test = evaluate.EvalReading(cfg, model, data)
for testset_name in ["cortese", "strain", "grain", "taraban"]:
    test.eval(testset_name)

## Basic accuracy over epoch

In [None]:
# Train ACC by OUTPUT
# test.plot_reading_acc(test.train_mean_df).encode(y="mean(acc):Q").save(
#     os.path.join(cfg.path["plot_folder"], "train_acc.html")
# )

In [16]:
# Strain accuracy, split by output
strain_acc_chart = test.plot_reading_acc(test.strain_mean_df).encode(y="mean(acc)")
strain_acc_path = os.path.join(cfg.path["plot_folder"], "strain_acc.html")
strain_acc_chart.save(strain_acc_path)

In [17]:
# Grain: phonological accuracy, coloured by test set
is_pho_output = test.grain_mean_df.y_test.isin(["pho"])
df = test.grain_mean_df.loc[is_pho_output]

grain_acc_chart = test.plot_reading_acc(df).encode(color="testset")
grain_acc_chart.save(os.path.join(cfg.path["plot_folder"], "grain_acc.html"))

In [18]:
# Grain: accuracy by response type (large vs. small grain) and test set
grain_responses = ["pho_large_grain", "pho_small_grain"]
df = test.grain_mean_df.loc[test.grain_mean_df.y_test.isin(grain_responses)]

grain_resp_chart = test.plot_reading_acc(df).encode(
    color="testset", strokeDash="y_test"
)
grain_resp_chart.save(
    os.path.join(cfg.path["plot_folder"], "grain_acc_by_resp.html")
)

## Freq x Consistency

In [32]:
# Slider over epochs 10..150 in steps of 10, starting at the last one.
epoch_slider = alt.binding_range(min=10, max=150, step=10)
epoch_selection = alt.selection_single(
    name="epoch",
    fields=["epoch"],
    init={"epoch": 150},
    bind=epoch_slider,
)

# Slider over time ticks 0..cfg.n_timesteps, starting at the final tick.
timetick_slider = alt.binding_range(min=0, max=cfg.n_timesteps, step=1)
timetick_selection = alt.selection_single(
    name="timetick",
    fields=["timetick"],
    init={"timetick": cfg.n_timesteps},
    bind=timetick_slider,
)

In [33]:
# Taraban: frequency x regularity interaction on the phonological output
taraban_selected_conditions = [
    "taraban_hf-exc",
    "taraban_hf-reg-inc",
    "taraban_lf-exc",
    "taraban_lf-reg-inc",
]

# Filter first, then copy: adding columns to a filtered slice of another frame
# triggers pandas' SettingWithCopyWarning, and copying before filtering also
# duplicates rows that are immediately thrown away.
taraban_df = test.taraban_mean_df
df = taraban_df.loc[
    (taraban_df.testset.isin(taraban_selected_conditions))
    & (taraban_df.timetick >= 4)
    & (taraban_df.y == "pho")
].copy()

# Test set names look like "taraban_<hf|lf>-<exc|reg-inc>": characters 8-9 hold
# the frequency band and characters 11-13 the regularity label ("exc" / "reg").
df["frequency"] = df.testset.str.slice(8, 10)
df["regularity"] = df.testset.str.slice(11, 14)


# Kept as the cell's final expression so the notebook renders the chart.
(
    alt.Chart(df)
    .mark_line()
    .encode(
        x=alt.X("frequency:N", sort="descending"),
        y="mean(conditional_sse):Q",
        color="regularity:N",
    )
    .add_selection(epoch_selection)
    .add_selection(timetick_selection)
    .transform_filter(epoch_selection)
    .transform_filter(timetick_selection)
    .properties(width=180, height=180)
)

# .save(os.path.join(cfg.path["plot_folder"], "replication_hs04_fig10_taraban.html"))

In [28]:
# Debugging aid: list the columns of the current `df`.
df.columns

Index(['Unnamed: 0', 'code_name', 'task', 'testset', 'epoch', 'timetick', 'y',
       'acc', 'conditional_sse', 'sse', 'frequency', 'regularity'],
      dtype='object')

In [20]:
# Strain: frequency x consistency on the phonological output (time tick 4 onwards)
strain_df = test.strain_mean_df
df = strain_df.loc[(strain_df.timetick >= 4) & (strain_df.y == "pho")]

strain_fig10 = (
    alt.Chart(df)
    .mark_line()
    .encode(
        x=alt.X("frequency:N", sort="descending"),
        y="sum(sse):Q",
        color="pho_consistency:N",
    )
    .add_selection(epoch_selection)
    .transform_filter(epoch_selection)
    .properties(width=180, height=180)
)
strain_fig10.save(
    os.path.join(cfg.path["plot_folder"], "replication_hs04_fig10_strain.html")
)

## Nonword

In [21]:
import evaluate_old

glushko = evaluate_old.glushko_eval(cfg, data, model)
glushko.start_evaluate()

# Average the history within each epoch x timestep x condition cell.
mdf = glushko.i_hist.groupby(["epoch", "timestep", "cond"]).mean().reset_index()

# One line chart per measure (ACC, then SSE), saved as glushko_<measure>.html.
# NOTE(review): mdf is grouped by "timestep" while timetick_selection filters on
# the field "timetick" - confirm that column exists in mdf, otherwise the
# filter may leave the charts empty.
for measure in ("acc", "sse"):
    glushko_chart = (
        alt.Chart(mdf)
        .mark_line()
        .encode(x="epoch", y=measure, color="cond")
        .add_selection(timetick_selection)
        .transform_filter(timetick_selection)
    )
    glushko_chart.save(
        os.path.join(cfg.path["plot_folder"], f"glushko_{measure}.html")
    )

All done 



## Imageability

In [22]:
# Strain imageability: bars per imageability level, faceted by
# frequency-consistency condition (replication of HS04 Fig. 11).
df = test.strain_mean_df.copy()
df["fc"] = df.frequency + "-" + df.pho_consistency  # e.g. "HF-CON"
df = df.loc[df.timetick >= 4]

# Radio buttons to switch between the phonological and semantic output.
y_selection = alt.selection_single(
    bind=alt.binding_radio(options=["pho", "sem"]), fields=["y"], init={"y": "pho"}
)

# Redefined here so this cell can run on its own.
epoch_selection = alt.selection_single(
    bind=alt.binding_range(min=10, max=150, step=10),
    fields=["epoch"],
    init={"epoch": 150},
    name="epoch",
)

# timetick_selection = alt.selection_single(
#     bind=alt.binding_range(min=2, max=12, step=1),
#     fields=["timetick"],
#     init={"timetick": 12},
#     name="timetick",
# )

fig11 = (
    alt.Chart(df)
    .mark_bar()
    .encode(
        # The facet channel uses alt.Column (it was previously built with alt.X,
        # the channel class for the x position).
        column=alt.Column("fc:N", sort=["HF-CON", "LF-CON", "HF-INC", "LF-INC"]),
        y="mean(conditional_sse):Q",
        x="imageability:N",
        color="imageability:N",
    )
    .add_selection(epoch_selection)
    .add_selection(y_selection)
    .transform_filter(epoch_selection)
    .transform_filter(y_selection)
)

# Same figure saved three times, once per y measure.
fig11.save(os.path.join(cfg.path["plot_folder"], "replication_hs04_fig11_csse.html"))

fig11.encode(y="mean(sse):Q").save(
    os.path.join(cfg.path["plot_folder"], "replication_hs04_fig11_sse.html")
)

fig11.encode(y="mean(acc):Q").save(
    os.path.join(cfg.path["plot_folder"], "replication_hs04_fig11_acc.html")
)

In [23]:
# Imageability effect within Strain, one panel per output
timetick_slider = alt.binding_range(min=0, max=cfg.n_timesteps, step=1)
timetick_selection = alt.selection_single(
    name="timetick",
    fields=["timetick"],
    init={"timetick": cfg.n_timesteps},
    bind=timetick_slider,
)

strain_img_chart = (
    alt.Chart(test.strain_mean_df)
    .mark_line()
    .encode(x="epoch", y="mean(sse)", color="imageability", column="y")
    .add_selection(timetick_selection)
    .transform_filter(timetick_selection)
)
strain_img_chart.save(
    os.path.join(cfg.path["plot_folder"], "Strain_sse_img_by_output.html")
)

In [24]:
# Cortese: one chart per measure, coloured by test set with one panel per output.
# Each pair is (y encoding, suffix of the saved file name).
cortese_measures = (
    ("mean(conditional_sse)", "csse"),
    ("mean(sse)", "sse"),
    ("mean(acc)", "acc"),
)

for y_encoding, file_suffix in cortese_measures:
    cortese_chart = test.plot_reading_acc(test.cortese_mean_df).encode(
        y=y_encoding, color="testset", column="y"
    )
    cortese_chart.save(
        os.path.join(cfg.path["plot_folder"], f"cortese_{file_suffix}.html")
    )

In [25]:
# gcloud compute ssh tensorflow-2-4-20210120-000018 --zone us-east4-b -- -L 6006:localhost:6006
# !tensorboard --logdir tensorboard_log

# !tensorboard dev upload --logdir tensorboard_log