In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np
import os, random
import meta, modeling, old_modeling, data_wrangling, evaluate, troubleshooting
from importlib import reload

In [None]:
# Re-import both model modules so that edits made to their source files are
# picked up without restarting the kernel.
reload(old_modeling)
reload(modeling)

In [None]:
# Name of the trained model under inspection; its config JSON lives in a
# folder of the same name.
code_name = "Refrac2_5M"
config_path = os.path.join('/home/jupyter/tf/models', code_name, 'model_config.json')
cfg = meta.ModelConfig.from_json(config_path)

In [None]:
# Instantiate the reference (old) and refactored (new) implementations from
# the same config so their outputs can be compared.
# NOTE(review): batch_size_override=20 presumably pins the new model's batch
# dimension — confirm in modeling.TriangleModel.
old_model = old_modeling.TriangleModel(cfg)
new_model = modeling.TriangleModel(cfg, batch_size_override=20)

In [None]:
# Load the same checkpoint (the last entry of cfg.saved_weights) into both models.
old_model.load_weights(cfg.saved_weights[-1])
new_model.load_weights(cfg.saved_weights[-1])

In [None]:
# NOTE(review): build() runs after load_weights() in this notebook — confirm
# that both implementations expect this order.
old_model.build()
new_model.build()

In [None]:
def test_same_model(model, task1, task2, n_steps=13):
    """Print how far one model's output differs between two tasks.

    Both tasks must map to the same (input name, output name) pair in
    `modeling.IN_OUT`. The strain test set is loaded, its input is repeated
    `n_steps` times, and the model is called once under each task. For each
    of the first `n_steps` entries of the shared output, the maximum absolute
    difference between the two predictions is printed.

    Args:
        model: object exposing `set_active_task(name)` and callable on a list
            of inputs, returning a mapping keyed by output name.
        task1: first task name (key of `modeling.IN_OUT`).
        task2: second task name (key of `modeling.IN_OUT`).
        n_steps: how many times the input is repeated and how many output
            entries are compared. Defaults to 13, the value previously
            hard-coded in both places.

    Returns:
        None. The differences are printed, not returned.

    Raises:
        AssertionError: if the two tasks do not share input and output names.
    """
    i_name1, o_name1 = modeling.IN_OUT[task1]
    i_name2, o_name2 = modeling.IN_OUT[task2]

    assert i_name1 == i_name2, (
        f"{task1} and {task2} use different inputs: {i_name1} vs {i_name2}"
    )
    assert o_name1 == o_name2, (
        f"{task1} and {task2} use different outputs: {o_name1} vs {o_name2}"
    )

    i_name, o_name = i_name1, o_name1

    strain = data_wrangling.load_testset('dataset/testsets/strain_hf_con_hi.pkl.gz')
    input_x = [strain[i_name]] * n_steps

    # Same instance, same input: only the active task changes between calls.
    model.set_active_task(task1)
    y_pred1 = model(input_x)

    model.set_active_task(task2)
    y_pred2 = model(input_x)

    print(f"Max difference in {o_name} output:")
    for i in range(n_steps):
        print(
            tf.reduce_max(
                tf.math.abs(
                    y_pred1[o_name][i] - y_pred2[o_name][i]
                )
            )
        )

In [None]:
# Each pair names two tasks that should yield the same output in the new model.
for task_pair in (('exp_os', 'ort_sem'), ('exp_op', 'ort_pho')):
    test_same_model(new_model, *task_pair)

The minimal implementation of OP (`ort_pho`) and the minimal-change implementation (`exp_op`) give the same output. The same holds for `ort_sem` and `exp_os`.

# Double check weights

In [None]:
# Largest absolute element-wise gap for each (old, new) weight pair, in the
# order the models list their weights. zip() stops at the shorter list, so
# any surplus weights in either model are not compared.
[
    tf.reduce_max(abs(w_old - w_new))
    for w_old, w_new in zip(old_model.weights, new_model.weights)
]

Weights are identical

In [None]:
def test_task(task_name, old_model, new_model, output_name_old=None, output_name_new=None, n_steps=13):
    """Print how far the old and new models' outputs differ on one task.

    The strain test set is loaded, the task's input is repeated `n_steps`
    times, and both models are run on it with `task_name` active. For each of
    the first `n_steps` entries of the selected outputs, the maximum absolute
    difference (new minus old) is printed.

    Args:
        task_name: key of `modeling.IN_OUT`; selects the input and the
            default output name.
        old_model: reference model; must expose `set_active_task` and be
            callable on a list of inputs.
        new_model: refactored model with the same calling convention.
        output_name_old: key read from the old model's output. Defaults to
            the task's output name from `modeling.IN_OUT`.
            NOTE(review): assumes the old model uses that same key — confirm.
        output_name_new: key read from the new model's output. Defaults to
            the task's output name from `modeling.IN_OUT`.
        n_steps: how many times the input is repeated and how many output
            entries are compared. Defaults to 13, the value previously
            hard-coded in both places.

    Returns:
        None. The differences are printed, not returned.
    """
    i_name, o_name = modeling.IN_OUT[task_name]

    # Previously a None default was used directly as the dict key, which
    # raised KeyError; fall back to the task's own output name instead.
    if output_name_old is None:
        output_name_old = o_name
    if output_name_new is None:
        output_name_new = o_name

    strain = data_wrangling.load_testset('dataset/testsets/strain_hf_con_hi.pkl.gz')
    input_x = [strain[i_name]] * n_steps

    old_model.set_active_task(task_name)
    new_model.set_active_task(task_name)

    y_pred_old = old_model(input_x)
    y_pred_new = new_model(input_x)

    print(f"Max difference in {output_name_old} output:")
    for i in range(n_steps):
        print(
            tf.reduce_max(
                tf.math.abs(
                    y_pred_new[output_name_new][i] - y_pred_old[output_name_old][i]
                )
            )
        )
            


In [None]:
# Compare the 'pho' output of both models on these tasks.
pho_tasks = ('triangle', 'pho_pho', 'sem_pho')
for task_name in pho_tasks:
    print(f'Max abs diff in {task_name}')
    test_task(task_name, old_model, new_model, output_name_old='pho', output_name_new='pho')

1e-5 max abs difference was found in PHO output

In [None]:
# Compare the 'sem' output of both models on these tasks.
sem_tasks = ('triangle', 'sem_sem', 'pho_sem', 'ort_sem')
for task_name in sem_tasks:
    print(f'Max abs diff in {task_name}')
    test_task(task_name, old_model, new_model, output_name_old='sem', output_name_new='sem')

1e-4 max abs difference was found in sem output

# Dig deeper in OP task

In [None]:
# Compare the 'hop' entry of both models' outputs on the ort_pho task.
test_task('ort_pho', old_model, new_model, output_name_old='hop', output_name_new='hop')

OP is fine

In [None]:
# Same comparison for the input to pho; the key differs between versions
# ('input_p' in the old output, 'input_pho' in the new one).
test_task('ort_pho', old_model, new_model, output_name_old='input_p', output_name_new='input_pho')

Input P is not fine

In [None]:
# Same comparison for the 'input_cpp' entry of both outputs.
test_task('ort_pho', old_model, new_model, output_name_old='input_cpp', output_name_new='input_cpp')

CPP also not fine

Hop is fine

One of the problematic chunks:

Old
```python
##### Phonology Cleanup layer #####
cpp = self.tau * (tf.matmul(act_p_list[t], w_pc) + bias_cpp)
cpp += (1 - self.tau) * input_cpp_list[t]
```

New
```python
##### Phonology Cleanup layer #####
self.input_cpp = self.input_cpp.write(
    t + 1,
    self.tau * (tf.matmul(self.pho.read(t), w_pc) + bias_cpp)
    + (1 - self.tau) * self.input_cpp.read(t),
)
```

## Examine piece by piece

In [None]:
# Load the strain test set and repeat its 'ort' input 13 times, matching the
# list the comparison helpers feed to the models.
strain = data_wrangling.load_testset('dataset/testsets/strain_hf_con_hi.pkl.gz')
input_x = [strain['ort'] for _ in range(13)]

In [None]:
# Run both implementations on the same input with the ort_pho task active,
# keeping the full output mappings for piece-by-piece inspection below.
new_model.set_active_task('ort_pho')
old_model.set_active_task('ort_pho')
y_pred_new = new_model(input_x)
y_pred_old = old_model(input_x)

In [None]:
# Output names exposed by the old implementation.
y_pred_old.keys()

In [None]:
# Output names exposed by the new implementation.
y_pred_new.keys()

In [None]:
def check_eq(a, b, verbose=False):
    """Print whether the largest absolute gap between `a` and `b` is exactly 0.

    The gap is the maximum of `abs(a - b)`, so the comparison is exact (no
    tolerance) and the subtraction follows the operands' own `-` semantics.

    Args:
        a: first operand.
        b: second operand.
        verbose: when true, also print `a` and then `b`.
    """
    largest_gap = tf.reduce_max(abs(a - b)).numpy()
    print(largest_gap == 0.)
    if not verbose:
        return
    for operand in (a, b):
        print(operand)


# Check time averaged input components

In [None]:
# Check tau
# check_eq prints the result; a bare `==` placed mid-cell would be evaluated
# and silently discarded, so only the printed check is kept.
check_eq(old_model.tau, new_model.tau)

In [None]:
# act_p_0: entry 0 of each model's 'pho' output, compared exactly and printed
# in full (verbose) so the two tensors can be inspected side by side.
act_pho_0_old = y_pred_old['pho'][0]
act_pho_0_new = y_pred_new['pho'][0]
check_eq(act_pho_0_new, act_pho_0_old, verbose=True)

# Forced shape batch_size x pho units in new  

In [None]:
# Check W_PC: pick, from each model, the first weight whose name ends in
# 'w_pc:0' and compare the two exactly.
w_pc_new, w_pc_old = (
    [weight for weight in model.weights if weight.name.endswith('w_pc:0')][0]
    for model in (new_model, old_model)
)
check_eq(w_pc_new, w_pc_old)

In [None]:
# Check bias: pick, from each model, the first weight whose name ends in
# 'bias_cpp:0' and compare the two exactly.
bias_cpp_new, bias_cpp_old = (
    [weight for weight in model.weights if weight.name.endswith('bias_cpp:0')][0]
    for model in (new_model, old_model)
)
check_eq(bias_cpp_new, bias_cpp_old)

In [None]:
# Recompute, outside the models, the first term of the cleanup-layer update
# quoted above: tau * (act_p @ w_pc + bias_cpp), once per implementation.
p1_new = new_model.tau * (tf.matmul(act_pho_0_new, w_pc_new) + bias_cpp_new)
p1_old = old_model.tau * (tf.matmul(act_pho_0_old, w_pc_old) + bias_cpp_old)
check_eq(p1_new, p1_old, verbose=True)

- new dim: (20, 50)
- old dim: (1, 50)

All underlying components are the same, yet tau * (matmul(act_p, w_pc) + bias_cpp) is not identical.

## Consider a reduced example

In [None]:
a1 = [[1., 2., 3.]]  # Old implementation of activation: a single row
a2 = a1 * 3  # New implementation: the same row repeated, batch_size = 3
w = [[2., 3., 2.], [1., 0., 0.], [0., 0., 1.]]  # simplified w_pc


In [None]:
# Multiply the single-row and the repeated-row toy inputs by the same matrix
# and compare the results exactly.
r1 = tf.matmul(a1, w)
r2 = tf.matmul(a2, w)
check_eq(r1, r2, verbose=True)

- The actual calculation should behave like the toy example

In [None]:
# Compare only entry 0 of each result, so both operands have the same shape.
check_eq(p1_old[0], p1_new[0], verbose=True)

In [None]:
# Entry 0 of the TensorFlow matmul on the new model's activation and weight.
new = tf.matmul(act_pho_0_new, w_pc_new)[0]

In [None]:
# Entry 0 of the TensorFlow matmul on the old model's activation and weight.
old = tf.matmul(act_pho_0_old, w_pc_old)[0]

In [None]:
# Redo the product in NumPy from entry 0 of the new model's activation.
# Names carry a _new suffix so this cell does not share scratch variables
# with the matching cell for the old model.
act_np_new = act_pho_0_new[0].numpy()
w_np_new = w_pc_new.numpy()
np_from_new = np.matmul(act_np_new, w_np_new)

In [None]:
# Redo the product in NumPy from entry 0 of the old model's activation.
# Names carry an _old suffix so this cell does not share scratch variables
# with the matching cell for the new model.
act_np_old = act_pho_0_old[0].numpy()
w_np_old = w_pc_old.numpy()
np_from_old = np.matmul(act_np_old, w_np_old)

In [None]:
def mae(a,b):
    """Return the largest absolute element-wise difference between `a` and `b`.

    Note: despite the name, this is a maximum, not a mean.
    """
    abs_err = np.abs(a - b)
    return np.max(abs_err)

In [None]:
# NumPy product from the new model's inputs vs. from the old model's inputs.
mae(np_from_new, np_from_old)

In [None]:
# NumPy vs. TensorFlow product, both from the new model's inputs.
mae(np_from_new, new)

In [None]:
# NumPy vs. TensorFlow product, both from the old model's inputs.
mae(np_from_old, old)

TensorArray implementation is not very precise...