In [2]:
import numpy as np
import tensorflow as tf
import tensorflow_text as text
import tensorflow_hub as hub
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import GlobalMaxPooling1D, Dense
from tensorflow.keras import backend as K

In [2]:
max_seq_len = 128
preprocessor_dir = "/linguistics/ethan/DL_Prototype/models/universal-sentence-encoder-cmlm_multilingual-preprocess_2"
LaBSE_dir = "/linguistics/ethan/DL_Prototype/models/LaBSE2"

def build_model_with_preprocessor(max_seq_len, preprocessor_dir, LaBSE_dir):
    
    src_texts = tf.keras.layers.Input(shape=(), dtype=tf.string, name="input_src_text")
    tgt_texts = tf.keras.layers.Input(shape=(), dtype=tf.string, name="input_tgt_text")

    preprocessor = hub.KerasLayer(preprocessor_dir, trainable=False)
    encoder = hub.KerasLayer(LaBSE_dir, trainable=False)
    
    src_x = preprocessor(src_texts)
    tgt_x = preprocessor(tgt_texts)
    
    src_x = encoder(src_x)["default"]
    tgt_x = encoder(tgt_x)["default"]
    
    src_x = tf.math.l2_normalize(src_x, axis=1, epsilon=1e-12, name=None)
    tgt_x = tf.math.l2_normalize(tgt_x, axis=1, epsilon=1e-12, name=None)
    
    # np.matmul(english_embeds, np.transpose(italian_embeds))
    x = tf.concat([src_x, tgt_x], axis=1)
    #  x = GlobalMaxPooling1D(x)
    
    x = Dense(512, activation='relu')(x)
    x = Dense(64, activation='relu')(x)
    output = Dense(1, activation='sigmoid')(x)
    
    model = Model([src_texts, tgt_texts], output)
    
    return model

In [5]:
model = build_model_with_preprocessor(max_seq_len, preprocessor_dir, LaBSE_dir)

In [8]:
num_ckpts = 2
ckpt_weights = []

for epoch in range(1, num_ckpts+1):
    ckpt_path = f"/linguistics/ethan/DL_Prototype/models/LaBSE2_ckpts/tqc-000{epoch}.ckpt"
    model.load_weights(ckpt_path)
    weights = model.get_weights()
    ckpt_weights.append(weights)

In [16]:
ckpt_weights[0][-2].shape

(64, 1)

In [1]:
# averaged_weights = []
# for weights_list_tuple in zip(*ckpt_weights):

#     averaged_layer = [np.array(weights_).mean(axis=0) for weights_ in zip(weights_list_tuple)]
#     averaged_weights.append(averaged_layer)

# averaged_weights = np.array(averaged_weights)
# model.set_weights(averaged_weights)

In [22]:
it = [[1,2,3], [4,5,6]]
for t in zip(*it):
    print(t)

(1, 4)
(2, 5)
(3, 6)


In [159]:
def toy_model():
    
    input_src = tf.keras.layers.Input(shape=(10, ), name="input_src_text")
    input_tgt = tf.keras.layers.Input(shape=(10, ), name="input_tgt_text")
    x = tf.keras.layers.concatenate([input_src, input_tgt])
    output = tf.keras.layers.Dense(2, activation="softmax")(x)
    
    model = tf.keras.Model([input_src, input_tgt], output)
    
    return model

In [104]:
my_model = toy_model()

In [6]:
model.summary()

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_src_text (InputLayer)     [(None, 10)]         0                                            
__________________________________________________________________________________________________
input_tgt_text (InputLayer)     [(None, 10)]         0                                            
__________________________________________________________________________________________________
concatenate (Concatenate)       (None, 20)           0           input_src_text[0][0]             
                                                                 input_tgt_text[0][0]             
__________________________________________________________________________________________________
dense (Dense)                   (None, 2)            42          concatenate[0][0]     

In [3]:
input_src_text = tf.random.uniform((30, 10))
input_tgt_text = tf.random.uniform((30, 10))
labels = np.random.randint(2, size=(30,2))
# train_data = tf.data.Dataset.from_tensor_slices(dict(
#                                                  x = {"input_src_text": input_src_text,
#                                                       "input_tgt_text": input_tgt_text},
#                                                  y = labels)).batch(5)
train_data = tf.data.Dataset.from_tensor_slices(({"input_src_text": input_src_text,
                                                  "input_tgt_text": input_tgt_text},
                                                  labels)).batch(7)

In [4]:
for data in train_data:
    print(data[1])
    print("\n")

tf.Tensor(
[[1 0]
 [0 1]
 [0 1]
 [1 0]
 [0 1]
 [0 0]
 [1 0]], shape=(7, 2), dtype=int64)


tf.Tensor(
[[0 1]
 [0 1]
 [0 0]
 [1 1]
 [1 0]
 [1 0]
 [0 0]], shape=(7, 2), dtype=int64)


tf.Tensor(
[[1 0]
 [0 1]
 [1 1]
 [1 1]
 [1 0]
 [1 0]
 [0 0]], shape=(7, 2), dtype=int64)


tf.Tensor(
[[1 0]
 [1 1]
 [1 0]
 [1 0]
 [1 0]
 [0 1]
 [0 1]], shape=(7, 2), dtype=int64)


tf.Tensor(
[[1 1]
 [1 0]], shape=(2, 2), dtype=int64)




In [5]:
# high-level training
# model.compile(optimizer="adam",
#               metrics=["accuracy"],
#               loss="binary_crossentropy")
# model.fit(train_data, epochs=2)

# low level training with checkpoint storing and restoring
def train_step(my_model, example, optimizer, loss_fn):
    with tf.GradientTape() as tape:
        output = my_model(example[0], training=True)
        loss = loss_fn(example[1], output)
        
    variables = my_model.trainable_variables
    gradient = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(gradient, variables))
    
    return loss


def train_and_checkpoint_per_step(model, steps=50):
    ckpt.restore(manager.latest_checkpoint)
    if manager.latest_checkpoint:
        print("Restoring from {}".format(manager.latest_checkpoint))
    else:
        print("Initializing training from scratch")
    for _ in range(steps):
        example = next(iterator)
        # print(example)
        loss = train_step(model, example, optimizer, loss_fn)
        ckpt.step.assign_add(1)
        if int(ckpt.step) % 10 == 0:
            save_path = manager.save()
            print("Saved checkpoint for step {}: {}".format(int(ckpt.step), save_path))
            print("Loss {:1.2f}".format(loss.numpy()))

def train_and_checkpoint_per_epoch(model, iterator, epochs=10):
    ckpt.restore(manager.latest_checkpoint)
    if manager.latest_checkpoint:
        print("Restoring from {}".format(manager.latest_checkpoint))
    else:
        print("Initializing training from scratch")
        
    for epoch in range(epochs):
        print("\nTraining epoch: {}".format(epoch + 1))
        for example in iterator:
#             print(example)
            loss_value = train_step(model, example, optimizer, loss_fn)
            
#         ckpt.step.assign_add(1)
        save_path = manager.save()
        print("\tSaved checkpoint for epoch {}: {}".format(epoch + 1, save_path))
        print("\tLoss at final step {:1.2f}".format(loss_value.numpy()))

In [6]:
"""Training for first time or restoring training, remember to re-initiate ckpt and manager."""
my_model = toy_model()
optimizer = tf.keras.optimizers.Adam(0.1)
loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True)
train_iterator = iter(train_data)
ckpt = tf.train.Checkpoint(step=tf.Variable(1), 
                           optimizer=optimizer,
                           net=my_model, 
                           iterator=train_iterator)
manager = tf.train.CheckpointManager(ckpt, "/linguistics/ethan/DL_Prototype/models/example_ckpt", max_to_keep=3)

NameError: name 'toy_model' is not defined

In [171]:
# train_and_checkpoint_per_step(my_model, steps=50)
train_and_checkpoint_per_epoch(my_model, train_iterator, epochs=10)
# for e in iterator:
#     print(e)

Restoring from /linguistics/ethan/DL_Prototype/models/example_ckpt/ckpt-10

Training epoch: 1
	Saved checkpoint for epoch 1: /linguistics/ethan/DL_Prototype/models/example_ckpt/ckpt-11
	Loss at final step 1.00

Training epoch: 2
	Saved checkpoint for epoch 2: /linguistics/ethan/DL_Prototype/models/example_ckpt/ckpt-12
	Loss at final step 1.00

Training epoch: 3
	Saved checkpoint for epoch 3: /linguistics/ethan/DL_Prototype/models/example_ckpt/ckpt-13
	Loss at final step 1.00

Training epoch: 4
	Saved checkpoint for epoch 4: /linguistics/ethan/DL_Prototype/models/example_ckpt/ckpt-14
	Loss at final step 1.00

Training epoch: 5
	Saved checkpoint for epoch 5: /linguistics/ethan/DL_Prototype/models/example_ckpt/ckpt-15
	Loss at final step 1.00

Training epoch: 6
	Saved checkpoint for epoch 6: /linguistics/ethan/DL_Prototype/models/example_ckpt/ckpt-16
	Loss at final step 1.00

Training epoch: 7
	Saved checkpoint for epoch 7: /linguistics/ethan/DL_Prototype/models/example_ckpt/ckpt-17
	Loss

In [178]:
# my_model.load_weights(manager.latest_checkpoint)
# tf.keras.models.load_model(manager.latest_checkpoint)
ckpt.restore(manager.latest_checkpoint)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f935412acf8>

In [181]:
input_data = [tf.random.uniform((5, 10)), tf.random.uniform((5, 10))]

In [182]:
my_model = toy_model()
my_model(input_data)

<tf.Tensor: shape=(5, 2), dtype=float32, numpy=
array([[0.5499562 , 0.45004386],
       [0.5966115 , 0.40338856],
       [0.6039533 , 0.3960467 ],
       [0.65478605, 0.34521392],
       [0.6120649 , 0.38793504]], dtype=float32)>

In [183]:
my_model = toy_model()
my_model(input_data)

<tf.Tensor: shape=(5, 2), dtype=float32, numpy=
array([[0.46309888, 0.53690106],
       [0.45111725, 0.5488827 ],
       [0.533317  , 0.46668294],
       [0.23123655, 0.7687634 ],
       [0.55321455, 0.44678545]], dtype=float32)>

In [188]:
ckpt = tf.train.Checkpoint(model=my_model)
ckpt.restore("/linguistics/ethan/DL_Prototype/models/example_ckpt/ckpt-20")
my_model(input_data)

<tf.Tensor: shape=(5, 2), dtype=float32, numpy=
array([[0.46309888, 0.53690106],
       [0.45111725, 0.5488827 ],
       [0.533317  , 0.46668294],
       [0.23123655, 0.7687634 ],
       [0.55321455, 0.44678545]], dtype=float32)>

In [191]:
ckpt(input_data)

TypeError: 'Checkpoint' object is not callable

In [176]:
# ckpt.step.numpy()
# ckpt.step.assign_add(1)
# ckpt.step.numpy()
manager.latest_checkpoint

'/linguistics/ethan/DL_Prototype/models/example_ckpt/ckpt-20'

In [16]:
model.predict([tf.random.uniform((2, 10)), tf.random.uniform((2, 10))])

array([[0.20321831, 0.79678166],
       [0.6461293 , 0.35387072]], dtype=float32)