In [1]:
# disable compiler warnings
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 

# imports 
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.python.keras.layers import Dense
from typing import List
import math

%load_ext tensorboard

In [2]:
# get mnist from tensorflow_datasets
mnist = tfds.load("mnist", split =["train","test"], as_supervised=True)
train_ds = mnist[0]
val_ds = mnist[1]

# write function to create the dataset that we want
def preprocess(data, batch_size, subtask):
    # FIRST STEP
    # image should be float
    data = data.map(lambda x, t: (tf.cast(x, float), t))
    # image should be flattened
    data = data.map(lambda x, t: (tf.reshape(x, (-1,)), t))
    # image vector will here have values between -1 and 1
    data = data.map(lambda x,t: ((x/128.)-1., t))
    # we want to have two mnist images in each example
    # this leads to a single example being ((x1,y1),(x2,y2))
    zipped_ds = tf.data.Dataset.zip((data.shuffle(2000), 
                                     data.shuffle(2000)))


    # SECOND STEP
    # a + b >= 5 is a boolean classification -> one output perceptron, BinaryCrossEntropy loss function
    # y_true: 0 oder 1, y_pred : [0,1]
    # 
    # a - b = y choose a number for {-9, -8, ..., 8, 9} softmax from 19 possibilities, CategoricalCrossEntropy loss function
    # y_true: one-hot-vector size 19, y_pred : one-hot-vector size 19 softmax

    if (subtask == 1):
        zipped_ds = zipped_ds.map(lambda x1, x2: (x1[0], x2[0], x1[1] + x2[1] >= 5))
        # transform boolean target to int
        zipped_ds = zipped_ds.map(lambda x1, x2, t: (x1,x2, tf.cast(t, tf.int32)))
    elif (subtask == 2):
        # target is -9, -8, ..., 8, 9
        # enumerate 0 means -9, 1 means -8, .... , 18 means 9
        zipped_ds= zipped_ds.map(lambda x1, x2: (x1[0], x2[0], tf.one_hot(x1[1] - x2[1] + 9, depth = 19)))


    # batch the dataset
    zipped_ds = zipped_ds.batch(batch_size)
    # prefetch
    zipped_ds = zipped_ds.prefetch(tf.data.AUTOTUNE)
    return zipped_ds

train_ds_sub1 = preprocess(train_ds, batch_size=32, subtask=1) #train_ds.apply(preprocess)
val_ds_sub1 = preprocess(val_ds, batch_size=32, subtask=1) #val_ds.apply(preprocess)

train_ds_sub2 = preprocess(train_ds, batch_size=32, subtask=2) #train_ds.apply(preprocess)
val_ds_sub2 = preprocess(val_ds, batch_size=32, subtask=2) #val_ds.apply(preprocess)



In [3]:
# check the contents of the dataset
for img1, img2, label in train_ds_sub1.take(1):
    print(img1.shape, img2.shape, label.shape)
    #print(img1, img2, label)

for img1, img2, label in train_ds_sub2.take(1):
    print(img1.shape, img2.shape, label.shape)
    #print(img1, img2, label)

(32, 784) (32, 784) (32,)
(32, 784) (32, 784) (32, 19)


In [4]:
class TwinMNISTModel(tf.keras.Model):

    # 1. constructor
    def __init__(self, optimizer):
        super().__init__()
        # inherit functionality from parent class

        # optimizer, loss function and metrics
        self.metrics_list = [tf.keras.metrics.BinaryAccuracy(),
                            tf.keras.metrics.Mean(name="loss")]
        
        self.optimizer = optimizer
        
        self.loss_function_sub1 = tf.keras.losses.BinaryCrossentropy()
        self.loss_function_sub2 = tf.keras.losses.CategoricalCrossentropy()
        
        # layers to be used

        # feed 28*28 pixels into this layer, spits out 0-9 one-hot-vector
        self.dense1 = tf.keras.layers.Dense(32, activation=tf.nn.relu)

        # feed in 0-9 one-hot-vector, spits out softmax representation
        self.dense2 = tf.keras.layers.Dense(10, activation=tf.nn.softmax)

        # gets in concatenated one-hot-vector with softmax activation, spits out prediction of subtask1 -> one perceptron with 0or1
        self.out_layer_sub1 = tf.keras.layers.Dense(1, activation=tf.nn.sigmoid)
        # gets in concatenated one-hot-vector with softmax activation, spits out prediction of subtask2 -> one-hot-vector of size 19
        self.out_layer_sub2 = tf.keras.layers.Dense(19 ,activation=tf.nn.softmax)
        
        
        
        
    # 2. call method (forward computation)
    @tf.function
    def call(self, images, subtask, training=False):
        # get images
        img1, img2 = images

        # let image go through first two layers, spits out one-hot-vector representing the number seen
        img1_x = self.dense1(img1)
        img1_x = self.dense2(img1_x)
        
        # same with second image
        img2_x = self.dense1(img2)
        img2_x = self.dense2(img2_x)

        # concat the one-hot-vectors
        combined_x = tf.concat([img1_x, img2_x ], axis=1)

        # feed concatenation into chosen subtask
        if subtask == 1:   
            ret = tf.squeeze(self.out_layer_sub1(combined_x), axis=None, name=None)
        else:
            ret = tf.squeeze(self.out_layer_sub2(combined_x), axis=None, name=None)

        return ret



    # 3. metrics property
    @property
    def metrics(self):
        return self.metrics_list
        # return a list with all metrics in the model



    # 4. reset all metrics objects
    def reset_metrics(self):
        for metric in self.metrics:
            metric.reset_states()



    # 5. train step method
    def train_step(self, data, subtask):
        img1, img2, label = data
        
        with tf.GradientTape() as tape:
            output = self((img1, img2), subtask, training=True)
            if subtask == 1:
                loss = self.loss_function_sub1(label, output)
            else:
                loss = self.loss_function_sub2(label, output)
            
        gradients = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))

        self.metrics_list[0].update(loss)

        if subtask == 1:
            self.metrics_list[1].update(label, output)
        
        return {m.name() : m.result() for m in self.metrics_list}


    # 6. test_step method
    def test_step(self, data, subtask):
        img1, img2, label = data

        prediction = self((img1, img2), subtask, training=False)

        if subtask == 1:
            loss = self.loss_function_sub1(label, prediction)
        else:
            loss = self.loss_function_sub2(label, prediction)

        self.metrics_list[0].update(loss)
        if subtask == 1:
            self.metrics_list[1].update(label, prediction)
            
        return {m.name() : m.result() for m in self.metrics_list}

## Create a summary writer to log data

- use tf.summary.create_file_writer(log_path)

In [5]:
#current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

train_log_path = f"logs/"

val_log_path = f"logs/"

train_summary_writer = ...

val_summary_writer = ...

## Write a training loop function

Arguments: 
 - the model to train, 
 - the data to train on, 
 - the data to test on, 
 - how many epochs to train, 
 - the train summary writer object to use for logging
 - the validation summary writer object to use for logging
 - a path to save trained model weights to

In [10]:
def training_loop(model, train_ds, subtask, epochs):
    train_loss = 0
    # 1. iterate over epochs
    for epoch in range(epochs):
        # 2. train steps on all batches in the training data
        for (img1, img2, label) in train_ds:
            loss = model.train_step((img1, img2, label), subtask=subtask)
            train_loss += model.train_step((img1, img2, label), subtask=subtask)

    return train_loss

        # 3. log and print training metrics
        #with train_summary_writer.as_default():
        #    ...
        # 4. reset metric objects
        # 5. evaluate on validation data
        # 6. log validation metrics
        #with val_summary_writer.as_default():
        #    ...
        # 7. reset metric objects
    # 8. save model weights

def test_loop(model, test_ds, subtask):
    test_loss = 0
    test_accuracy = 0

    ratio_agg = []

    divisor = 0

    for (img1, img2, label) in test_ds:
        loss, prediction = model.test_step((img1, img2, label), subtask=subtask)
        test_loss += loss

        if subtask == 1:
            acc_matrix = np.absolute(prediction.numpy() - label.numpy()) < 0.5
            ratio = acc_matrix.sum() / acc_matrix.size
            ratio_agg.append(ratio)
        if subtask == 2:
            sample_test_accuracy = np.argmax(label, axis=1) == np.argmax(prediction, axis=1)   
            print("label: ", np.argmax(label, axis=1))
            print("prediction: ", np.argmax(prediction, axis=1))
            sample_test_accuracy = np.mean(sample_test_accuracy)
            ratio_agg.append(sample_test_accuracy)
            print(sample_test_accuracy)
            #sample_test_accuracy
    
    test_accuracy = np.average(np.array(ratio_agg))

    return test_loss, test_accuracy

## Use the training loop function to train the model

In [12]:
# open the tensorboard logs
#%tensorboard --logdir logs/
train_ds_sub1 = preprocess(train_ds, batch_size=32, subtask=1) #train_ds.apply(preprocess)
val_ds_sub1 = preprocess(val_ds, batch_size=32, subtask=1) #val_ds.apply(preprocess)

train_ds_sub2 = preprocess(train_ds, batch_size=32, subtask=2) #train_ds.apply(preprocess)
val_ds_sub2 = preprocess(val_ds, batch_size=32, subtask=2) #val_ds.apply(preprocess)

model = TwinMNISTModel(tf.keras.optimizers.Adam())


print(f"SUBTASK 1: \n")

loss, accuracy = test_loop(model, test_ds=val_ds_sub1, subtask=1)
print(f"Loss before training: {loss}, Accuracy before training: {accuracy}\n")

print(f"training .....\n")
loss = training_loop(model, train_ds=train_ds_sub1, subtask=1, epochs=3)
print(f".....training done with loss: {loss}!\n\n")

loss, accuracy = test_loop(model, test_ds=val_ds_sub1, subtask=1)
print(f"Loss after training: {loss}, Accuracy after training: {accuracy}\n")


#model = TwinMNISTModel(tf.keras.optimizers.Adam())
print(f"SUBTASK 2: \n")

loss, accuracy = test_loop(model, test_ds=val_ds_sub2, subtask=2)
print(f"Loss before training: {loss}, Accuracy before training: {accuracy}\n")

print(f"training .....\n")
loss = training_loop(model, train_ds=train_ds_sub2, subtask=2, epochs=7)
print(f".....training done with loss: {loss}!\n\n")

loss, accuracy = test_loop(model, test_ds=val_ds_sub2, subtask=2)
print(f"Loss after training: {loss}, Accuracy after training: {accuracy}\n")



### NEW OPTIMIZER SGD WITH 0.01!!!! ### 

model = TwinMNISTModel(tf.keras.optimizers.SGD(0.01))

loss, accuracy = test_loop(model, test_ds=val_ds_sub1, subtask=1)
print(f"Loss before training: {loss}, Accuracy before training: {accuracy}\n")

print(f"training .....\n")
loss = training_loop(model, train_ds=train_ds_sub1, subtask=1, epochs=3)
print(f".....training done with loss: {loss}!\n\n")

loss, accuracy = test_loop(model, test_ds=val_ds_sub1, subtask=1)
print(f"Loss after training: {loss}, Accuracy after training: {accuracy}\n")



#model = TwinMNISTModel(tf.keras.optimizers.Adam())
print(f"SUBTASK 2: \n")

loss, accuracy = test_loop(model, test_ds=val_ds_sub2, subtask=2)
print(f"Loss before training: {loss}, Accuracy before training: {accuracy}\n")

print(f"training .....\n")
loss = training_loop(model, train_ds=train_ds_sub2, subtask=2, epochs=7)
print(f".....training done with loss: {loss}!\n\n")

loss, accuracy = test_loop(model, test_ds=val_ds_sub2, subtask=2)
print(f"Loss after training: {loss}, Accuracy after training: {accuracy}\n")






SUBTASK 1: 

Loss before training: 218.92393493652344, Accuracy before training: 0.4277156549520767

training .....

.....training done with loss: 1326.3746177777648!


Loss after training: 47.512081146240234, Accuracy after training: 0.9513777955271565

SUBTASK 2: 



ValueError: Exception encountered when calling layer "twin_mnist_model_4" (type TwinMNISTModel).

in user code:

    File "/tmp/ipykernel_4913/1994251168.py", line 54, in call  *
        ret = tf.squeeze(self.out_layer_sub2(combined_x), axis=None, name=None)
    File "/home/julian/anaconda3/envs/iannwtf/lib/python3.7/site-packages/keras/utils/traceback_utils.py", line 67, in error_handler  **
        raise e.with_traceback(filtered_tb) from None

    ValueError: tf.function only supports singleton tf.Variables created on the first call. Make sure the tf.Variable is only created once or created outside tf.function. See https://www.tensorflow.org/guide/function#creating_tfvariables for more information.


Call arguments received by layer "twin_mnist_model_4" (type TwinMNISTModel):
  • images=('tf.Tensor(shape=(32, 784), dtype=float32)', 'tf.Tensor(shape=(32, 784), dtype=float32)')
  • subtask=2
  • training=False

In [None]:
loss, accuracy = test_loop(model, test_ds=val_ds_sub2, subtask=2)
print(f"Loss after training: {loss}, Accuracy after training: {accuracy}\n")

# 1. instantiate model

# 2. choose a path to save the weights

#save_path = ...

# 2. pass arguments to training loop function

#training_loop(...)

label:  [ 8  9  8 16 11  1  6 16  7 10 12 11  9  6 10  8  3  7  4 10  7  5 14 15
  0 14 13  8 12  8  9 10]
prediction:  [ 2  9  9 16 11  2  6 16  5 12 12 13  9  5 11  9  2  9  9  9  5  5 15 15
  2 13 13  9 15  5  9  9]
0.34375
label:  [10 14  7 10  9 12 11 12  4  7  5  6 17 16 10 17  4 16  7  9 12 11  4 10
  6 16 15  6  4 13  7 11]
prediction:  [ 9 13  6  3  9 12 12 12  9  9  5  7 16 16 11 16  5 15  5 16 13  9  2 12
  5 16 15  5  5 12  2  9]
0.21875
label:  [14 10 18 11  6 16 13  9 15  9  7  8 15  6 15 12  3 11  6  3 12 14 14  4
 10  6  4 13  3 16  3  8]
prediction:  [ 6  7 16  9  8 11 11  5 16  9  9  9 15  7 13  5  3 13  5  9 12 15 16  7
  9  8  4 15  5 16  2  9]
0.1875
label:  [11 10  7  8 11 11  6 11  9 11 13 17  1 17 18  3  9  9  9  4 14  5  7  6
 14 10  7  1 12  5 10 13]
prediction:  [12  9  5  5 11  9  5 13  9 12  8 16  2 15 16  2  9  9  9  3 16  5  9  8
 12 10  9  2 13  5  9 14]
0.25
label:  [ 1  7  4  9 15  8  9  1  9 11  5  8  0 12  8  8  9  9  9 12  7  7  9  4
 10 10 17  3 16

KeyboardInterrupt: 