In [1]:
# disable compiler warnings
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 

# imports 
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.python.ops.numpy_ops import np_config
np_config.enable_numpy_behavior()
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.python.keras.layers import Dense
from typing import List
import datetime

%load_ext tensorboard

In [2]:
# get mnist from tensorflow_datasets
mnist = tfds.load("mnist", split =["train","test"], as_supervised=True)
train_ds = mnist[0]
val_ds = mnist[1]

# write function to create the dataset that we want
def preprocess(data, batch_size, subtask):
    # FIRST STEP
    # image should be float
    data = data.map(lambda x, t: (tf.cast(x, float), t))
    # image should be flattened
    data = data.map(lambda x, t: (tf.reshape(x, (-1,)), t))
    # image vector will here have values between -1 and 1
    data = data.map(lambda x,t: ((x/128.)-1., t))
    # we want to have two mnist images in each example
    # this leads to a single example being ((x1,y1),(x2,y2))
    zipped_ds = tf.data.Dataset.zip((data.shuffle(2000), 
                                     data.shuffle(2000)))


    # SECOND STEP
    # a + b >= 5 is a boolean classification -> one output perceptron, BinaryCrossEntropy loss function
    # y_true: 0 oder 1, y_pred : [0,1]
    # 
    # a - b = y choose a number for {-9, -8, ..., 8, 9} softmax from 19 possibilities, CategoricalCrossEntropy loss function
    # y_true: one-hot-vector size 19, y_pred : one-hot-vector size 19 softmax

    if (subtask == 1):
        # target is 1 if value1 + value2 >= 5 else 0
        zipped_ds = zipped_ds.map(lambda x1, x2: (x1[0], x2[0], x1[1] + x2[1] >= 5))
        # transform boolean target to int
        zipped_ds = zipped_ds.map(lambda x1, x2, t: (x1,x2, tf.cast(t, tf.int32)))
    elif (subtask == 2):
        # target is value of label1 - label2
        zipped_ds= zipped_ds.map(lambda x1, x2: (x1[0], x2[0], x1[1] - x2[1]))


    # batch the dataset
    zipped_ds = zipped_ds.batch(batch_size)
    # prefetch
    zipped_ds = zipped_ds.prefetch(tf.data.AUTOTUNE)
    return zipped_ds

In [3]:

# Model of our network
class TwinMNISTModel(tf.keras.Model):

    # constructor
    def __init__(self, optimizer, subtask):
        super().__init__()
        # inherit functionality from parent class

        # optimizer, loss function and metrics
        self.optimizer = optimizer
        
        # layers to be used
        # feed 28*28 pixels into this layer, spits out 0-9 one-hot-vector
        self.dense1 = tf.keras.layers.Dense(32, activation=tf.nn.relu)

        # feed in 0-9 one-hot-vector, spits out softmax representation
        self.dense2 = tf.keras.layers.Dense(10, activation=tf.nn.softmax)

        # a + b >= 5 ? gets in two softmax representations of a digit, spits out 0 = False, 1 = True
        if subtask == 1:
            self.out_layer = tf.keras.layers.Dense(1, activation=tf.nn.sigmoid)
            self.loss_function = tf.keras.losses.BinaryCrossentropy()
            self.metrics_list = [tf.keras.metrics.BinaryAccuracy(name="Accuracy"),
                                 tf.keras.metrics.Mean(name="loss")]
                                 
        # a - b = y, gets in two softmax representations of a digit, spits out a value
        else:
            self.out_layer = tf.keras.layers.Dense(1, activation=None)
            self.loss_function = tf.keras.losses.MeanSquaredError()
            self.metrics_list = [tf.keras.metrics.MeanSquaredError(name="Accuracy"),
                                 tf.keras.metrics.Mean(name="loss")]
        
        
    # call method
    @tf.function
    def call(self, images, training=False):
        # get images
        img1, img2 = images

        # let image go through first two layers, spits out one-hot-vector representing the number seen
        img1_x = self.dense1(img1)
        img1_x = self.dense2(img1_x)
        
        # same with second image
        img2_x = self.dense1(img2)
        img2_x = self.dense2(img2_x)

        # concat the one-hot-vectors
        combined_x = tf.concat([img1_x, img2_x ], axis=1)

        # concatenated one hot vector into the output layer, squeeze the axis with dim=1
        return tf.squeeze(self.out_layer(combined_x), axis=None, name=None)



    # 3. metrics property
    @property
    def metrics(self):
        return self.metrics_list
        # return a list with all metrics in the model



    # 4. reset all metrics objects
    def reset_metrics(self):
        for metric in self.metrics:
            metric.reset_states()



    # train_step method
    def train_step(self, data):
        img1, img2, label = data
        
        # compute output and loss, train the variables
        with tf.GradientTape() as tape:
            output = self((img1, img2), training=True)
            loss = self.loss_function(label, output)
            
        # update trainable variables
        gradients = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))

        # rounds output with bankers rounding
        function_to_map = lambda x: tf.cast(tf.math.round(x), tf.float32)
        output = tf.map_fn(function_to_map, output)

        # update metrics
        self.metrics[0].update_state(output, label)
        self.metrics[1].update_state(loss)
        
        # return a dict with metric information
        return {m.name : m.result() for m in self.metrics_list}



    # test_step method
    def test_step(self, data):
        img1, img2, label = data

        # compute output and loss, without training
        output = self((img1, img2), training=False)
        loss = self.loss_function(label, output)

        # rounds output with bankers rounding
        function_to_map = lambda x: tf.cast(tf.math.round(x), tf.float32)
        output = tf.map_fn(function_to_map, output)
        
        
        # update metrics
        self.metrics[0].update_state(output, label)
        self.metrics[1].update_state(loss)

        print("out: ", output, "\n label: ", label, "\naccuracy: ", self.metrics[0].result())

        # return a dict with metric information 
        return {m.name : m.result() for m in self.metrics_list}

## Create a summary writer to log data

- use tf.summary.create_file_writer(log_path)

In [4]:
config_name = "Run-42"
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
train_log_path = f"logs/{config_name}/{current_time}/train"
val_log_path = f"logs//{config_name}/{current_time}/val"
train_summary_writer = tf.summary.create_file_writer(train_log_path)
val_summary_writer = tf.summary.create_file_writer(val_log_path)

## Write a training loop function

Arguments: 
 - the model to train, 
 - the data to train on, 
 - the data to test on, 
 - how many epochs to train, 
 - the train summary writer object to use for logging
 - the validation summary writer object to use for logging
 - a path to save trained model weights to

In [5]:
# trains the model by iterating through the dataset and applying training_step method epochs time
def training_loop(model, train_ds, epochs, train_summary_writer):
    # iterate over epochs
    for epoch in range(epochs):
        print(f"epoch: {epoch}")

        # train steps on all batches in the training data
        for (img1, img2, label) in train_ds:
            metrics = model.train_step((img1, img2, label))
            
            # keep data in summary with metrics
            with train_summary_writer.as_default():
                for metric in model.metrics_list:
                    tf.summary.scalar(f"{metric.name}", metric.result(), step=epoch)
        
        # print current metric values and reset the metrics
        print([f"{key} : {value.numpy()}" for (key, value ) in metrics.items()])
        model.reset_metrics()



# tests overall performance of model
def test_loop(model, test_ds, val_summary_writer):
    # test steps on every item in test dataset
    for (img1, img2, label) in test_ds:
        metrics = model.test_step((img1, img2, label))
        
        # keep data with metrics
        with val_summary_writer.as_default():
            for metric in model.metrics_list:
                tf.summary.scalar(f"{metric.name}", metric.result(), step=1)
    
    #print current metric values and reset the metrics
    print([f"{key} : {value.numpy()}" for (key, value ) in metrics.items()])
    model.reset_metrics()

## Use the training loop function to train the model

In [6]:
# get preprocessed data for both subtasks
train_ds_sub1 = preprocess(train_ds, batch_size=32, subtask=1) #train_ds.apply(preprocess)
val_ds_sub1 = preprocess(val_ds, batch_size=32, subtask=1) #val_ds.apply(preprocess)

train_ds_sub2 = preprocess(train_ds, batch_size=32, subtask=2) #train_ds.apply(preprocess)
val_ds_sub2 = preprocess(val_ds, batch_size=32, subtask=2) #val_ds.apply(preprocess)


# check the contents of the dataset
for img1, img2, label in train_ds_sub1.take(1):
    print(img1.shape, img2.shape, label.shape)
    #print(img1, img2, label)

for img1, img2, label in train_ds_sub2.take(1):
    print(img1.shape, img2.shape, label.shape)
    #print(img1, img2, label)

""" SUBTASK 1"""
"""
model = TwinMNISTModel(tf.keras.optimizers.Adam(), subtask=1)
print(f"testing beforehand subtask 1 ...")
test_loop(model,
        test_ds=val_ds_sub1,
        val_summary_writer=val_summary_writer)
        
print(f"training subtask 1 ...\n")
training_loop(model,
              train_ds=train_ds_sub1,
              epochs=3, 
              train_summary_writer=train_summary_writer)

print(f"testing subtask 1 ...\n")
test_loop(model,
        test_ds=val_ds_sub1,
        val_summary_writer=val_summary_writer)
"""


""" SUBTASK 2 """

model = TwinMNISTModel(tf.keras.optimizers.Adam(), subtask=2)
print(f"testing beforehand subtask 2 ...")
test_loop(model,
        test_ds=val_ds_sub2,
        val_summary_writer=val_summary_writer)

print(f"training subtask 2 ...\n")
training_loop(model,
              train_ds=train_ds_sub2,
              epochs=7, 
              train_summary_writer=train_summary_writer)

print(f"testing subtask 2...\n")
test_loop(model,
        test_ds=val_ds_sub2,
        val_summary_writer=val_summary_writer)

(32, 784) (32, 784) (32,)
(32, 784) (32, 784) (32,)
testing beforehand subtask 2 ...
out:  tf.Tensor(
[-0. -0. -0.  0.  0. -0. -0. -0. -0. -0. -0. -0.  0. -0.  0. -0. -0.  0.
 -0. -0. -0. -0. -0. -0. -0. -0. -0. -0. -0.  0. -0. -0.], shape=(32,), dtype=float32) 
 label:  tf.Tensor(
[-1  0 -5 -8  1  3  7 -2  6 -7  1  2 -6  7  1 -2 -6 -3  1 -2  3 -4 -1 -1
  7 -1 -4 -2 -1  1 -2 -8], shape=(32,), dtype=int64) 
accuracy:  tf.Tensor(17.1875, shape=(), dtype=float32)
out:  tf.Tensor(
[-0. -0. -0. -0. -0. -0. -0. -0. -0. -0. -0. -0. -0. -0. -0. -0.  0. -0.
 -0.  0.  0. -0. -0. -0. -0. -0. -0.  0. -0. -0. -0. -0.], shape=(32,), dtype=float32) 
 label:  tf.Tensor(
[-8 -1  0 -6  1  7  2 -1 -2  4  1 -5  5 -2  4  6 -7  6 -3  6 -8  0  6  4
 -2  3 -7  5  3  3 -1  1], shape=(32,), dtype=int64) 
accuracy:  tf.Tensor(18.53125, shape=(), dtype=float32)
out:  tf.Tensor(
[-0. -0. -0. -0. -0.  0. -0. -0. -0. -0. -0. -0. -0. -0. -0.  0. -0. -0.
 -0. -0. -0. -0. -0. -0. -0. -0. -0. -0.  0.  0. -0.  0.], shape

In [None]:
# open the tensorboard logs
%tensorboard --logdir logs/

Reusing TensorBoard on port 6006 (pid 24431), started 0:21:48 ago. (Use '!kill 24431' to kill it.)