In [117]:
import tensorflow as tf
import tensorflow_datasets as tfds
import datetime

# magic line only needed in jupyter notebooks!
%load_ext tensorboard 

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [132]:
# load data with as_supervised=True to import labels
mnist = tfds.load("mnist", split =["train","test"], as_supervised=True)
train_ds = mnist[0]
val_ds = mnist[1]

In [142]:
# 2. write function to create the dataset that we want
def preprocess(data, batch_size, task):
    # image should be float
    data = data.map(lambda x, t: (tf.cast(x, float), t))
    # image should be flattened
    data = data.map(lambda x, t: (tf.reshape(x, (-1,)), t))
    # image vector will here have values between -1 and 1
    data = data.map(lambda x,t: ((x/128.)-1., t))
    # we want to have two mnist images in each example
    
    # this leads to a single example being ((x1,y1),(x2,y2))
    data = tf.data.Dataset.zip((data.shuffle(2000), data.shuffle(2000)))
    
    # map ((x1,y1),(x2,y2)) to (x1,x2, y1==y2*) *boolean
    if(task==1):
        data = data.map(lambda x1, x2: (x1[0], x2[0], x1[1]+x2[1]>=5))
        # transform boolean target to int
        data = data.map(lambda x1, x2, t: (x1,x2, tf.cast(t, tf.int32)))
    else:
        # possible 19 results for a-b [-9, 9], encode as one hot vector for classification
        data = data.map(lambda x1, x2: (x1[0], x2[0], tf.one_hot((x1[1]-x2[1]), 19, dtype=tf.float32)))
    
    # batch the dataset
    data = data.batch(batch_size)
    # prefetch
    data = data.prefetch(tf.data.AUTOTUNE)
    return data

train_ds1 = preprocess(train_ds, batch_size=32, task=1) #train_ds.apply(preprocess)
val_ds1 = preprocess(val_ds, batch_size=32, task=1) #val_ds.apply(preprocess)

train_ds2 = preprocess(train_ds, batch_size=32, task=2) #train_ds.apply(preprocess)+
val_ds2 = preprocess(val_ds, batch_size=32, task=2) #val_ds.apply(preprocess)
print(train_ds2)

<PrefetchDataset element_spec=(TensorSpec(shape=(None, 784), dtype=tf.float32, name=None), TensorSpec(shape=(None, 784), dtype=tf.float32, name=None), TensorSpec(shape=(None, 19), dtype=tf.float32, name=None))>


In [134]:
class BinaryModel(tf.keras.Model):

    # 1. constructor
    def __init__(self):
        super().__init__()
        # inherit functionality from parent class

        # optimizer, loss function and metrics
        self.metrics_list = [tf.keras.metrics.BinaryAccuracy(name="accuracy"), tf.keras.metrics.Mean(name="loss")]
        
        self.optimizer = tf.keras.optimizers.Adam()
        
        self.loss_function = tf.keras.losses.BinaryCrossentropy()

        # layers to encode the images (both layers used for both images)
        self.dense1 = tf.keras.layers.Dense(128, activation=tf.nn.relu)
        self.dense2 = tf.keras.layers.Dense(128, activation=tf.nn.relu)
        
        self.dense3 = tf.keras.layers.Dense(128, activation=tf.nn.relu)
        
        self.out_layer = tf.keras.layers.Dense(1,activation=tf.nn.sigmoid)
        
    # 2. call method (forward computation)
    def call(self, images, training=False):
        img1, img2 = images
        
        img1_x = self.dense1(img1)
        img1_x = self.dense2(img1_x)
        
        img2_x = self.dense1(img2)
        img2_x = self.dense2(img2_x)
        
        combined_x = tf.concat([img1_x, img2_x ], axis=1)
        combined_x = self.dense3(combined_x)
        return self.out_layer(combined_x)

    # 3. metrics property
    @property
    def metrics(self):
        return self.metrics_list
        # return a list with all metrics in the model

    # 4. reset all metrics objects
    def reset_metrics(self):
        for metric in self.metrics:
            metric.reset_states()

    # 5. train step method
    @tf.function
    def train_step(self, data):
        img1, img2, label = data
        
        with tf.GradientTape() as tape:
            output = self((img1, img2), training=True)
            loss = self.loss_function(label, output)
            
        gradients = tape.gradient(loss, self.trainable_variables)
        
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        
        # update the state of the metrics according to loss
        self.metrics[0].update_state(label, output)
        self.metrics[1].update_state(loss)
        
        # return a dictionary with metric names as keys and metric results as values
        return {m.name : m.result() for m in self.metrics}

    # 6. test_step method
    @tf.function
    def test_step(self, data):
        img1, img2, label = data
        # same as train step (without parameter updates)
        output = self((img1, img2), training=False)
        loss = self.loss_function(label, output)
        self.metrics[0].update_state(label, output)
        self.metrics[1].update_state(loss)
        
        return {m.name : m.result() for m in self.metrics}

In [138]:
class CategoricalModel(tf.keras.Model):

    # 1. constructor
    def __init__(self):
        super().__init__()
        # inherit functionality from parent class

        # optimizer, loss function and metrics
        self.metrics_list = [tf.keras.metrics.CategoricalAccuracy(name="accuracy"), tf.keras.metrics.Mean(name="loss")]
        
        self.optimizer = tf.keras.optimizers.Adam()
        
        self.loss_function = tf.keras.losses.CategoricalCrossentropy()

        # layers to encode the images (both layers used for both images)
        self.dense1 = tf.keras.layers.Dense(128, activation=tf.nn.relu)
        self.dense2 = tf.keras.layers.Dense(128, activation=tf.nn.relu)
        
        self.dense3 = tf.keras.layers.Dense(256, activation=tf.nn.relu)
        
        self.out_layer = tf.keras.layers.Dense(19,activation=tf.nn.softmax)
        
    # 2. call method (forward computation)
    def call(self, images, training=False):
        img1, img2 = images
        
        img1_x = self.dense1(img1)
        img1_x = self.dense2(img1_x)
        
        img2_x = self.dense1(img2)
        img2_x = self.dense2(img2_x)
        
        combined_x = tf.concat([img1_x, img2_x ], axis=1)
        combined_x = self.dense3(combined_x)
        return self.out_layer(combined_x)

    # 3. metrics property
    @property
    def metrics(self):
        return self.metrics_list
        # return a list with all metrics in the model

    # 4. reset all metrics objects
    def reset_metrics(self):
        for metric in self.metrics:
            metric.reset_states()

    # 5. train step method
    @tf.function
    def train_step(self, data):
        img1, img2, label = data
        
        with tf.GradientTape() as tape:
            output = self((img1, img2), training=True)
            loss = self.loss_function(label, output)
            
        gradients = tape.gradient(loss, self.trainable_variables)
        
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        
        # update the state of the metrics according to loss
        self.metrics[0].update_state(label, output)
        self.metrics[1].update_state(loss)
        
        # return a dictionary with metric names as keys and metric results as values
        return {m.name : m.result() for m in self.metrics}

    # 6. test_step method
    @tf.function
    def test_step(self, data):
        img1, img2, label = data
        # same as train step (without parameter updates)
        output = self((img1, img2), training=False)
        loss = self.loss_function(label, output)
        self.metrics[0].update_state(label, output)
        self.metrics[1].update_state(loss)
        
        return {m.name : m.result() for m in self.metrics}

In [140]:
def create_summary_writers(config_name):
    
    # Define where to save the logs
    # along with this, you may want to save a config file with the same name so you know what the hyperparameters were used
    # alternatively make a copy of the code that is used for later reference
    
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

    train_log_path = f"logs/{config_name}/{current_time}/train"
    val_log_path = f"logs/{config_name}/{current_time}/val"

    # log writer for training metrics
    train_summary_writer = tf.summary.create_file_writer(train_log_path)

    # log writer for validation metrics
    val_summary_writer = tf.summary.create_file_writer(val_log_path)
    
    return train_summary_writer, val_summary_writer

train_summary_writer_subtask1, val_summary_writer_subtask1 = create_summary_writers(config_name="subtask_1_RUN1")
train_summary_writer_subtask2, val_summary_writer_subtask2 = create_summary_writers(config_name="subtask_2_RUN1")


In [136]:
import tqdm
def training_loop(model, train_ds, val_ds, start_epoch,
                  epochs, train_summary_writer, 
                  val_summary_writer, save_path):

    # 1. iterate over epochs
    for e in range(start_epoch, epochs):

        # 2. train steps on all batches in the training data
        for data in tqdm.tqdm(train_ds, position=0, leave=True):
            metrics = model.train_step(data)

        ## Training data
        # 3. log and print training metrics
        with train_summary_writer.as_default():
            # for scalar metrics:
            for metric in model.metrics:
                    tf.summary.scalar(f"{metric.name}", metric.result(), step=e)
            # alternatively, log metrics individually (allows for non-scalar metrics such as tf.keras.metrics.MeanTensor)
            # e.g. tf.summary.image(name="mean_activation_layer3", data = metrics["mean_activation_layer3"],step=e)
        
        #print the metrics
        print([f"{key}: {value.numpy()}" for (key, value) in metrics.items()])
        
        # 4. reset metric objects, only reset now after 
        model.reset_metrics()

        #####################################################

        ## Validation data
        # 5. evaluate on validation data
        for data in val_ds:
            metrics = model.test_step(data)
        
        # 6. log validation metrics
        with val_summary_writer.as_default():
            # for scalar metrics:
            for metric in model.metrics:
                    tf.summary.scalar(f"{metric.name}", metric.result(), step=e)
            # alternatively, log metrics individually (allows for non-scalar metrics such as tf.keras.metrics.MeanTensor)
            # e.g. tf.summary.image(name="mean_activation_layer3", data = metrics["mean_activation_layer3"],step=e)
            
        print([f"val_{key}: {value.numpy()}" for (key, value) in metrics.items()])
        # 7. reset metric objects
        model.reset_metrics()
        
    # 8. save model weights if save_path is given
    if save_path:
        model.save_weights(save_path)

In [124]:
# open the tensorboard logs
%tensorboard --logdir logs/

In [141]:
# 1. instantiate model
model_subtask1 = BinaryModel()
model_subtask2 = CategoricalModel()

# 2. choose a path to save the weights
save_path_subtask1 = "subtask1_trained_model_RUN1"
save_path_subtask2 = "subtask2_trained_model_RUN1"

# 3. pass arguments to training loop function
training_loop(model=model_subtask2,
    train_ds=train_ds2,
    val_ds=val_ds2,
    start_epoch=0,
    epochs=10,
    train_summary_writer=train_summary_writer_subtask2,
    val_summary_writer=val_summary_writer_subtask2,
    save_path=save_path_subtask2)

  0%|          | 0/1875 [00:00<?, ?it/s]2022-11-26 17:01:18.702971: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
100%|██████████| 1875/1875 [00:22<00:00, 84.35it/s]


['accuracy: 0.12470000237226486', 'loss: 929476.8125']


2022-11-26 17:01:40.743775: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2022-11-26 17:01:43.470262: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


['val_accuracy: 0.028800001367926598', 'val_loss: 3265666.5']


100%|██████████| 1875/1875 [00:19<00:00, 96.48it/s] 


['accuracy: 0.11973333358764648', 'loss: 11808498.0']
['val_accuracy: 0.5501000285148621', 'val_loss: 32123520.0']


100%|██████████| 1875/1875 [00:19<00:00, 95.50it/s] 


['accuracy: 0.1218833327293396', 'loss: 39904636.0']
['val_accuracy: 0.0', 'val_loss: 88164768.0']


100%|██████████| 1875/1875 [00:19<00:00, 97.35it/s] 


['accuracy: 0.12343333661556244', 'loss: 83559032.0']
['val_accuracy: 0.549500048160553', 'val_loss: 67226880.0']


100%|██████████| 1875/1875 [00:19<00:00, 96.31it/s] 


['accuracy: 0.1205499991774559', 'loss: 142675264.0']
['val_accuracy: 0.0', 'val_loss: 145255664.0']


100%|██████████| 1875/1875 [00:19<00:00, 96.48it/s] 


['accuracy: 0.12361666560173035', 'loss: 224616240.0']
['val_accuracy: 0.09290000796318054', 'val_loss: 198728048.0']


100%|██████████| 1875/1875 [00:19<00:00, 96.88it/s] 


['accuracy: 0.12430000305175781', 'loss: 319855104.0']
['val_accuracy: 0.0642000064253807', 'val_loss: 330234816.0']


100%|██████████| 1875/1875 [00:19<00:00, 95.82it/s] 


['accuracy: 0.12346667051315308', 'loss: 453290048.0']
['val_accuracy: 0.06130000203847885', 'val_loss: 581762496.0']


100%|██████████| 1875/1875 [00:19<00:00, 94.09it/s] 


['accuracy: 0.11933333426713943', 'loss: 602365120.0']
['val_accuracy: 0.5482000112533569', 'val_loss: 645170880.0']


100%|██████████| 1875/1875 [00:19<00:00, 95.37it/s] 


['accuracy: 0.12038333714008331', 'loss: 807302400.0']
['val_accuracy: 0.020900001749396324', 'val_loss: 1198821248.0']


In [None]:
#load the model:
fresh_model = TwinMNISTModel()

# build the model's parameters by calling it on input
for img1,img2,label in train_ds:
    fresh_model((img1,img2));
    break

# load the saved weights
model = fresh_model.load_weights(save_path)
