In [164]:
# disable compiler warnings
import os

# imports 
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.python.ops.numpy_ops import np_config
np_config.enable_numpy_behavior()
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.python.keras.layers import Dense
from typing import List
import datetime
from tqdm.notebook import tqdm


from tensorflow.python.client import device_lib
#os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'  # FATAL
print("Num GPUs Available: ", tf.config.list_physical_devices('GPU'))


Num GPUs Available:  [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [165]:
(train_ds, val_ds), ds_info = tfds.load('mnist', split=['train', 'test'], as_supervised=True, with_info=True)

#tfds.show_examples(train_ds, ds_info)

In [166]:
class CustomLSTM(tf.keras.layers.AbstractRNNCell):
  def __init__(self, units, **kwargs):
      self.units = units
      super(CustomLSTM, self).__init__(**kwargs)

      initializer = tf.keras.initializers.Orthogonal()

      self.layer_information_eraser = tf.keras.layers.Dense(self.units, activation='sigmoid', kernel_initializer=initializer)
      self.layer_new_information_filter = tf.keras.layers.Dense(self.units, activation='sigmoid', kernel_initializer=initializer)
      self.layer_new_information = tf.keras.layers.Dense(self.units, activation='tanh', kernel_initializer=initializer)
      self.layer_information_transfer_filter = tf.keras.layers.Dense(self.units, activation='sigmoid', kernel_initializer=initializer)

  @property
  def state_size(self):
    return (self.units, self.units)
    #return [tf.TensorShape([self.units]), tf.TensorShape([self.units])]

  @property
  def output_size(self):
    return self.units

  #def get_initial_state(self, inputs=None, batch_size=None, dtype=None):
    #return [tf.zeros([self.units], tf.float32), tf.zeros([self.units], tf.float32)]

  def call(self, inputs, states):
    hidden_state, cell_state = states

    hidden_input = tf.concat([inputs, hidden_state], 1)

    cell_state = tf.math.multiply(cell_state, self.layer_information_eraser(hidden_input))
    cell_state = tf.math.add(cell_state, tf.math.multiply(self.layer_new_information(hidden_input), self.layer_new_information_filter(hidden_input)))

    hidden_state = tf.math.multiply(tf.math.tanh(cell_state), self.layer_information_transfer_filter(hidden_input))

    return hidden_state, [hidden_state, cell_state]

In [167]:
class BasicConv(tf.keras.Model):
    def __init__(self, seq_size, optimizer=tf.keras.optimizers.Adam()):
        super(BasicConv, self).__init__()

        self.optimizer = optimizer
        #self.loss_function = tf.keras.losses.CategoricalCrossentropy()
        #self.metrics_list = [[tf.keras.metrics.CategoricalAccuracy(name="test_accuracy"),
        #                     tf.keras.metrics.Mean(name="test_loss"),
        #                     tf.keras.metrics.Mean(name="test_frob_norm")],
        #                     [tf.keras.metrics.CategoricalAccuracy(name="train_accuracy"),
        #                     tf.keras.metrics.Mean(name="train_loss"),
        #                     tf.keras.metrics.Mean(name="train_frob_norm")]]

        #self.metrics_list = [tf.keras.metrics.CategoricalAccuracy(name="accuracy"),
        #                     tf.keras.metrics.Mean(name="loss"),
        #                     tf.keras.metrics.Mean(name="frob_norm")]

        output_size = (int) (9*seq_size+1)
  
        self.pooling = tf.keras.layers.MaxPooling2D()
        self.my_layers = [
                        tf.keras.layers.Conv2D(filters=16, kernel_size=3, padding='same', activation='relu', input_shape=(28, 28, 1)),
                        tf.keras.layers.Conv2D(filters=16, kernel_size=3, padding='same', activation='relu', input_shape=(28, 28, 1)),
                        tf.keras.layers.TimeDistributed(tf.keras.layers.MaxPooling2D()),
                        tf.keras.layers.Conv2D(filters=32, kernel_size=3, padding='same', activation='relu', input_shape=(14, 14, 1)),
                        tf.keras.layers.Conv2D(filters=32, kernel_size=3, padding='same', activation='relu', input_shape=(14, 14, 1)),
                        tf.keras.layers.TimeDistributed(tf.keras.layers.GlobalAvgPool2D()),
                        #tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(10, activation='softmax')),
                        #tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(1, activation='relu')),
                        tf.keras.layers.RNN(CustomLSTM(64), unroll=True, return_sequences=True),
                        #tf.keras.layers.LSTM(20, unroll=True, return_sequences=True),
                        #tf.keras.layers.Dense(output_size, activation='softmax')
                        tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(output_size, activation='softmax'))
                        ]



    @tf.function
    def call(self, x, training=False):
        x = self.my_layers[0](x)
        x = self.my_layers[1](x)
        x = self.my_layers[2](x)
        x = self.my_layers[3](x)
        x = self.my_layers[4](x)
        x = self.my_layers[5](x)
        x = self.my_layers[6](x)
        x = self.my_layers[7](x)
        #x = self.my_layers[8](x)
        #x = tf.round(x)
        
        #for layer in self.my_layers:
        #    tf.print(x)
        #    x = layer(x)
        return x

    @tf.function
    def compute_frobenius(self):
        frobenius_norm = tf.zeros((1,))
        for var in self.trainable_variables:
            frobenius_norm += tf.norm(var, ord="euclidean")
        return frobenius_norm

    # 3. metrics property
    #"""@property"""
    #def metrics(self):
    #    return self.metrics_list

    # 4. reset all metrics objects
    #def reset_metrics(self):
    #    for metric in self.metrics:
    #      #for metric in metric_list:
    #      metric.reset_states()

    """
    # train_step method
    @tf.function
    def train_step(self, data):
        img, label = data
        
        # compute output and loss, train the variables
        with tf.GradientTape() as tape:
            output = self(img, training=True)
            loss = self.loss_function(label, output)
            
        # update trainable variables
        gradients = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))

        # update metrics
        self.metrics_list[1][0].update_state(tf.argmax(output, axis=1), tf.argmax(label, axis=1))
        self.metrics_list[1][1].update_state(loss)
        self.metrics_list[1][2].update_state(self.compute_frobenius())
        
        # return a dict with metric information
        return {m.name : m.result() for m in self.metrics_list[1]}



    # test_step method
    @tf.function
    def test_step(self, data):
        img, label = data

        # compute output and loss, without training
        output = self(img, training=False)
        loss = self.loss_function(label, output)

        # update metrics
        self.metrics_list[0][0].update_state(tf.argmax(output, axis=1), tf.argmax(label, axis=1))
        self.metrics_list[0][1].update_state(loss)
        self.metrics_list[0][2].update_state(self.compute_frobenius())

        # return a dict with metric information 
        return {m.name : m.result() for m in self.metrics_list[0]}
    """

In [168]:

def create_summary_writers(config_name):
    
    # Define where to save the logs
    # along with this, you may want to save a config file with the same name so you know what the hyperparameters were used
    # alternatively make a copy of the code that is used for later reference
    
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

    train_log_path = f"logs/{config_name}/{current_time}/train"
    val_log_path = f"logs/{config_name}/{current_time}/val"

    # log writer for training metrics
    train_summary_writer = tf.summary.create_file_writer(train_log_path)

    # log writer for validation metrics
    val_summary_writer = tf.summary.create_file_writer(val_log_path)
    
    return train_summary_writer, val_summary_writer

In [169]:
# gets in a dataset and returns target values
def prepare_data(dataset, seq_size, batch_size):

    # convert data from uint8 to float32
    dataset = dataset.map(lambda img, target: (tf.cast(img, tf.float32), target))
    # convert image values from range [0, 255] to [-1, 1]
    dataset = dataset.map(lambda img, target: ((img/128.)-1., target))

    # Create random tuples of 4 images [[img, ...], [target, ...]]
    dataset = dataset.batch(seq_size, drop_remainder=True)

    #for a, b in dataset:
      #print(list(b.as_numpy_iterator()))

    for imgs, targets in dataset.take(1):
      print(targets)
      for target in targets:
        print(target)

    # Generate cumulative sum targets
    def generate_sums(elems):
      signs = tf.constant([1 if i % 2 == 0 else -1 for i in range(len(elems))], dtype=tf.int64)
      return tf.cumsum(tf.math.multiply(elems, signs))

    dataset = dataset.map(lambda imgs, targets: (imgs, generate_sums(targets)))

    # Convert targets to one hot vectors
    one_hot_depth = (int) (9*seq_size+1)
    dataset = dataset.map(lambda imgs, targets: (imgs, tf.one_hot(targets + (int) (one_hot_depth / 2 + 1), depth=one_hot_depth)))

    for imgs, targets in dataset.take(1):
      print(targets)
      for target in targets:
        print(target)
    
    dataset = dataset.cache()
    dataset = dataset.shuffle(4096)
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(tf.data.AUTOTUNE)

    return dataset

In [170]:
# trains the model by iterating through the dataset and applying training_step method epochs time
def training_loop(model, train_ds, test_ds, epochs, train_summary_writer, memory):
    metrics = []

    # iterate over epochs
    for epoch in tqdm(range(epochs)):

        # train steps on all batches in the training data
        for (img, label) in train_ds:
            metrics = model.train_step((img, label))
            
            # keep data in summary with metrics
            with train_summary_writer.as_default():
                for metric in model.metrics_list[1]:
                    tf.summary.scalar(f"{metric.name}", metric.result(), step=epoch)

        for (key, value) in metrics.items():
            memory[key].append(value.numpy())

        memory = test_loop(model=model,
                           test_ds=test_ds,
                           val_summary_writer=val_summary_writer,
                           memory=memory)
        
        # print current metric values and reset the metrics
        tf.print([f"{key} : {value.numpy()}" for (key, value ) in metrics.items()])
        model.reset_metrics(1)

    return memory


# tests overall performance of model
def test_loop(model, test_ds, val_summary_writer, memory):
    metrics = []
    # test steps on every item in test dataset
    for (img, label) in tqdm(test_ds):
        metrics = model.test_step((img, label))
        
        # keep data with metrics
        with val_summary_writer.as_default():
            for metric in model.metrics_list[0]:
                tf.summary.scalar(f"{metric.name}", metric.result(), step=1)

    for (key, value) in metrics.items():
        memory[key].append(value.numpy())

    print([f"{key} : {value.numpy()}" for (key, value ) in metrics.items()])

    model.reset_metrics(0)

    return memory

# visualize accuracy, loss and frobenius norm
def visualization(accuracies, losses, frobenius, name):
    plt.figure()
    line1, = plt.plot(accuracies, "b")
    line2, = plt.plot(losses, "r")

    frob_new = frobenius/np.max(frobenius) * np.max(losses)
    line3, = plt.plot(frob_new, "y" )

    plt.xlabel("Training steps")
    plt.ylabel("Loss/Accuracy")
    plt.legend((line1, line2, line3),("Accuracy", "Loss", "Frobenius Norm"))
    plt.savefig(name)
    plt.show()


In [171]:
# overall hyperparameters to compare with and without overfitting precautions methods
epochs = 15
batch_size = 32
seq_size = 4

## no augmentation, normal model

In [172]:
train_summary_writer, val_summary_writer = create_summary_writers(config_name="UNTOUCHED")

model = BasicConv(seq_size)


#for img, label in train_ds.take(1):
#    print(img.shape)
#    print(label.shape)

train_dataset = prepare_data(train_ds, seq_size, batch_size)
#train_dataset = train_ds.apply(lambda dataset: prepare_data(dataset, seq_size, batch_size))
val_dataset = prepare_data(val_ds, seq_size, batch_size)
#val_dataset = val_ds.apply(lambda dataset: prepare_data(dataset, seq_size, batch_size))

#for img, label in train_dataset.take(1):
#    print(img.shape)
#    print(label.shape)

tf.Tensor([4 1 0 7], shape=(4,), dtype=int64)
tf.Tensor(4, shape=(), dtype=int64)
tf.Tensor(1, shape=(), dtype=int64)
tf.Tensor(0, shape=(), dtype=int64)
tf.Tensor(7, shape=(), dtype=int64)
tf.Tensor(
[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]], shape=(4, 37), dtype=float32)
tf.Tensor(
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.], shape=(37,), dtype=float32)
tf.Tensor(
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.], shape=(37,), dtype=floa

In [173]:
#Test compile
#print(train_dataset)
#for imgs, targets in train_dataset.take(1):
#  print(targets)

model.compile(optimizer=tf.keras.optimizers.Adam(), 
              loss=tf.keras.losses.CategoricalCrossentropy(),
              #loss="mean_squared_error",
              metrics=[
                      tf.keras.metrics.CategoricalAccuracy(name="accuracy"),
                      #tf.keras.metrics.Accuracy(name="accuracy"),
                      #tf.keras.metrics.Mean(name="frob_norm")
                      ]
              )

history = model.fit(train_dataset,
                    validation_data=val_dataset,
                    epochs=50)

model.summary()

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Model: "basic_conv_16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 max_pooling2d_32 (MaxPoolin  multiple                 0 (unused)
 g2D)                                                            
                                                                 
 conv2d_64 (Conv2D)          multiple                  1

In [174]:
"""
tfds.benchmark(train_dataset, batch_size=batch_size)

print("\n************ Training UNTOUCHED ************\n")

memory = {"test_accuracy" : [],
          "test_loss" : [],
          "test_frob_norm" : [],
          "train_accuracy" : [],
          "train_loss" : [],
          "train_frob_norm" : []
          }

memory = training_loop(model,
                       train_ds=train_dataset,
                       test_ds=val_dataset,
                       epochs=tf.constant(epochs),
                       train_summary_writer=train_summary_writer,
                       memory=memory)

visualization(memory["train_accuracy"], memory["train_loss"], memory["train_frob_norm"], "aug_train")
"""

'\ntfds.benchmark(train_dataset, batch_size=batch_size)\n\nprint("\n************ Training UNTOUCHED ************\n")\n\nmemory = {"test_accuracy" : [],\n          "test_loss" : [],\n          "test_frob_norm" : [],\n          "train_accuracy" : [],\n          "train_loss" : [],\n          "train_frob_norm" : []\n          }\n\nmemory = training_loop(model,\n                       train_ds=train_dataset,\n                       test_ds=val_dataset,\n                       epochs=tf.constant(epochs),\n                       train_summary_writer=train_summary_writer,\n                       memory=memory)\n\nvisualization(memory["train_accuracy"], memory["train_loss"], memory["train_frob_norm"], "aug_train")\n'