In [2]:
import tensorflow as tf
import tensorflow_datasets as tfds
import datetime
import tqdm

%load_ext tensorboard

In [2]:
def augment(image, label):
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_saturation(image, 0.7, 1.3)
    image = tf.image.random_contrast(image, 0.8, 1.2)
    image = tf.image.random_brightness(image, 0.1)
    return image, label 

def get_cifar10(batch_size):
    """
    Load and prepare CIFAR-10 as a tensorflow dataset.
    Returns a train and a validation dataset.
    Args:
    batch_size (int)
    """
    train_ds, val_ds = tfds.load('cifar10', split=['train', 'test'], shuffle_files=True)

    one_hot = lambda x: tf.one_hot(x, 10)

    map_func = lambda x,y: (tf.cast(x, dtype=tf.float32)/255.,
                            tf.cast(one_hot(y),tf.float32))

    map_func_2 = lambda x: (x["image"],x["label"])

    train_ds = train_ds.map(map_func_2).map(map_func).cache()
    val_ds   = val_ds.map(map_func_2).map(map_func).cache()
    
    train_ds = train_ds.shuffle(4096).batch(batch_size)
    val_ds   = val_ds.shuffle(4096).batch(batch_size)

    return (train_ds.prefetch(tf.data.AUTOTUNE), val_ds.prefetch(tf.data.AUTOTUNE))

train_ds, val_ds = get_cifar10(128)

In [3]:
class BasicCNNBlock(tf.keras.layers.Layer):
    def __init__(self, depth, layers):
        super(BasicCNNBlock, self).__init__()
        self.layers = [tf.keras.layers.Conv2D(filters=depth, kernel_size=3, padding='same', activation='relu') for _ in range(layers)]

    @tf.function
    def call(self, x):
        for layer in self.layers:
            x = layer(x)
        return x

class BasicCNN(tf.keras.Model):
    def __init__(self):
        super().__init__()

        self.metrics_list = [tf.keras.metrics.Mean(name="loss"),
                            tf.keras.metrics.CategoricalAccuracy(name="acc")]
        
        self.optimizer = tf.keras.optimizers.Adam()
        
        self.loss_function = tf.keras.losses.CategoricalCrossentropy()

        self.layer_list = [

                           tf.keras.layers.BatchNormalization(),
                           tf.keras.layers.Conv2D(filters=24, kernel_size=3, padding='same', activation='relu',kernel_regularizer=tf.keras.regularizers.L2(0.01)),
                           tf.keras.layers.Dropout(0.1),
                           tf.keras.layers.Conv2D(filters=24, kernel_size=3, padding='same', activation='relu',kernel_regularizer=tf.keras.regularizers.L2(0.01)),
                           tf.keras.layers.Dropout(0.1),
                           tf.keras.layers.Conv2D(filters=24, kernel_size=3, padding='same', activation='relu',kernel_regularizer=tf.keras.regularizers.L2(0.01)),
                           tf.keras.layers.Dropout(0.1),
                           tf.keras.layers.Conv2D(filters=24, kernel_size=3, padding='same', activation='relu',kernel_regularizer=tf.keras.regularizers.L2(0.01)),
                           tf.keras.layers.Dropout(0.1),
                           tf.keras.layers.Conv2D(filters=24, kernel_size=3, padding='same', activation='relu',kernel_regularizer=tf.keras.regularizers.L2(0.01)),
                           tf.keras.layers.Dropout(0.1),
                           tf.keras.layers.Conv2D(filters=24, kernel_size=3, padding='same', activation='relu',kernel_regularizer=tf.keras.regularizers.L2(0.01)),
                           tf.keras.layers.Dropout(0.1),
                           tf.keras.layers.MaxPooling2D(pool_size=2, strides=2),
                           
                           tf.keras.layers.BatchNormalization(),
                           tf.keras.layers.Conv2D(filters=48, kernel_size=3, padding='same', activation='relu',kernel_regularizer=tf.keras.regularizers.L2(0.01)),
                           tf.keras.layers.Dropout(0.1),
                           tf.keras.layers.Conv2D(filters=48, kernel_size=3, padding='same', activation='relu',kernel_regularizer=tf.keras.regularizers.L2(0.01)),
                           tf.keras.layers.Dropout(0.1),
                           tf.keras.layers.Conv2D(filters=48, kernel_size=3, padding='same', activation='relu',kernel_regularizer=tf.keras.regularizers.L2(0.01)),
                           tf.keras.layers.Dropout(0.1),
                           tf.keras.layers.Conv2D(filters=48, kernel_size=3, padding='same', activation='relu',kernel_regularizer=tf.keras.regularizers.L2(0.01)),
                           tf.keras.layers.Dropout(0.1),
                           tf.keras.layers.MaxPooling2D(pool_size=2, strides=2),

                           tf.keras.layers.BatchNormalization(),
                           tf.keras.layers.Conv2D(filters=96, kernel_size=3, padding='same', activation='relu',kernel_regularizer=tf.keras.regularizers.L2(0.01)),
                           tf.keras.layers.Dropout(0.1),
                           tf.keras.layers.Conv2D(filters=96, kernel_size=3, padding='same', activation='relu',kernel_regularizer=tf.keras.regularizers.L2(0.01)),
                           tf.keras.layers.Dropout(0.1),
                           tf.keras.layers.GlobalMaxPooling2D(),
                           
                           tf.keras.layers.Dense(10, activation='softmax')]
                           

    @tf.function
    def call(self, x):
        for item in self.layer_list:
            x = item(x)
        return x
     
    @property
    def metrics(self):
        return self.metrics_list
        # return a list with all metrics in the model

    # 4. reset all metrics objects
    def reset_metrics(self):
        for metric in self.metrics:
            metric.reset_states()

    # 5. train step method
    @tf.function
    def train_step(self, data):
        img, target = data
        
        with tf.GradientTape() as tape:
            output = self(img, training=True)
            loss = self.loss_function(target, output)
            
        gradients = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))

        # update loss metric
        self.metrics[0].update_state(loss)
        
        # for all metrics except loss, update states (accuracy etc.)
        for metric in self.metrics[1:]:
            metric.update_state(target, output)

        # Return a dictionary mapping metric names to current value
        return {m.name: m.result() for m in self.metrics}
    
    @tf.function
    def test_step(self, data):
        img, target = data

        output = self(img, training=False)
        loss = self.loss_function(target, output)

        self.metrics[0].update_state(loss)
        # for accuracy metrics:
        for metric in self.metrics[1:]:
            metric.update_state(target, output)

        return {m.name: m.result() for m in self.metrics}

In [4]:
# Define where to save the log
config_name= "batchnormblock+dropout0.1layer+L2"
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

train_log_path = f"logs/{config_name}/{current_time}/train"
val_log_path = f"logs/{config_name}/{current_time}/val"

# log writer for training metrics
train_summary_writer = tf.summary.create_file_writer(train_log_path)

# log writer for validation metrics
val_summary_writer = tf.summary.create_file_writer(val_log_path)

def training_loop(model, train_ds, val_ds, epochs, train_summary_writer, val_summary_writer):
    for epoch in range(epochs):
        print(f"Epoch {epoch}:")
        
        # Training:
        
        for data in tqdm.tqdm(train_ds, position=0, leave=True):
            metrics = model.train_step(data)
            
            # logging the validation metrics to the log file which is used by tensorboard
            with train_summary_writer.as_default():
                for metric in model.metrics:
                    tf.summary.scalar(f"{metric.name}", metric.result(), step=epoch)

        # print the metrics
        print([f"{key}: {value.numpy()}" for (key, value) in metrics.items()])

        # reset all metrics (requires a reset_metrics method in the model)
        model.reset_metrics()    
        
        # Validation:
        for data in val_ds:
            metrics = model.test_step(data)
        
            # logging the validation metrics to the log file which is used by tensorboard
            with val_summary_writer.as_default():
                for metric in model.metrics:
                    tf.summary.scalar(f"{metric.name}", metric.result(), step=epoch)
                    
        print([f"val_{key}: {value.numpy()}" for (key, value) in metrics.items()])

        # reset all metrics
        model.reset_metrics()
        print("\n")

In [4]:
%tensorboard --logdir logs/

Reusing TensorBoard on port 6006 (pid 14548), started 0:22:35 ago. (Use '!kill 14548' to kill it.)

In [6]:
tf.keras.backend.clear_session()

model = BasicCNN()

training_loop(model=model,
                train_ds=train_ds, 
                val_ds=val_ds, 
                epochs=20, 
                train_summary_writer=train_summary_writer, 
                val_summary_writer=val_summary_writer)

Epoch 0:


100%|██████████| 391/391 [00:26<00:00, 14.89it/s]


['loss: 1.6795326471328735', 'acc: 0.3856000006198883']
['val_loss: 1.4031211137771606', 'val_acc: 0.49050000309944153']


Epoch 1:


100%|██████████| 391/391 [00:20<00:00, 18.97it/s]


['loss: 1.246921420097351', 'acc: 0.5511999726295471']
['val_loss: 1.1244226694107056', 'val_acc: 0.5952000021934509']


Epoch 2:


100%|██████████| 391/391 [00:19<00:00, 19.67it/s]


['loss: 1.0284382104873657', 'acc: 0.6359800100326538']
['val_loss: 0.960030734539032', 'val_acc: 0.6543999910354614']


Epoch 3:


100%|██████████| 391/391 [00:19<00:00, 19.63it/s]


['loss: 0.8918923139572144', 'acc: 0.6864200234413147']
['val_loss: 0.8563884496688843', 'val_acc: 0.7013999819755554']


Epoch 4:


100%|██████████| 391/391 [00:19<00:00, 19.65it/s]


['loss: 0.7914332747459412', 'acc: 0.7213199734687805']
['val_loss: 0.8127869367599487', 'val_acc: 0.7145000100135803']


Epoch 5:


100%|██████████| 391/391 [00:19<00:00, 19.68it/s]


['loss: 0.7286777496337891', 'acc: 0.7434800267219543']
['val_loss: 0.7756235003471375', 'val_acc: 0.7297999858856201']


Epoch 6:


100%|██████████| 391/391 [00:19<00:00, 19.63it/s]


['loss: 0.6822593808174133', 'acc: 0.7612199783325195']
['val_loss: 0.7577524781227112', 'val_acc: 0.7382000088691711']


Epoch 7:


100%|██████████| 391/391 [00:19<00:00, 19.73it/s]


['loss: 0.635924220085144', 'acc: 0.7760400176048279']
['val_loss: 0.7155229449272156', 'val_acc: 0.7559999823570251']


Epoch 8:


100%|██████████| 391/391 [00:19<00:00, 19.69it/s]


['loss: 0.5997694134712219', 'acc: 0.790120005607605']
['val_loss: 0.6726654171943665', 'val_acc: 0.7684999704360962']


Epoch 9:


100%|██████████| 391/391 [00:19<00:00, 19.66it/s]


['loss: 0.5676122307777405', 'acc: 0.8022599816322327']
['val_loss: 0.678238570690155', 'val_acc: 0.7664999961853027']


Epoch 10:


100%|██████████| 391/391 [00:19<00:00, 19.70it/s]


['loss: 0.5473240613937378', 'acc: 0.8083999752998352']
['val_loss: 0.6579542756080627', 'val_acc: 0.7796000242233276']


Epoch 11:


100%|██████████| 391/391 [00:21<00:00, 18.21it/s]


['loss: 0.5112508535385132', 'acc: 0.8192999958992004']
['val_loss: 0.630044162273407', 'val_acc: 0.7836999893188477']


Epoch 12:


100%|██████████| 391/391 [00:19<00:00, 19.65it/s]


['loss: 0.49375712871551514', 'acc: 0.8259599804878235']
['val_loss: 0.6609145998954773', 'val_acc: 0.7792999744415283']


Epoch 13:


100%|██████████| 391/391 [00:19<00:00, 19.72it/s]


['loss: 0.4720948040485382', 'acc: 0.8334800004959106']
['val_loss: 0.6489681005477905', 'val_acc: 0.7829999923706055']


Epoch 14:


100%|██████████| 391/391 [00:19<00:00, 19.68it/s]


['loss: 0.45926210284233093', 'acc: 0.8385000228881836']
['val_loss: 0.6349931359291077', 'val_acc: 0.7888000011444092']


Epoch 15:


100%|██████████| 391/391 [00:19<00:00, 19.68it/s]


['loss: 0.44191446900367737', 'acc: 0.8429200053215027']
['val_loss: 0.6312381625175476', 'val_acc: 0.7924000024795532']


Epoch 16:


100%|██████████| 391/391 [00:19<00:00, 19.70it/s]


['loss: 0.4231436252593994', 'acc: 0.8511000275611877']
['val_loss: 0.6075559258460999', 'val_acc: 0.796500027179718']


Epoch 17:


100%|██████████| 391/391 [00:19<00:00, 19.65it/s]


['loss: 0.41186437010765076', 'acc: 0.8540599942207336']
['val_loss: 0.6086594462394714', 'val_acc: 0.7997000217437744']


Epoch 18:


100%|██████████| 391/391 [00:19<00:00, 20.26it/s]


['loss: 0.3922002911567688', 'acc: 0.8609600067138672']
['val_loss: 0.6075337529182434', 'val_acc: 0.8047999739646912']


Epoch 19:


100%|██████████| 391/391 [00:19<00:00, 20.28it/s]


['loss: 0.37786865234375', 'acc: 0.866919994354248']
['val_loss: 0.61519455909729', 'val_acc: 0.8008000254631042']




For this homework we tried many different setups regarding the optimization of last homeworks results. First we had a look at what difference each optimization technique alone had on model performance and in the end what combining these yielded on performance. So the basic network without any optimization has some overfitting, with training accuracy around 90% and validation accuracy around 70%. L2 regularization alone (we tried values 0.001; 0.01), very slightly improved model performance but not significantly. Then we tried different setups with dropout layers, one where we added dropout layers after each individual layer and in the second after each layer block and each with different dropout values (0.1 and 0.2). The model had the best performance with dropout layers after each individual layer with dropout value set to 0.1, it also helped with overfitting. Data augmentation alone had no significant impact on model performance. Lastly we added batch normalization before each layer block which also significantly improved performance and reduced overfitting. The best combination of techniques was batch norm before each layer block with dropout layers after each individual layer (value 0.1) and L2 regularization (value 0.001) resulting in training accuracy around 86% and validation accuracy around 80%, so the training-validation delta decreased from 20%P to 6%P. Values and graphs for every experiment can be seen in the tensorboard :)