In [41]:
import tensorflow as tf
import tensorflow_datasets as tfds
import datetime
import tqdm

# in a notebook, load the tensorboard extension, not needed for scripts
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [42]:
def get_cifar10(batch_size, shuffle_buffer=4096):
    """
    Load and prepare CIFAR-10 as a pair of tensorflow datasets.

    Images are cast to float32 and divided by 255; labels are one-hot
    encoded with depth 10 as float32. Each dataset is cached after
    preprocessing, then batched and prefetched. Only the training data is
    shuffled: shuffling the validation data does not help evaluation and
    makes the batch composition differ between runs.

    Args:
        batch_size (int): number of examples per batch.
        shuffle_buffer (int): size of the shuffle buffer used for the
            training data. Defaults to 4096, the previously hard-coded value.

    Returns:
        tuple: (train_ds, val_ds) built from the 'train' and 'test' splits.
    """
    num_classes = 10

    # as_supervised=True yields (image, label) tuples directly, so no extra
    # map is needed to unpack the feature dictionary.
    train_ds, val_ds = tfds.load('cifar10', split=['train', 'test'],
                                 shuffle_files=True, as_supervised=True)

    def preprocess(image, label):
        # tf.one_hot already returns float32 here, so no further cast is needed.
        image = tf.cast(image, dtype=tf.float32) / 255.
        label = tf.one_hot(label, num_classes, dtype=tf.float32)
        return image, label

    def prepare(ds, shuffle):
        # Cache BEFORE shuffling so every epoch gets a fresh shuffle order
        # while the preprocessing itself is only computed once.
        ds = ds.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE).cache()
        if shuffle:
            ds = ds.shuffle(shuffle_buffer)
        return ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)

    return prepare(train_ds, shuffle=True), prepare(val_ds, shuffle=False)

# Build the batched train/validation datasets once (batch size 128);
# the training cell further down passes them to training_loop.
train_ds, val_ds = get_cifar10(128)

In [43]:
class DenselyConnectedBottleneckCNNLayer(tf.keras.layers.Layer):
  """Densely connected layer: 1x1 bottleneck conv, 3x3 ReLU conv, then concat.

  Args:
    num_filters: number of filters of the 3x3 convolution, i.e. how many
      feature maps get appended to the input.
    bottleneck_size: number of filters of the 1x1 bottleneck convolution.
  """

  def __init__(self, num_filters, bottleneck_size):
    super().__init__()
    # The 1x1 convolution has no activation; only the 3x3 one applies ReLU.
    self.bottleneck = tf.keras.layers.Conv2D(
        filters=bottleneck_size, kernel_size=1, padding='same')
    self.conv = tf.keras.layers.Conv2D(
        filters=num_filters, kernel_size=3, padding='same', activation='relu')

  @tf.function
  def call(self, x):
    """Return `x` with the new feature maps concatenated on the last axis."""
    new_features = self.conv(self.bottleneck(x))
    return tf.concat([x, new_features], axis=-1)

class DenselyConnectedBottleneckCNNBlock(tf.keras.layers.Layer):
  """Stack of `layers` DenselyConnectedBottleneckCNNLayer instances applied in order.

  Args:
    num_filters: filters of the 3x3 convolution in every stacked layer.
    layers: how many densely connected layers the block contains.
    bottleneck_size: filters of the 1x1 bottleneck in every stacked layer.
  """

  def __init__(self, num_filters, layers, bottleneck_size):
    super().__init__()

    stack = []
    for _ in range(layers):
      stack.append(DenselyConnectedBottleneckCNNLayer(num_filters, bottleneck_size))
    self.layers = stack

  @tf.function
  def call(self, x):
    """Feed `x` through every layer of the block, one after the other."""
    features = x
    for dense_layer in self.layers:
      features = dense_layer(features)
    return features


class DenselyConnectedBottleneckCNN(tf.keras.Model):
  """Classifier built from three densely connected bottleneck blocks.

  Architecture: dense block -> max pool -> dense block -> max pool ->
  dense block -> global average pooling -> Dense(10, softmax).

  The model owns its optimizer (Adam), its loss (categorical cross-entropy)
  and its metrics (mean loss, categorical accuracy) and exposes custom
  `train_step` / `test_step` methods that take a `(img, target)` batch.
  """

  def __init__(self):
    super(DenselyConnectedBottleneckCNN, self).__init__()

    # Index 0 must stay the loss tracker: train_step/test_step update it with
    # the loss value and every following metric with (target, output).
    self.metrics_list = [tf.keras.metrics.Mean(name="loss"),
                         tf.keras.metrics.CategoricalAccuracy(name="acc")]

    self.optimizer = tf.keras.optimizers.Adam()

    self.loss_function = tf.keras.losses.CategoricalCrossentropy()

    self.denseblock1 = DenselyConnectedBottleneckCNNBlock(24, 4, 24)
    self.pooling1 = tf.keras.layers.MaxPooling2D(pool_size=2, strides=2)

    self.denseblock2 = DenselyConnectedBottleneckCNNBlock(24, 4, 24)
    self.pooling2 = tf.keras.layers.MaxPooling2D(pool_size=2, strides=2)

    self.denseblock3 = DenselyConnectedBottleneckCNNBlock(24, 4, 24)
    self.globalpooling = tf.keras.layers.GlobalAvgPool2D()
    self.out = tf.keras.layers.Dense(10, activation='softmax')

  @tf.function
  def call(self, x):
    """Run the forward pass and return the softmax output of the final Dense layer."""
    x = self.denseblock1(x)
    x = self.pooling1(x)
    x = self.denseblock2(x)
    x = self.pooling2(x)
    x = self.denseblock3(x)
    x = self.globalpooling(x)
    x = self.out(x)
    return x

  @property
  def metrics(self):
    """Return the list with all metric objects of the model."""
    return self.metrics_list

  def reset_metrics(self):
    """Reset the state of every metric object."""
    for metric in self.metrics:
      metric.reset_states()

  @tf.function
  def train_step(self, data):
    """Perform one optimization step on a batch.

    Args:
      data: tuple `(img, target)`.

    Returns:
      dict mapping each metric name to its current result.
    """
    img, target = data

    # Only the forward pass and the loss belong inside the tape context.
    with tf.GradientTape() as tape:
      output = self(img, training=True)
      loss = self.loss_function(target, output)

    # Computing and applying the gradients outside the context keeps the tape
    # from recording the backward pass and the optimizer update.
    gradients = tape.gradient(loss, self.trainable_variables)
    self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))

    # update loss metric
    self.metrics[0].update_state(loss)

    # for all metrics except loss, update states (accuracy etc.)
    for metric in self.metrics[1:]:
      metric.update_state(target, output)

    # Return a dictionary mapping metric names to current value
    return {m.name: m.result() for m in self.metrics}

  @tf.function
  def test_step(self, data):
    """Evaluate a batch without updating any weights.

    Args:
      data: tuple `(img, target)`.

    Returns:
      dict mapping each metric name to its current result.
    """
    img, target = data

    output = self(img, training=False)
    loss = self.loss_function(target, output)

    self.metrics[0].update_state(loss)
    # for accuracy metrics:
    for metric in self.metrics[1:]:
      metric.update_state(target, output)

    return {m.name: m.result() for m in self.metrics}

In [49]:
class BasicCNNLayer(tf.keras.layers.Layer):
    """A single 3x3, 'same'-padded convolution with ReLU activation.

    Args:
        num_filters: number of filters of the convolution.
    """

    def __init__(self, num_filters):
        super().__init__()
        self.conv = tf.keras.layers.Conv2D(
            filters=num_filters,
            kernel_size=3,
            padding='same',
            activation='relu',
        )

    @tf.function
    def call(self, x):
        """Apply the convolution to `x` and return the result."""
        feature_maps = self.conv(x)
        return feature_maps

class BasicCNNBlock(tf.keras.layers.Layer):
    """Stack of `layers` BasicCNNLayer instances, all with `depth` filters.

    Args:
        depth: number of filters used by every convolution in the block.
        layers: how many convolutional layers the block contains.
    """

    def __init__(self, depth, layers):
        super().__init__()
        stack = []
        for _ in range(layers):
            stack.append(BasicCNNLayer(depth))
        self.layers = stack

    @tf.function
    def call(self, x):
        """Feed `x` through the stacked layers in order."""
        features = x
        for conv_layer in self.layers:
            features = conv_layer(features)
        return features

class BasicConv(tf.keras.Model):
    """Plain convolutional classifier without skip connections.

    Architecture: conv block (24 filters, 2 layers) -> max pool ->
    conv block (48, 3) -> max pool -> conv block (96, 4) ->
    global max pooling -> Dense(10, softmax).

    Optimizer, loss and metrics are stored on the model; `train_step` and
    `test_step` take a `(img, target)` batch and return the metric results.
    """

    def __init__(self):
        super(BasicConv, self).__init__()

        # Position 0 is the loss tracker; the step methods rely on that order.
        loss_tracker = tf.keras.metrics.Mean(name="loss")
        accuracy_tracker = tf.keras.metrics.CategoricalAccuracy(name="acc")
        self.metrics_list = [loss_tracker, accuracy_tracker]

        self.optimizer = tf.keras.optimizers.Adam()

        self.loss_function = tf.keras.losses.CategoricalCrossentropy()

        self.basicblock1 = BasicCNNBlock(24, 2)
        self.pooling1 = tf.keras.layers.MaxPooling2D(pool_size=2, strides=2)

        self.basicblock2 = BasicCNNBlock(48, 3)
        self.pooling2 = tf.keras.layers.MaxPooling2D(pool_size=2, strides=2)

        self.basicblock3 = BasicCNNBlock(96, 4)
        self.global_pool = tf.keras.layers.GlobalMaxPooling2D()

        self.out = tf.keras.layers.Dense(10, activation='softmax')

    @tf.function
    def call(self, x):
        """Run the forward pass through all stages and return the class probabilities."""
        stages = (
            self.basicblock1,
            self.pooling1,
            self.basicblock2,
            self.pooling2,
            self.basicblock3,
            self.global_pool,
            self.out,
        )
        for stage in stages:
            x = stage(x)
        return x

    @property
    def metrics(self):
        """List of all metric objects tracked by the model."""
        return self.metrics_list

    def reset_metrics(self):
        """Reset the accumulated state of every metric."""
        for tracker in self.metrics:
            tracker.reset_states()

    @tf.function
    def train_step(self, data):
        """Run one gradient update on a `(img, target)` batch.

        Returns:
            dict mapping metric names to their current results.
        """
        images, labels = data

        with tf.GradientTape() as tape:
            predictions = self(images, training=True)
            loss = self.loss_function(labels, predictions)

        variables = self.trainable_variables
        gradients = tape.gradient(loss, variables)
        self.optimizer.apply_gradients(zip(gradients, variables))

        # First tracker gets the loss, all others get (labels, predictions).
        trackers = self.metrics
        trackers[0].update_state(loss)
        for tracker in trackers[1:]:
            tracker.update_state(labels, predictions)

        return {tracker.name: tracker.result() for tracker in self.metrics}

    @tf.function
    def test_step(self, data):
        """Evaluate a `(img, target)` batch without changing any weights.

        Returns:
            dict mapping metric names to their current results.
        """
        images, labels = data

        predictions = self(images, training=False)
        loss = self.loss_function(labels, predictions)

        trackers = self.metrics
        trackers[0].update_state(loss)
        for tracker in trackers[1:]:
            tracker.update_state(labels, predictions)

        return {tracker.name: tracker.result() for tracker in self.metrics}


In [45]:
class ResidualConnectedCNNLayer(tf.keras.layers.Layer):
    """3x3 ReLU convolution whose output is added to its own input.

    Args:
        num_filters: number of filters of the convolution. Because the result
            is added to the input, the input must be addable to a tensor
            with this many channels.
    """

    def __init__(self, num_filters):
        super().__init__()
        self.conv = tf.keras.layers.Conv2D(
            filters=num_filters,
            kernel_size=3,
            padding='same',
            activation='relu',
        )

    @tf.function
    def call(self, x):
        """Return `conv(x) + x`."""
        return self.conv(x) + x

class ResidualConnectedCNNBlock(tf.keras.layers.Layer):
    """An entry convolution followed by `layers` residual layers.

    The entry convolution maps the input to `depth` channels; the residual
    layers that follow all use `depth` filters as well.

    Args:
        depth: number of filters used throughout the block.
        layers: how many residual layers follow the entry convolution.
    """

    def __init__(self, depth, layers):
        super().__init__()
        self.deeper_layer = tf.keras.layers.Conv2D(
            filters=depth, kernel_size=3, padding='same', activation='relu')
        residual_stack = []
        for _ in range(layers):
            residual_stack.append(ResidualConnectedCNNLayer(depth))
        self.layers = residual_stack

    @tf.function
    def call(self, x):
        """Apply the entry convolution, then every residual layer in order."""
        features = self.deeper_layer(x)
        for residual_layer in self.layers:
            features = residual_layer(features)
        return features


class ResidualConnectedCNN(tf.keras.Model):
    """Classifier built from three residual blocks.

    Architecture: residual block (32 filters, 4 layers) -> max pool ->
    residual block (64, 4) -> max pool -> residual block (128, 4) ->
    global average pooling -> Dense(10, softmax).

    Optimizer, loss and metrics are stored on the model; `train_step` and
    `test_step` take a `(img, target)` batch and return the metric results.
    """

    def __init__(self):
        super().__init__()

        # The loss tracker has to come first; the step methods index it as [0].
        self.metrics_list = [
            tf.keras.metrics.Mean(name="loss"),
            tf.keras.metrics.CategoricalAccuracy(name="acc"),
        ]

        self.optimizer = tf.keras.optimizers.Adam()

        self.loss_function = tf.keras.losses.CategoricalCrossentropy()

        self.residualblock1 = ResidualConnectedCNNBlock(32, 4)
        self.pooling1 = tf.keras.layers.MaxPooling2D(pool_size=2, strides=2)

        self.residualblock2 = ResidualConnectedCNNBlock(64, 4)
        self.pooling2 = tf.keras.layers.MaxPooling2D(pool_size=2, strides=2)

        self.residualblock3 = ResidualConnectedCNNBlock(128, 4)
        self.globalpooling = tf.keras.layers.GlobalAvgPool2D()

        self.out = tf.keras.layers.Dense(10, activation='softmax')

    @tf.function
    def call(self, x):
        """Run the forward pass and return the output of the softmax layer."""
        hidden = self.pooling1(self.residualblock1(x))
        hidden = self.pooling2(self.residualblock2(hidden))
        hidden = self.globalpooling(self.residualblock3(hidden))
        return self.out(hidden)

    @property
    def metrics(self):
        """All metric objects of the model, loss tracker first."""
        return self.metrics_list

    def reset_metrics(self):
        """Clear the accumulated state of each metric."""
        for tracked in self.metrics:
            tracked.reset_states()

    @tf.function
    def train_step(self, data):
        """Do one optimizer step on a `(img, target)` batch.

        Returns:
            dict mapping metric names to their current results.
        """
        inputs, targets = data

        with tf.GradientTape() as tape:
            outputs = self(inputs, training=True)
            loss = self.loss_function(targets, outputs)

        weights = self.trainable_variables
        grads = tape.gradient(loss, weights)
        self.optimizer.apply_gradients(zip(grads, weights))

        # Loss goes into the first metric, (targets, outputs) into the rest.
        self.metrics[0].update_state(loss)
        for tracked in self.metrics[1:]:
            tracked.update_state(targets, outputs)

        results = {}
        for tracked in self.metrics:
            results[tracked.name] = tracked.result()
        return results

    @tf.function
    def test_step(self, data):
        """Evaluate a `(img, target)` batch; no weights are changed.

        Returns:
            dict mapping metric names to their current results.
        """
        inputs, targets = data

        outputs = self(inputs, training=False)
        loss = self.loss_function(targets, outputs)

        self.metrics[0].update_state(loss)
        for tracked in self.metrics[1:]:
            tracked.update_state(targets, outputs)

        results = {}
        for tracked in self.metrics:
            results[tracked.name] = tracked.result()
        return results

In [46]:
# Define where to save the TensorBoard logs.
# Layout: logs/<config_name>/<timestamp>/SGDwMomentum/{train,val}
# The timestamp is taken when this cell runs, so every execution of the cell
# starts a new log directory.
# NOTE(review): the "SGDwMomentum" path component does not match the Adam
# optimizer that the model classes in this notebook create - confirm whether
# the label is a leftover and should be renamed.
config_name= "config_name"
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

train_log_path = f"logs/{config_name}/{current_time}/SGDwMomentum/train"
val_log_path = f"logs/{config_name}/{current_time}/SGDwMomentum/val"

# log writer for training metrics
train_summary_writer = tf.summary.create_file_writer(train_log_path)

# log writer for validation metrics
val_summary_writer = tf.summary.create_file_writer(val_log_path)

def _log_epoch_metrics(model, summary_writer, epoch, prefix=""):
    """Write the model's current metric results to TensorBoard and print them."""
    with summary_writer.as_default():
        for metric in model.metrics:
            tf.summary.scalar(f"{metric.name}", metric.result(), step=epoch)

    print([f"{prefix}{metric.name}: {metric.result().numpy()}" for metric in model.metrics])


def training_loop(model, train_ds, val_ds, epochs, train_summary_writer, val_summary_writer):
    """Train and validate `model` for a number of epochs.

    Each epoch runs `model.train_step` on every batch of `train_ds`, then
    `model.test_step` on every batch of `val_ds`. After each of the two
    passes the accumulated metrics are written to the matching summary
    writer exactly once (with `step=epoch`), printed, and reset.

    Args:
        model: object providing `train_step(data)`, `test_step(data)`,
            `metrics` (iterable of metric objects with `name` and `result()`)
            and `reset_metrics()`.
        train_ds: iterable of training batches.
        val_ds: iterable of validation batches.
        epochs (int): number of epochs to run.
        train_summary_writer: summary writer receiving the training metrics.
        val_summary_writer: summary writer receiving the validation metrics.
    """
    for epoch in range(epochs):
        print(f"Epoch {epoch}:")

        # Training:
        for data in tqdm.tqdm(train_ds, position=0, leave=True):
            model.train_step(data)

        # Log once per epoch, after the last batch. Writing inside the batch
        # loop would emit one record per batch, all with the same step value.
        # The values are read from model.metrics rather than from the last
        # step's return value, so an empty dataset cannot cause a NameError.
        _log_epoch_metrics(model, train_summary_writer, epoch)

        # reset all metrics (requires a reset_metrics method in the model)
        model.reset_metrics()

        # Validation:
        for data in val_ds:
            model.test_step(data)

        _log_epoch_metrics(model, val_summary_writer, epoch, prefix="val_")

        # reset all metrics
        model.reset_metrics()
        print("\n")

In [47]:
%tensorboard --logdir logs/

Reusing TensorBoard on port 6006 (pid 1968), started 0:20:50 ago. (Use '!kill 1968' to kill it.)

In [50]:
# Train the BasicConv model for 15 epochs on the CIFAR-10 datasets,
# logging to the two TensorBoard summary writers created earlier.
model = BasicConv()

training_loop(model=model,
                train_ds=train_ds, 
                val_ds=val_ds, 
                epochs=15, 
                train_summary_writer=train_summary_writer, 
                val_summary_writer=val_summary_writer)

Epoch 0:


100%|██████████| 391/391 [01:42<00:00,  3.82it/s]


['loss: 1.762391209602356', 'acc: 0.34516000747680664']
['val_loss: 1.4260337352752686', 'val_acc: 0.47110000252723694']


Epoch 1:


100%|██████████| 391/391 [01:41<00:00,  3.84it/s]


['loss: 1.3145231008529663', 'acc: 0.5218999981880188']
['val_loss: 1.183062195777893', 'val_acc: 0.5651999711990356']


Epoch 2:


100%|██████████| 391/391 [01:43<00:00,  3.79it/s]


['loss: 1.1050620079040527', 'acc: 0.6042199730873108']
['val_loss: 1.0284942388534546', 'val_acc: 0.6355999708175659']


Epoch 3:


  4%|▍         | 16/391 [00:04<01:46,  3.53it/s]


KeyboardInterrupt: 