In [2]:
import tensorflow as tf
import numpy as np


In [170]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar100.load_data(label_mode='fine')

In [171]:
x_train = x_train-np.mean(x_train,axis=0)
x_test = x_test - np.mean(x_test,axis=0)

In [172]:
x_train.shape

(50000, 32, 32, 3)

In [310]:
from tensorflow.keras.layers import Dense, Conv2D, GlobalAveragePooling2D, BatchNormalization
input_shape = (32,32,3)
"""
For training on cifar10/cifar100 data
create a plain network model, 6n+2 layers,
"""
def create_model(input_shape,n):
    model = tf.keras.Sequential()
    model.add(Conv2D(filters=16,kernel_size=(3, 3),input_shape=input_shape,activation='relu'
             ,kernel_initializer=tf.keras.initializers.HeNormal(),padding="same",use_bias=True))
    for i in range(2*n):
        model.add(Conv2D(filters=16,kernel_size=(3, 3),activation='relu',padding="same"
                     ,use_bias=True,kernel_initializer=tf.keras.initializers.HeNormal()))
    model.add(Conv2D(filters=32,kernel_size=(3, 3),activation='relu',strides=(2,2),padding="same"
             ,use_bias=True,kernel_initializer=tf.keras.initializers.HeNormal()))
    for i in range(2*n-1):
        model.add(Conv2D(filters=32,kernel_size=(3, 3),activation='relu',padding="same"
                     ,use_bias=True,kernel_initializer=tf.keras.initializers.HeNormal()))
    model.add(Conv2D(filters=64,kernel_size=(3, 3),activation='relu',strides=(2,2),padding="same"
             ,use_bias=True,kernel_initializer=tf.keras.initializers.HeNormal()))
    for i in range(2*n-1):
        model.add(Conv2D(filters=64,kernel_size=(3, 3),activation='relu',padding="same"
                     ,use_bias=True,kernel_initializer=tf.keras.initializers.HeNormal()))
    model.add(GlobalAveragePooling2D())
    model.add(BatchNormalization())
    model.add(Dense(100,activation='softmax'))
    return model

In [311]:
model = create_model(input_shape,3)

In [313]:
model(x_train[1:3])

<tf.Tensor: shape=(2, 100), dtype=float32, numpy=
array([[6.38601274e-16, 5.94529851e-14, 1.84309363e-24, 7.04296315e-17,
        1.30081588e-17, 2.81793123e-15, 8.36956449e-10, 8.45094827e-12,
        4.22094178e-25, 1.04983708e-12, 4.03955694e-19, 5.96330494e-31,
        2.21559132e-10, 4.21056136e-21, 2.77805837e-28, 6.35065589e-19,
        2.12544687e-23, 2.31435912e-10, 1.03491773e-20, 1.82689159e-21,
        8.94365657e-15, 5.46056689e-13, 2.83260422e-04, 5.31081717e-16,
        3.31741052e-22, 2.34486111e-11, 3.90192241e-07, 8.01006765e-16,
        1.78085960e-20, 5.48781533e-14, 1.30555221e-19, 1.87901517e-09,
        8.75484238e-19, 1.90373543e-22, 2.65624344e-16, 3.90123345e-13,
        5.64213999e-21, 9.84520576e-20, 9.83486567e-16, 1.91014322e-23,
        1.94508791e-20, 4.17352649e-13, 4.40963192e-15, 7.90996810e-28,
        8.31963306e-22, 1.49030500e-04, 3.82558343e-26, 6.03917705e-29,
        2.48749605e-17, 1.27221751e-21, 9.54995217e-19, 8.73435808e-27,
        1.0083

In [86]:
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [302]:
# model.fit(x=x_train, 
#           y=y_train,
#           batch_size=128,
#           epochs=10, 
#           validation_data=(x_test, y_test))

In [296]:
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Conv2D, GlobalAveragePooling2D, BatchNormalization, ReLU

class ResNet(Model):

    def __init__(self, num_block, input_shape, output_size=100):
        '''
        For training the cifar10/cifar100 data in the ResNet paper
        input_shape: The size of the input. (img_len, img_len, channel_num).
        output_size: The size of the output. It should be equal to the number of classes.
        '''
        super(ResNet, self).__init__()
        #############################################################
        # TODO: Define layers for your custom LeNet network         
        # Hint: Try adding additional convolution and avgpool layers
        #############################################################
        self.num_block = num_block
        self.layer = dict()
        self.layer['conv1'] = Conv2D(filters=16,kernel_size=(3, 3),input_shape=input_shape,activation='relu'
                                ,kernel_initializer=tf.keras.initializers.HeNormal(),padding="same",use_bias=True)

        for i in range(2*num_block):
            self.layer['conv1_%d'%(i+1)] = Conv2D(filters=16,kernel_size=(3, 3),activation='relu',padding="same"
                                    ,use_bias=True,kernel_initializer=tf.keras.initializers.HeNormal())

        self.layer['conv2_1'] = Conv2D(filters=32,kernel_size=(3, 3),activation='relu',strides=(2,2),padding="same"
             ,use_bias=True,kernel_initializer=tf.keras.initializers.HeNormal())
        for i in range(2*num_block-1):
            self.layer['conv2_%d'%(i+2)] = Conv2D(filters=32,kernel_size=(3, 3),activation='relu',padding="same"
                     ,use_bias=True,kernel_initializer=tf.keras.initializers.HeNormal())
        
        self.layer['conv3_1'] = Conv2D(filters=64,kernel_size=(3, 3),activation='relu',strides=(2,2),padding="same"
             ,use_bias=True,kernel_initializer=tf.keras.initializers.HeNormal())
        for i in range(2*num_block-1):
            self.layer['conv3_%d'%(i+2)] = Conv2D(filters=64,kernel_size=(3, 3),activation='relu',padding="same"
                     ,use_bias=True,kernel_initializer=tf.keras.initializers.HeNormal())

        
        self.layer['avgpool'] = GlobalAveragePooling2D()
        self.layer['bn'] = BatchNormalization()
        self.layer['fc'] = Dense(100,activation='softmax')
    
        #############################################################
        #                          END TODO                         #                                              
        #############################################################

    
    def call(self, x):
        '''
        x: input to LeNet model.
        '''
        #call function returns forward pass output of the network
        #############################################################
        # TODO: Implement forward pass for custom network defined 
        # in __init__ and return network output
        #############################################################
        num_block = self.num_block
        x = self.layer['conv1'](x)
        x = BatchNormalization()(x)
        x_prev = x
        
        for i in range(2*num_block):
            if i%2 == 0:
                x = self.layer['conv1_%d'%(i+1)](x)
                x = BatchNormalization()(x)
            else:
                x = ReLU()(self.layer['conv1_%d'%(i+1)](x) + x_prev)
                x = BatchNormalization()(x)
                x_prev = x
        


        for i in range(2*num_block):
            if i%2 == 0:
                x = self.layer['conv2_%d'%(i+1)](x)
                x = BatchNormalization()(x)
            else:
                if x.shape == x_prev.shape:
                    x =  ReLU()(self.layer['conv2_%d'%(i+1)](x) + x_prev)
                    x = BatchNormalization()(x)
                    x_prev = x
                else:
                    x_prev = Conv2D(filters=32,kernel_size=(1, 1),strides=(2, 2)
                            ,use_bias=False,kernel_initializer=tf.keras.initializers.Ones())(x_prev)/16
                    x =  ReLU()(self.layer['conv2_%d'%(i+1)](x) + x_prev)
                    x = BatchNormalization()(x)
                    x_prev = x

        for i in range(2*num_block):
            if i%2 == 0:
                x = self.layer['conv3_%d'%(i+1)](x)
                x = BatchNormalization()(x)
            else:
                if x.shape == x_prev.shape:
                    x =  ReLU()(self.layer['conv3_%d'%(i+1)](x) + x_prev)
                    x = BatchNormalization()(x)
                    x_prev = x
                else:
                    x_prev = Conv2D(filters=64,kernel_size=(1, 1),strides=(2, 2)
                            ,use_bias=False,kernel_initializer=tf.keras.initializers.Ones())(x_prev)/32
                    x =  ReLU()(self.layer['conv3_%d'%(i+1)](x) + x_prev)
                    x = BatchNormalization()(x)
                    x_prev = x

        x = self.layer['avgpool'](x)
        x = self.layer['bn'](x)
        out = self.layer['fc'](x)
        
        return out

In [305]:
import time
model = ResNet(num_block=1, input_shape=input_shape)


optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
train_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy()
val_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy()

batch_size = 128
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(batch_size)
val_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(batch_size)

epochs = 5

history = [[],[],[]]
for epoch in range(epochs):
    print("Epoch %d/%d" % (epoch+1,epochs))
    
    start_time = time.time()

    for step, (x_batch_train, y_batch_train) in enumerate(train_data):
        with tf.GradientTape() as tape:
            logits = model(x_batch_train, training=True)
            loss_value = loss_fn(y_batch_train, logits)
        grads = tape.gradient(loss_value, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))
        train_acc_metric.update_state(y_batch_train, logits)
        if step % 400 == 0:
            print(
                "Training loss at step %d: %.4f"
                % (step, float(loss_value))
            )

    history[0].append(loss_value)
    train_acc = train_acc_metric.result()
    train_acc_metric.reset_states()

    for x_batch_val, y_batch_val in val_dataset:
        val_logits = model(x_batch_val, training=False)
        val_acc_metric.update_state(y_batch_val, val_logits)
    val_acc = val_acc_metric.result()
    val_acc_metric.reset_states()
    history[1].append(train_acc)
    history[2].append(val_acc)
    print("Training accuracy: %.4f" % (float(train_acc),)
          ,"Validation accuracy: %.4f" % (float(val_acc),),"Time taken: %.2fs" % (time.time() - start_time))

Epoch 1/5
Training loss at step 0: 5.2383
Training accuracy: 0.1220 Validation accuracy: 0.0087 Time taken: 145.43s
Epoch 2/5
Training loss at step 0: 3.3683
Training accuracy: 0.2279 Validation accuracy: 0.0100 Time taken: 142.66s
Epoch 3/5
Training loss at step 0: 2.9697
Training accuracy: 0.2871 Validation accuracy: 0.0100 Time taken: 141.12s
Epoch 4/5
Training loss at step 0: 2.7919
Training accuracy: 0.3289 Validation accuracy: 0.0100 Time taken: 131.99s
Epoch 5/5
Training loss at step 0: 2.6243


KeyboardInterrupt: 

In [266]:
x = Conv2D(filters=16,kernel_size=(3, 3),input_shape=input_shape,activation='relu'
                                ,kernel_initializer=tf.keras.initializers.HeNormal(),padding="same",use_bias=True)(x)
x_pre = x

In [267]:
x = Conv2D(filters=16,kernel_size=(3, 3),activation='relu',padding="same"
                                    ,use_bias=True,kernel_initializer=tf.keras.initializers.HeNormal())(x)
x = Conv2D(filters=16,kernel_size=(3, 3),activation='relu',padding="same"
                                    ,use_bias=True,kernel_initializer=tf.keras.initializers.HeNormal())(x) + x_pre
x =  ReLU()(x)
x_pre = x

In [268]:
x = Conv2D(filters=32,kernel_size=(3, 3),activation='relu',strides=(2,2),padding="same"
             ,use_bias=True,kernel_initializer=tf.keras.initializers.HeNormal())(x)
x_pre = Conv2D(filters=32,kernel_size=(1, 1),strides=(2, 2)
                            ,use_bias=False,kernel_initializer=tf.keras.initializers.Ones())(x_pre)/16
x = Conv2D(filters=32,kernel_size=(3, 3),activation='relu',padding="same"
                     ,use_bias=True,kernel_initializer=tf.keras.initializers.HeNormal())(x) + x_pre
x =  ReLU()(x)
x_pre = x

In [269]:
x = Conv2D(filters=64,kernel_size=(3, 3),activation='relu',strides=(2,2),padding="same"
             ,use_bias=True,kernel_initializer=tf.keras.initializers.HeNormal())(x)
x_pre = Conv2D(filters=64,kernel_size=(1, 1),strides=(2, 2)
                            ,use_bias=False,kernel_initializer=tf.keras.initializers.Ones())(x_pre)/32
x = Conv2D(filters=64,kernel_size=(3, 3),activation='relu',padding="same"
                     ,use_bias=True,kernel_initializer=tf.keras.initializers.HeNormal())(x) + x_pre
x =  ReLU()(x)
x_pre = x

In [270]:
x = GlobalAveragePooling2D()(x)

In [271]:
x = Dense(100,activation='softmax')(x)

In [272]:
x

<tf.Tensor: shape=(3, 100), dtype=float32, numpy=
array([[0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        3.2832270e-07, 0.0000000e+00, 0.0000000e+00, 3.9675338e-03,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        9.9603206e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00