# Real Valued CNN with similar number of parameters

In [1]:
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model
from functools import partial
from keras.preprocessing.image import ImageDataGenerator
from keras.datasets import cifar10
from sklearn.metrics import accuracy_score
from keras.callbacks import LearningRateScheduler, ReduceLROnPlateau

In [2]:
(Xtr_cifar, ytr_cifar), (Xte_cifar, yte_cifar) = cifar10.load_data()
Xtr_cifar = Xtr_cifar / 255
Xte_cifar = Xte_cifar / 255

Xtr_cifar -= np.mean(Xtr_cifar, axis=0)
Xte_cifar -= np.mean(Xtr_cifar, axis=0)

n_classes = 10
ytr_cifar = keras.utils.to_categorical(ytr_cifar, num_classes=n_classes)
yte_cifar = keras.utils.to_categorical(yte_cifar, num_classes=n_classes)

In [3]:
DefaultConv2D = partial(keras.layers.Conv2D, 
                        kernel_size=3, 
                        strides=1, 
                        padding="SAME", 
                        use_bias=False, 
                        kernel_initializer='he_uniform',
                        kernel_regularizer=keras.regularizers.l2(1e-3))

In [4]:
def learning_rate(epoch):
    lr = 1e-2
    if epoch < 151 and epoch > 9:
        lr *= 10.
    elif epoch > 199:
        lr /= 10.
    print('Learning rate: ', lr)
    return lr

In [5]:
class ResidualUnit(keras.layers.Layer):
    def __init__(self, filters, strides=1, conv_first=True, activation="elu", include_bn=True, **kwargs):
        super().__init__(**kwargs)
        self.activation = keras.activations.get(activation)
        self.conv_first = conv_first
        self.include_bn = include_bn

        self.main_layers = []
        if self.conv_first:
            self.main_layers.append(DefaultConv2D(filters, strides=strides))
            if self.include_bn:
                self.main_layers.append(keras.layers.BatchNormalization())
            self.main_layers.append(self.activation)
            self.main_layers.append(DefaultConv2D(filters))
            if self.include_bn:
                self.main_layers.append(keras.layers.BatchNormalization())
        else:
            if self.include_bn:
                self.main_layers.append(keras.layers.BatchNormalization())
            self.main_layers.append(self.activation)
            self.main_layers.append(DefaultConv2D(filters, strides=strides))
            if self.include_bn:
                self.main_layers.append(keras.layers.BatchNormalization())
            self.main_layers.append(self.activation)
            self.main_layers.append(DefaultConv2D(filters))

        self.skip_layers = []
        if strides > 1:
            self.skip_layers = [DefaultConv2D(filters, kernel_size=1, strides=strides)]
            if self.include_bn:
                self.skip_layers.append(keras.layers.BatchNormalization())

    def call(self, inputs):
        Z = inputs
        for layer in self.main_layers:
            Z = layer(Z)
        skip_Z = inputs
        for layer in self.skip_layers:
            skip_Z = layer(skip_Z)
        if self.conv_first:
            return self.activation(Z + skip_Z)
        else:
            return Z + skip_Z

In [6]:
model = keras.models.Sequential()
model.add(DefaultConv2D(12, kernel_size=3, strides=1,
                        input_shape=[32, 32, 3]))
prev_filters = 12
for filters in [12] * 3 + [24] * 2 + [48] * 2:
    strides = 1 if filters == prev_filters else 2
    model.add(ResidualUnit(filters, strides=strides))
    prev_filters = filters
model.add(keras.layers.GlobalAvgPool2D())
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(10, activation="softmax"))
model.compile(loss="categorical_crossentropy", optimizer=keras.optimizers.SGD(learning_rate=learning_rate(0)), metrics=["accuracy"])

Learning rate:  0.01


In [7]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 32, 32, 12)        324       
_________________________________________________________________
residual_unit (ResidualUnit) (None, 32, 32, 12)        2688      
_________________________________________________________________
residual_unit_1 (ResidualUni (None, 32, 32, 12)        2688      
_________________________________________________________________
residual_unit_2 (ResidualUni (None, 32, 32, 12)        2688      
_________________________________________________________________
residual_unit_3 (ResidualUni (None, 16, 16, 24)        8352      
_________________________________________________________________
residual_unit_4 (ResidualUni (None, 16, 16, 24)        10560     
_________________________________________________________________
residual_unit_5 (ResidualUni (None, 8, 8, 48)          3

In [8]:
lr_scheduler = LearningRateScheduler(learning_rate)

lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1),
                               cooldown=0,
                               patience=5,
                               min_lr=0.5e-6)
callbacks = [lr_reducer,lr_scheduler]

In [9]:
#data augmentation
datagen = ImageDataGenerator(width_shift_range=0.125, height_shift_range=0.125, horizontal_flip=True)
datagen.fit(Xtr_cifar)

In [10]:
batch_size = 128
epochs = 250

model.fit(datagen.flow(Xtr_cifar, ytr_cifar, batch_size=batch_size), epochs=epochs, validation_data=(Xte_cifar,yte_cifar), callbacks=callbacks)

Epoch 1/250
Learning rate:  0.01
Epoch 2/250
Learning rate:  0.01
Epoch 3/250
Learning rate:  0.01
Epoch 4/250
Learning rate:  0.01
Epoch 5/250
Learning rate:  0.01
Epoch 6/250
Learning rate:  0.01
Epoch 7/250
Learning rate:  0.01
Epoch 8/250
Learning rate:  0.01
Epoch 9/250
Learning rate:  0.01
Epoch 10/250
Learning rate:  0.01
Epoch 11/250
Learning rate:  0.1
Epoch 12/250
Learning rate:  0.1
Epoch 13/250
Learning rate:  0.1
Epoch 14/250
Learning rate:  0.1
Epoch 15/250
Learning rate:  0.1
Epoch 16/250
Learning rate:  0.1
Epoch 17/250
Learning rate:  0.1
Epoch 18/250
Learning rate:  0.1
Epoch 19/250
Learning rate:  0.1
Epoch 20/250
Learning rate:  0.1
Epoch 21/250
Learning rate:  0.1
Epoch 22/250
Learning rate:  0.1
Epoch 23/250
Learning rate:  0.1
Epoch 24/250
Learning rate:  0.1
Epoch 25/250
Learning rate:  0.1
Epoch 26/250
Learning rate:  0.1
Epoch 27/250
Learning rate:  0.1
Epoch 28/250
Learning rate:  0.1
Epoch 29/250
Learning rate:  0.1
Epoch 30/250
Learning rate:  0.1
Epoch 31/

<tensorflow.python.keras.callbacks.History at 0x7f47e026c690>

In [11]:
accuracy_score(np.argmax(ytr_cifar, axis=1),np.argmax(model.predict(Xtr_cifar),axis=1))

0.96178

In [12]:
accuracy_score(np.argmax(yte_cifar,axis=1),np.argmax(model.predict(Xte_cifar),axis=1))

0.8432