In [29]:
from keras.datasets import cifar10

In [30]:
# Load the CIFAR-10 train/test splits through the Keras dataset helper.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

In [31]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from keras.models import Sequential, Model

from keras.utils import np_utils
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Activation, Dropout,ZeroPadding2D, Add
from keras.layers import LeakyReLU
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
from keras.engine.input_layer import Input
from tensorflow.keras.layers import BatchNormalization
from keras.initializers import glorot_uniform

In [32]:
# Human-readable class names, indexed by the integer label stored in y_train.
cifar10_classes = [
    'airplane', 'automobile', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]
NUM_CLASSES = 10

# Layout of the sample preview grid: 2 rows by 8 columns.
rows, cols = 2, 8
fig = plt.figure(figsize=(2 * cols - 1, 2.5 * rows - 1))

<Figure size 1080x288 with 0 Axes>

In [33]:
# Build the figure in the same cell that draws and shows it, so the preview
# does not depend on a figure object left over from an earlier cell.
# squeeze=False keeps `axes` a 2-D array even for a 1x1 grid.
fig, axes = plt.subplots(
    rows, cols, figsize=(2 * cols - 1, 2.5 * rows - 1), squeeze=False
)

for ax in axes.flat:
    # One randomly chosen training sample per grid position.
    random_index = np.random.randint(0, len(y_train))
    # Pass a real boolean: the string 'off' is truthy.
    ax.grid(False)
    ax.axis('off')
    ax.imshow(x_train[random_index, :])
    # y_train is indexed as (sample, 0) to obtain the integer class id.
    ax.set_title(cifar10_classes[y_train[random_index, 0]])

# Render once, after every subplot has been filled (not once per column).
plt.show()

In [161]:
# Cast the pixel arrays to float32 and scale them by 1/255.
X_train = x_train.astype('float32') / 255
X_test = x_test.astype('float32') / 255

# One-hot encode the integer labels, one column per entry in cifar10_classes.
Y_train = np_utils.to_categorical(y_train, len(cifar10_classes))
Y_test = np_utils.to_categorical(y_test, len(cifar10_classes))

# Hold out the first 10000 training samples for validation; the remainder is
# the data actually used for fitting.
x_val, partial_x_train = X_train[:10000], X_train[10000:]
y_val, partial_y_train = Y_train[:10000], Y_train[10000:]

In [162]:
# Augmentation pipeline for the training split.
# No `rescale` here: X_train was already divided by 255 before
# partial_x_train / x_val were sliced from it, so rescaling again would scale
# the pixel values down by 255 a second time.
gen = ImageDataGenerator(
    rotation_range=8,
    width_shift_range=0.08,
    shear_range=0.3,
    height_shift_range=0.08,
    zoom_range=0.08)

# Validation batches are passed through unchanged (no augmentation, no rescale).
val_gen = ImageDataGenerator()


train_generator = gen.flow(partial_x_train, partial_y_train, batch_size=64)
val_generator = val_gen.flow(x_val, y_val, batch_size=64)

In [163]:
def option1(X):
    """Apply a single 5x5 convolution block directly to ``X``.

    The tensor is passed, in order, through: zero padding of 1, a 5x5
    convolution with 32 filters (layer name 'one'), batch normalisation over
    axis 3 (layer name 'one1'), ReLU, and 5x5 max pooling with stride 1.

    Returns the output tensor of the pooling layer.
    """
    # Layers are constructed in the same order in which they are applied.
    stack = (
        ZeroPadding2D(padding=(1, 1), data_format=None),
        Conv2D(32, (5, 5), strides=(1, 1), name='one'),
        BatchNormalization(axis=3, name='one1'),
        Activation('relu'),
        MaxPooling2D((5, 5), strides=(1, 1)),
    )

    out = X
    for layer in stack:
        out = layer(out)
    return out

In [164]:
def option2(X):
    """Apply a 1x1 convolution block followed by a 5x5 convolution block.

    First component: zero padding of 1, a 1x1 'same' convolution with 16
    filters (layer name 'two'), batch normalisation over axis 3 (layer name
    'two2'), ReLU, and 5x5 max pooling with stride 1.

    Second component: zero padding of 5, a 5x5 'same' convolution with 32
    filters (layer name 'twoo'), batch normalisation over axis 3 (layer name
    'tw2'), ReLU, and 5x5 max pooling with stride 1.

    Returns the output tensor of the final pooling layer.
    """
    # Layers are constructed in the same order in which they are applied.
    first_component = (
        ZeroPadding2D(padding=(1, 1), data_format=None),
        Conv2D(16, (1, 1), strides=(1, 1), name='two', padding='same'),
        BatchNormalization(axis=3, name='two2'),
        Activation('relu'),
        MaxPooling2D((5, 5), strides=(1, 1)),
    )
    second_component = (
        ZeroPadding2D(padding=(5, 5), data_format=None),
        Conv2D(32, (5, 5), strides=(1, 1), name='twoo', padding='same'),
        BatchNormalization(axis=3, name='tw2'),
        Activation('relu'),
        MaxPooling2D((5, 5), strides=(1, 1)),
    )

    out = X
    for layer in first_component + second_component:
        out = layer(out)
    return out

In [165]:
# Symbolic input shared by every graph built below: 28x28 spatial size with
# 192 channels (channels-last, matching the axis=3 batch normalisation).
X_input = Input(shape=(28, 28, 192))

In [166]:
# Stage 1
# NOTE(review): the X produced here is replaced by option1(X_input) two cells
# further down, before any Model is built, so these layers never appear in the
# "Option 1" summary. They still consume auto-generated layer-name indices
# (zero_padding2d_N, activation_N, max_pooling2d_N), which is why the code is
# left untouched rather than removed.
X = ZeroPadding2D(padding=(1,1), data_format=None)(X_input)
X = Conv2D(64, (1,1), strides=(2,2), name='conv1', kernel_initializer=glorot_uniform(seed=0))(X)
X = BatchNormalization(axis=3, name='bn_conv1')(X)
X = Activation('relu')(X)
X = MaxPooling2D((5,5), strides=(2,2))(X)

In [167]:
# Alias for the block input; nothing in the visible cells reads X_shortcut.
X_shortcut = X_input

In [168]:
# Build the single-5x5-convolution variant straight from the raw input.
# This rebinds X, discarding the Stage 1 tensor assigned to it earlier.
X = option1(X_input)

## Option 1

In [169]:
# Wrap the option1 graph in a Model so summary() lists every layer's output
# shape and parameter count.
model = Model(inputs=X_input, outputs=X, name='Option 1')
model.summary()

Model: "Option 1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_14 (InputLayer)       [(None, 28, 28, 192)]     0         
                                                                 
 zero_padding2d_28 (ZeroPadd  (None, 30, 30, 192)      0         
 ing2D)                                                          
                                                                 
 one (Conv2D)                (None, 26, 26, 32)        153632    
                                                                 
 one1 (BatchNormalization)   (None, 26, 26, 32)        128       
                                                                 
 activation_37 (Activation)  (None, 26, 26, 32)        0         
                                                                 
 max_pooling2d_19 (MaxPoolin  (None, 22, 22, 32)       0         
 g2D)                                                     

In [170]:

# NOTE(review): verbatim repeat of the earlier Stage 1 cell. The X produced
# here is replaced by option2(X_input) two cells further down, so these layers
# are not part of the "Option 2" model; they only advance the auto-generated
# layer-name counters.
X = ZeroPadding2D(padding=(1,1), data_format=None)(X_input)
X = Conv2D(64, (1,1), strides=(2,2), name='conv1', kernel_initializer=glorot_uniform(seed=0))(X)
X = BatchNormalization(axis=3, name='bn_conv1')(X)
X = Activation('relu')(X)
X = MaxPooling2D((5,5), strides=(2,2))(X)

In [171]:
# Alias for the block input; nothing in the visible cells reads X_shortcut.
X_shortcut = X_input

In [172]:
# Build the 1x1-then-5x5 variant straight from the raw input.
# This rebinds X, discarding the tensor assigned to it in the preceding cells.
X = option2(X_input)

## Option 2

In [173]:
# Wrap the option2 graph in a Model so summary() lists every layer's output
# shape and parameter count. This rebinds `model`, replacing the Option 1 model.
model = Model(inputs=X_input, outputs=X, name='Option 2')
model.summary()

Model: "Option 2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_14 (InputLayer)       [(None, 28, 28, 192)]     0         
                                                                 
 zero_padding2d_30 (ZeroPadd  (None, 30, 30, 192)      0         
 ing2D)                                                          
                                                                 
 two (Conv2D)                (None, 30, 30, 16)        3088      
                                                                 
 two2 (BatchNormalization)   (None, 30, 30, 16)        64        
                                                                 
 activation_39 (Activation)  (None, 30, 30, 16)        0         
                                                                 
 max_pooling2d_21 (MaxPoolin  (None, 26, 26, 16)       0         
 g2D)                                                     

<br/>

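From the summaries above we can see that option 1 has significantly more parameters than option 2. In option 1 a single 5x5 convolution with 32 filters is applied directly to all 192 input channels (153,632 parameters).
In option 2 a 1x1 convolution is placed before the 5x5 convolution; it first reduces the input to 16 channels (3,088 parameters), so the 5x5 convolution that follows needs far fewer weights.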

This is a very useful technique because it significantly reduces the computational cost.