# All-Convolutional Net for the CIFAR-10 dataset

The model in this notebook is inspired by the All Convolutional Net:

- J. T. Springenberg et al., 2015, Striving for Simplicity: The All Convolutional Net

In [1]:
import os
import time

import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Flatten, BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from keras.datasets import cifar10
from keras.callbacks import ModelCheckpoint

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


# Data Preparation

In [2]:
# Load the CIFAR-10 train/test split through the Keras dataset helper.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Quick sanity report: image tensor shape and the size of each split.
print('X shape : {}'.format(x_train.shape))
print('{} train samples'.format(x_train.shape[0]))
print('{} test samples'.format(x_test.shape[0]))

X shape : (50000, 32, 32, 3)
50000 train samples
10000 test samples


In [3]:
# NOTE: this cell overwrites x_train/x_test/y_train/y_test in place, so it must
# be run exactly once after the data-loading cell; running it a second time
# would normalise and one-hot encode data that has already been processed.

# np.mean/np.std return float64 for integer input. Working in Keras' own float
# type instead avoids carrying double-precision copies of the whole dataset.
float_dtype = keras.backend.floatx()

# Per-position statistics (taken over the sample axis) from the TRAINING set
# only; the same statistics are reused for the test set.
x_train_mean = np.mean(x_train, axis=0).astype(float_dtype)
x_train_std = np.std(x_train, axis=0).astype(float_dtype)

# A position that is constant across the training set has zero standard
# deviation; the small epsilon keeps the division finite in that case.
eps = keras.backend.epsilon()

x_train = (x_train.astype(float_dtype) - x_train_mean) / (x_train_std + eps)
x_test = (x_test.astype(float_dtype) - x_train_mean) / (x_train_std + eps)

# One-hot encode the integer class labels for the categorical cross-entropy loss.
n_y = 10
y_train = keras.utils.to_categorical(y_train, n_y)
y_test = keras.utils.to_categorical(y_test, n_y)

# Model

In [17]:
# Settings shared by every convolutional layer of the network.
input_shape = x_train.shape[1:]
activation = 'relu'
padding = 'same'


def conv3x3(filters, **kwargs):
    """Return a 3x3 Conv2D layer using the shared padding and activation."""
    return Conv2D(filters, (3, 3), padding=padding, activation=activation, **kwargs)


model = Sequential([
    # Stage 1: 64 filters; the strided convolution halves the spatial size.
    conv3x3(64, input_shape=input_shape),
    conv3x3(64),
    conv3x3(64, strides=2),
    Dropout(0.25),

    # Stage 2: 128 filters; the strided convolution halves the spatial size again.
    conv3x3(128),
    conv3x3(128),
    conv3x3(128, strides=2),
    Dropout(0.25),

    # Stage 3: 256 filters at the final resolution, with heavier dropout.
    conv3x3(256),
    conv3x3(256),
    Dropout(0.5),

    # Classifier head: flatten the feature maps and map them to class probabilities.
    Flatten(),
    Dense(n_y, activation='softmax'),
])

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_39 (Conv2D)           (None, 32, 32, 64)        1792      
_________________________________________________________________
conv2d_40 (Conv2D)           (None, 32, 32, 64)        36928     
_________________________________________________________________
conv2d_41 (Conv2D)           (None, 16, 16, 64)        36928     
_________________________________________________________________
dropout_15 (Dropout)         (None, 16, 16, 64)        0         
_________________________________________________________________
conv2d_42 (Conv2D)           (None, 16, 16, 128)       73856     
_________________________________________________________________
conv2d_43 (Conv2D)           (None, 16, 16, 128)       147584    
_________________________________________________________________
conv2d_44 (Conv2D)           (None, 8, 8, 128)         147584    
__________

# Model training

In [18]:
# Optimiser: Adam, referenced through its canonical class name (the lowercase
# `adam` alias is not available in every Keras release).
# Alternative left disabled by the author: SGD(0.01, momentum=0.9) with a step
# learning-rate schedule (0.01 -> 0.005 -> 0.001 at epochs 10 and 25).
optimizer = keras.optimizers.Adam(lr=0.001)
model.compile(optimizer=optimizer,
              loss=keras.losses.categorical_crossentropy,
              metrics=['accuracy'])

# Data augmentation: rotations of up to 10 degrees, horizontal flips and random
# shifts given as a fraction of the image size (4 pixels out of 32).
shift = 4 / 32
generator = ImageDataGenerator(rotation_range=10,
                               width_shift_range=shift,
                               height_shift_range=shift,
                               horizontal_flip=True)

batch_size = 64
# Ceiling division: include the final, smaller batch so that one epoch covers
# the whole training set instead of silently dropping the remainder.
n_steps = (x_train.shape[0] + batch_size - 1) // batch_size

save_path = './Model_trained/All_CNN_model_cifar10.h5'
# The checkpoint can only be written if its target directory exists.
os.makedirs(os.path.dirname(save_path), exist_ok=True)
checkpoint = ModelCheckpoint(save_path, monitor='val_loss', save_best_only=True)
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=25, verbose=1)

# NOTE(review): the test set doubles as validation data here, so checkpoint
# selection and early stopping are both tuned on it and the reported accuracy
# is optimistic. Consider holding out part of the training set instead.
t0 = time.time()
model.fit_generator(generator.flow(x_train, y_train, batch_size=batch_size),
                    steps_per_epoch=n_steps,
                    epochs=200,
                    validation_data=(x_test, y_test),
                    callbacks=[checkpoint, early_stopping])
print('Total training time : %.3f s' % (time.time() - t0))

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200

KeyboardInterrupt: 

# Conclusion

After testing different architectures, it is clear that these all-convolutional models are much slower to converge to a low cost value.
I don't have the patience to train my model for more than 300 epochs (as in the original paper, where this yielded 90-91% accuracy). I only got ~85% accuracy after ~50 epochs.