In [29]:
from keras.datasets import cifar10
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.layers.advanced_activations import LeakyReLU, PReLU, ParametricSoftplus
from keras.utils import np_utils, generic_utils
from keras.callbacks import ModelCheckpoint, EarlyStopping

In [122]:
# ---- training settings ----
batch_size = 64
nb_classes = 10
nb_epoch = 100
data_augmentation = False

# ---- input / network geometry ----
# image width and height in pixels (SHAPE x SHAPE)
shapex = shapey = 32
# number of convolutional filters to use at each layer
nb_filters = [32, 64]
# pooling window at each layer (POOL x POOL)
nb_pool = [2, 2]
# convolution kernel size at each layer (CONV x CONV)
nb_conv = [3, 3]
# the CIFAR10 images are RGB, i.e. three channels
image_dimensions = 3

# ---- data: CIFAR10, shuffled and split between train and test sets ----
(X_train, y_train), (X_test, y_test) = cifar10.load_data()
print('X_train shape:', X_train.shape)
for sample_count, split_label in ((X_train.shape[0], 'train samples'),
                                  (X_test.shape[0], 'test samples')):
    print(sample_count, split_label)

X_train shape: (50000, 3, 32, 32)
50000 train samples
10000 test samples


In [123]:
# One-hot encode the integer class labels for the categorical cross-entropy loss.
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)

model = Sequential()

# Stage 1: two 3x3 convolutions (3 -> 32 -> 32 filters), 2x2 max-pooling, dropout.
model.add(Convolution2D(32, 3, 3, 3, border_mode='full'))
model.add(LeakyReLU(alpha=0.2))
model.add(Convolution2D(32, 32, 3, 3))
model.add(LeakyReLU(alpha=0.2))
model.add(MaxPooling2D(poolsize=(2,2)))
model.add(Dropout(0.2))

# Stage 2: two 3x3 convolutions (32 -> 64 -> 64 filters), 2x2 max-pooling, dropout.
model.add(Convolution2D(64, 32, 3, 3, border_mode='full'))
model.add(LeakyReLU(alpha=0.2))
model.add(Convolution2D(64, 64, 3, 3))
model.add(LeakyReLU(alpha=0.2))
model.add(MaxPooling2D(poolsize=(2, 2)))
model.add(Dropout(0.3))

# Stage 3: two 3x3 convolutions (64 -> 128 -> 128 filters), 2x2 max-pooling, dropout.
model.add(Convolution2D(128, 64, 3, 3, border_mode='full'))
model.add(LeakyReLU(alpha=0.2))
model.add(Convolution2D(128, 128, 3, 3))
model.add(LeakyReLU(alpha=0.2))
model.add(MaxPooling2D(poolsize=(2, 2)))
model.add(Dropout(0.4))

# Classifier head. The flattened size divides each spatial dimension by 2**3 for
# the three 2x2 poolings (assumes each 'full' + default convolution pair leaves
# the spatial size unchanged -- TODO confirm). Floor division keeps the layer
# size an int under Python 3, where `/` would yield a float.
model.add(Flatten())
model.add(Dense(128 * (shapex // 2**3) * (shapey // 2**3), 512))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(512, nb_classes))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', class_mode='categorical')

if not data_augmentation:
    print("Not using data augmentation or normalization")

    # Keep only the best weights (by validation loss) and stop after 10 epochs
    # without improvement.
    checkpointer = ModelCheckpoint(filepath='cifar10_cnn_keras_weights.hdf5', verbose=1, save_best_only=True)
    earlystopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1)

    # Convert to float32 and divide by 255 exactly once. The dtype guard makes
    # the cell safe to re-run: an in-place `/= 255` would scale the already
    # converted arrays a second time.
    if X_train.dtype.kind != 'f':
        X_train = X_train.astype("float32") / 255
    if X_test.dtype.kind != 'f':
        X_test = X_test.astype("float32") / 255

    model.fit(X_train, Y_train,
              batch_size=batch_size,
              nb_epoch=nb_epoch,
              show_accuracy=True,
              validation_data=(X_test, Y_test),
              callbacks=[checkpointer, earlystopping])

else:
    print("Using real time data augmentation")

    # NOTE(review): unlike the branch above, the arrays are not converted to
    # float32 / scaled here before being handed to the generator -- confirm
    # that this is intended.

    # this will do preprocessing and realtime data augmentation
    datagen = ImageDataGenerator(
        featurewise_center=True,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=True,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=20,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.2,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.2,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images horizontally
        vertical_flip=False)  # do not flip images vertically

    # compute quantities required for featurewise normalization
    # (std, mean, and principal components if ZCA whitening is applied)
    datagen.fit(X_train)

    # `max_test_loss` holds the best (lowest) test loss seen so far.
    current_test_loss = 1000
    max_test_loss = 1000
    patience_count = 0

    for e in range(nb_epoch):
        print('-'*40)
        print('Epoch', e)
        print('-'*40)
        print("Training...")
        # batch train with realtime data augmentation
        progbar = generic_utils.Progbar(X_train.shape[0])
        for X_batch, Y_batch in datagen.flow(X_train, Y_train, batch_size=batch_size):
            train_res = model.train_on_batch(X_batch, Y_batch, accuracy=True)
            progbar.add(X_batch.shape[0], values=[("train loss", train_res[0]), ("train acc", train_res[1])])

        print("Testing...")
        # NOTE(review): the test batches come from the same generator as the
        # training batches, so they presumably receive the same random
        # transforms -- confirm whether evaluation should be un-augmented.
        progbar = generic_utils.Progbar(X_test.shape[0])
        test_loss_sum = 0.0
        test_samples_seen = 0
        for X_batch, Y_batch in datagen.flow(X_test, Y_test, batch_size=batch_size):
            test_res = model.test_on_batch(X_batch, Y_batch, accuracy=True)
            batch_len = X_batch.shape[0]
            # Accumulate a sample-weighted sum so the epoch loss reflects every
            # test batch, not just whichever batch happened to come last.
            test_loss_sum += float(test_res[0]) * batch_len
            test_samples_seen += batch_len
            progbar.add(batch_len, values=[("test loss", test_res[0]), ("test acc", test_res[1])])
        if test_samples_seen:
            current_test_loss = test_loss_sum / test_samples_seen

        print(current_test_loss, max_test_loss)
        if current_test_loss < max_test_loss:
            # New best epoch: checkpoint the weights and reset the patience counter.
            print('Saving weights to file.')
            model.save_weights('cifar10_cnn_keras_weights.hdf5', overwrite=True)
            patience_count = 0
            max_test_loss = current_test_loss
        elif patience_count > 20:
            print('\nEarly stopping.')
            break
        else:
            patience_count += 1

Not using data augmentation or normalization
Train on 50000 samples, validate on 10000 samples
Epoch 1/100
Epoch 00000: val_loss improved from inf to 1.01334, saving model to cifar10_cnn_keras_weights.hdf5
Epoch 2/100
Epoch 00001: val_loss improved from 1.01334 to 0.80611, saving model to cifar10_cnn_keras_weights.hdf5
Epoch 3/100
Epoch 00002: val_loss improved from 0.80611 to 0.69062, saving model to cifar10_cnn_keras_weights.hdf5
Epoch 4/100
Epoch 00003: val_loss improved from 0.69062 to 0.64301, saving model to cifar10_cnn_keras_weights.hdf5
Epoch 5/100
Epoch 00004: val_loss improved from 0.64301 to 0.60868, saving model to cifar10_cnn_keras_weights.hdf5
Epoch 6/100
Epoch 00005: val_loss improved from 0.60868 to 0.57672, saving model to cifar10_cnn_keras_weights.hdf5
Epoch 7/100
Epoch 00006: val_loss improved from 0.57672 to 0.57468, saving model to cifar10_cnn_keras_weights.hdf5
Epoch 8/100
Epoch 00007: val_loss improved from 0.57468 to 0.57232, saving model to cifar10_cnn_keras_we

In [113]:
import h5py
import json
import gzip

# Maps the layer name found in the Keras model JSON to the layer name written
# into the exported parameter file.
layer_name_dict = {
    # core / embedding / normalization layers
    'Dense': 'denseLayer',
    'Dropout': 'dropoutLayer',
    'Flatten': 'flattenLayer',
    'Embedding': 'embeddingLayer',
    'BatchNormalization': 'batchNormalizationLayer',

    # advanced activation layers
    'LeakyReLU': 'leakyReLULayer',
    'PReLU': 'parametricReLULayer',
    'ParametricSoftplus': 'parametricSoftplusLayer',
    'ThresholdedLinear': 'thresholdedLinearLayer',
    'ThresholdedReLu': 'thresholdedReLuLayer',

    # recurrent layers
    'LSTM': 'rLSTMLayer',
    'GRU': 'rGRULayer',
    'JZS1': 'rJZS1Layer',
    'JZS2': 'rJZS2Layer',
    'JZS3': 'rJZS3Layer',

    # convolutional layers
    'Convolution2D': 'convolution2DLayer',
    'MaxPooling2D': 'maxPooling2DLayer',
}

# For every supported layer name: the ordered list of parameters that
# serialize() exports. The special entry 'weights' is filled from the HDF5
# weights file; every other entry is read from the layer's JSON description.
layer_params_dict = {
    # core / embedding / normalization layers
    'Dense': ['weights', 'activation'],
    'Dropout': ['p'],
    'Flatten': [],
    'Embedding': ['weights'],
    'BatchNormalization': ['weights', 'epsilon'],

    # advanced activation layers
    'LeakyReLU': ['alpha'],
    'PReLU': ['weights'],
    'ParametricSoftplus': ['weights'],
    'ThresholdedLinear': ['theta'],
    'ThresholdedReLu': ['theta'],

    # recurrent layers
    'LSTM': ['weights', 'activation', 'inner_activation'],
    'GRU': ['weights', 'activation', 'inner_activation'],
    'JZS1': ['weights', 'activation', 'inner_activation'],
    'JZS2': ['weights', 'activation', 'inner_activation'],
    'JZS3': ['weights', 'activation', 'inner_activation'],

    # convolutional layers
    'Convolution2D': [
        'weights',
        'nb_filter',
        'stack_size',
        'nb_row',
        'nb_col',
        'border_mode',
        'subsample',
        'activation',
    ],
    'MaxPooling2D': ['poolsize', 'stride', 'ignore_border'],
}

# For every layer that carries weights: the names given, in order, to the
# weight arrays read from that layer's group in the HDF5 weights file.
layer_weights_dict = {
    'Dense': ['W', 'b'],
    'Embedding': ['E'],
    'BatchNormalization': ['gamma', 'beta', 'mean', 'std'],
    'PReLU': ['alphas'],
    'ParametricSoftplus': ['alphas', 'betas'],
    'LSTM': [
        'W_xi', 'W_hi', 'b_i',
        'W_xc', 'W_hc', 'b_c',
        'W_xf', 'W_hf', 'b_f',
        'W_xo', 'W_ho', 'b_o',
    ],
    'GRU': [
        'W_xz', 'W_hz', 'b_z',
        'W_xr', 'W_hr', 'b_r',
        'W_xh', 'W_hh', 'b_h',
    ],
    'JZS1': ['W_xz', 'b_z', 'W_xr', 'W_hr', 'b_r', 'W_hh', 'b_h', 'Pmat'],
    'JZS2': ['W_xz', 'W_hz', 'b_z', 'W_hr', 'b_r', 'W_xh', 'W_hh', 'b_h', 'Pmat'],
    'JZS3': [
        'W_xz', 'W_hz', 'b_z',
        'W_xr', 'W_hr', 'b_r',
        'W_xh', 'W_hh', 'b_h',
    ],
    'Convolution2D': ['W', 'b'],
}

def _param_sort_key(dataset_name):
    """Sort key that orders names ending in ``_<integer>`` by that integer."""
    suffix = dataset_name.rpartition('_')[2]
    if suffix.isdigit():
        return (0, int(suffix), dataset_name)
    # Names without a numeric suffix keep plain string order, after numbered ones.
    return (1, 0, dataset_name)


def serialize(model_json_file, weights_hdf5_file, save_filepath, compress):
    """Merge a model description and its weights into one JSON parameter file.

    Parameters
    ----------
    model_json_file : str
        Path to a JSON file holding a dict with a ``'layers'`` list; every
        layer entry has a ``'name'`` plus the parameters listed for that name
        in ``layer_params_dict``.
    weights_hdf5_file : str
        Path to an HDF5 file with one group ``layer_<k>`` per weighted layer,
        where ``k`` is the layer's index in the ``'layers'`` list.
    save_filepath : str
        Destination path of the exported file.
    compress : bool
        If true the JSON is written gzip-compressed (UTF-8 encoded), otherwise
        as plain text.

    The output is a JSON list with one ``{'layerName', 'parameters'}`` dict per
    non-Activation layer. A stand-alone ``Activation`` layer is folded into the
    ``'activation'`` parameter of the layer that precedes it.

    Raises
    ------
    ValueError
        If an ``Activation`` layer has no preceding layer, or follows a layer
        that has no ``'activation'`` parameter.
    KeyError
        If a layer name is not present in the lookup tables.
    """
    with open(model_json_file, 'r') as f:
        model_metadata = json.load(f)

    layers = []

    # The context manager guarantees the HDF5 handle is released even when a
    # layer fails to serialize.
    with h5py.File(weights_hdf5_file, 'r') as weights_file:
        for k, layer in enumerate(model_metadata['layers']):
            if layer['name'] == 'Activation':
                if k == 0 or not layers:
                    raise ValueError(
                        'Activation layer at index {} has no preceding layer'.format(k))
                prev_layer_name = model_metadata['layers'][k-1]['name']
                prev_params = layer_params_dict[prev_layer_name]
                if 'activation' not in prev_params:
                    raise ValueError(
                        "layer '{}' preceding the Activation layer at index {} "
                        "has no 'activation' parameter".format(prev_layer_name, k))
                # Activation layers are never appended to `layers`, so the
                # layer to patch is always the most recently exported one.
                layers[-1]['parameters'][prev_params.index('activation')] = layer['activation']
                continue

            layer_params = []

            for param in layer_params_dict[layer['name']]:
                if param == 'weights':
                    group_name = 'layer_{}'.format(k)
                    # Pair datasets with weight names in numeric order; plain
                    # string order would put e.g. 'param_10' before 'param_2'
                    # for layers with ten or more arrays. (Assumes the writer
                    # numbers datasets with a `_<n>` suffix -- TODO confirm.)
                    dataset_names = sorted(weights_file.get(group_name), key=_param_sort_key)
                    weight_names = layer_weights_dict[layer['name']]
                    weights = {}
                    for name, dataset_name in zip(weight_names, dataset_names):
                        dataset = weights_file.get('{}/{}'.format(group_name, dataset_name))
                        # `dataset[()]` reads the whole array; it replaces the
                        # deprecated `.value` attribute.
                        weights[name] = dataset[()].tolist()
                    layer_params.append(weights)
                else:
                    layer_params.append(layer[param])

            layers.append({
                'layerName': layer_name_dict[layer['name']],
                'parameters': layer_params
            })

    if compress:
        with gzip.open(save_filepath, 'wb') as f:
            f.write(json.dumps(layers).encode('utf8'))
    else:
        with open(save_filepath, 'w') as f:
            json.dump(layers, f)


In [124]:
import json
# Parse the architecture JSON string produced by the model, then write it to
# the file that serialize() reads as its model description.
architecture_json = model.to_json()
model_metadata = json.loads(architecture_json)

with open('cifar10_cnn_keras_model.json', 'w') as model_file:
    json.dump(model_metadata, model_file)

In [125]:
# Export the model parameters twice from the same inputs:
# first gzip-compressed, then as plain JSON.
for params_path, use_gzip in (('cifar10_cnn_model_params.json.gz', True),
                              ('cifar10_cnn_model_params.json', False)):
    serialize('cifar10_cnn_keras_model.json',
              'cifar10_cnn_keras_weights.hdf5',
              params_path,
              use_gzip)

In [126]:
import numpy as np
import gzip
# Pick 500 random test images and their labels. Indices are drawn
# independently, so the same image can appear more than once.
randidx = np.random.randint(0, X_test.shape[0], size=500)
X_rand = X_test[randidx, :]
y_rand = y_test[randidx]

# Convert and encode the sample once; both files receive the same JSON text
# instead of repeating the costly tolist() / JSON encoding per file.
sample_json = json.dumps({'data': X_rand.tolist(), 'labels': y_rand.tolist()})

with gzip.open('sample_data.json.gz', 'wb') as f:
    f.write(sample_json.encode('utf8'))
with open('sample_data.json', 'w') as f:
    f.write(sample_json)

In [127]:
%%time
model.predict(X_rand[0:1,:])

CPU times: user 3.77 ms, sys: 721 µs, total: 4.49 ms
Wall time: 4.07 ms


array([[  1.14086735e-16,   1.13544659e-20,   3.05306025e-09,
          1.41454004e-08,   6.85371488e-05,   2.38421781e-05,
          2.99920073e-15,   9.99907613e-01,   3.84884120e-18,
          2.40887647e-16]])