In [1]:
from __future__ import print_function
import keras
from keras.layers import Dense, Conv2D, BatchNormalization, Activation
from keras.layers import AveragePooling2D, Input, Flatten
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, LearningRateScheduler
from keras.callbacks import ReduceLROnPlateau
from keras.preprocessing.image import ImageDataGenerator
from keras.regularizers import l2
from keras import backend as K
from keras.models import Model
from keras.datasets import cifar10
from keras.utils import plot_model
import numpy as np
import os
import math
import tensorflow as tf
from keras.layers import *



class FractionalPooling2D(Layer):
	def __init__(self, pool_ratio = None, pseudo_random = False, overlap = False, name ='FractionPooling2D', **kwargs):
		self.pool_ratio = pool_ratio
		self.input_spec = [InputSpec(ndim=4)]
		self.pseudo_random = pseudo_random
		self.overlap = overlap
		self.name = name
		super(FractionalPooling2D, self).__init__(**kwargs)
		
	def call(self, input):
		[batch_tensor,row_pooling,col_pooling] = tf.nn.fractional_avg_pool(input, pooling_ratio = self.pool_ratio, pseudo_random = self.pseudo_random, overlapping = self.overlap, seed = 0)
		return(batch_tensor)
		
	def compute_output_shape(self, input_shape):
	
		if(K.common.image_dim_ordering() == 'channels_last' or K.common.image_dim_ordering() == 'tf'):
			if(input_shape[0] != None):
				batch_size = int(input_shape[0]/self.pool_ratio[0])
			else:
				batch_size = input_shape[0]
			width = int(input_shape[1]/self.pool_ratio[1])
			height = int(input_shape[2]/self.pool_ratio[2])
			channels = int(input_shape[3]/self.pool_ratio[3])
			return(batch_size, width, height, channels)
			
		elif(K.image_dim_ordering() == 'channels_first' or K.image_dim_ordering() == 'th'):
			if(input_shape[0] != None):
				batch_size = int(input_shape[0]/self.pool_ratio[0])
			else:
				batch_size = input_shape[0]
			channels = int(input_shape[1]/self.pool_ratio[1])
			width = int(input_shape[2]/self.pool_ratio[2])
			height = int(input_shape[3]/self.pool_ratio[3])
			return(batch_size, channels, width, height)
		
	def get_config(self):
		config = {'pooling_ratio': self.pool_ratio, 'pseudo_random': self.pseudo_random, 'overlap': self.overlap, 'name':self.name}
		base_config = super(FractionalPooling2D, self).get_config()
		return dict(list(base_config.items()) + list(config.items()))
		
	def build(self, input_shape):
		self.input_spec = [InputSpec(shape=input_shape)]



# Training parameters
batch_size = 50  # orig paper trained all networks with batch_size=128
epochs = 200
data_augmentation = True
num_classes = 10

# Subtracting pixel mean improves accuracy
subtract_pixel_mean = True

# Model parameter
# ----------------------------------------------------------------------------
#           |      | 200-epoch | Orig Paper| 200-epoch | Orig Paper| sec/epoch
# Model     |  n   | ResNet v1 | ResNet v1 | ResNet v2 | ResNet v2 | GTX1080Ti
#           |v1(v2)| %Accuracy | %Accuracy | %Accuracy | %Accuracy | v1 (v2)
# ----------------------------------------------------------------------------
# ResNet20  | 3 (2)| 92.16     | 91.25     | -----     | -----     | 35 (---)
# ResNet32  | 5(NA)| 92.46     | 92.49     | NA        | NA        | 50 ( NA)
# ResNet44  | 7(NA)| 92.50     | 92.83     | NA        | NA        | 70 ( NA)
# ResNet56  | 9 (6)| 92.71     | 93.03     | 93.01     | NA        | 90 (100)
# ResNet110 |18(12)| 92.65     | 93.39+-.16| 93.15     | 93.63     | 165(180)
# ResNet164 |27(18)| -----     | 94.07     | -----     | 94.54     | ---(---)
# ResNet1001| (111)| -----     | 92.39     | -----     | 95.08+-.14| ---(---)
# ---------------------------------------------------------------------------
n = 18

# Model version
# Orig paper: version = 1 (ResNet v1), Improved ResNet: version = 2 (ResNet v2)
version = 1

# Computed depth from supplied model parameter n
if version == 1:
    depth = n * 6 + 2
elif version == 2:
    depth = n * 9 + 2

# Model name, depth and version
model_type = 'ResNet%dv%d' % (depth, version)

# Load the CIFAR10 data.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Input image dimensions.
input_shape = x_train.shape[1:]

# Normalize data.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# If subtract pixel mean is enabled
if subtract_pixel_mean:
    x_train_mean = np.mean(x_train, axis=0)
    x_train -= x_train_mean
    x_test -= x_train_mean

print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
print('y_train shape:', y_train.shape)

# Convert class vectors to binary class matrices.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)


def lr_schedule(epoch):
    """Learning Rate Schedule

    Learning rate is scheduled to be reduced after 80, 120, 160, 180 epochs.
    Called automatically every epoch as part of callbacks during training.

    # Arguments
        epoch (int): The number of epochs

    # Returns
        lr (float32): learning rate
    """
    lr = 1e-3
    if epoch > 180:
        lr *= 0.5e-3
    elif epoch > 160:
        lr *= 1e-3
    elif epoch > 120:
        lr *= 1e-2
    elif epoch > 80:
        lr *= 1e-1
    print('Learning rate: ', lr)
    return lr


def resnet_layer(inputs,
                 num_filters=16,
                 kernel_size=3,
                 strides=1,
                 activation='relu',
                 batch_normalization=True,
                 conv_first=True):
    """2D Convolution-Batch Normalization-Activation stack builder

    # Arguments
        inputs (tensor): input tensor from input image or previous layer
        num_filters (int): Conv2D number of filters
        kernel_size (int): Conv2D square kernel dimensions
        strides (int): Conv2D square stride dimensions
        activation (string): activation name
        batch_normalization (bool): whether to include batch normalization
        conv_first (bool): conv-bn-activation (True) or
            bn-activation-conv (False)

    # Returns
        x (tensor): tensor as input to the next layer
    """
    conv = Conv2D(num_filters,
                  kernel_size=kernel_size,
                  strides=strides,
                  padding='same',
                  kernel_initializer='he_normal',
                  kernel_regularizer=l2(1e-4))

    x = inputs
    if conv_first:
        x = conv(x)
        if batch_normalization:
            x = BatchNormalization()(x)
        if activation is not None:
            x = Activation(activation)(x)
    else:
        if batch_normalization:
            x = BatchNormalization()(x)
        if activation is not None:
            x = Activation(activation)(x)
        x = conv(x)
    return x


def resnet_v1(input_shape, depth, num_classes=10):
    """ResNet Version 1 Model builder [a]

    Stacks of 2 x (3 x 3) Conv2D-BN-ReLU
    Last ReLU is after the shortcut connection.
    At the beginning of each stage, the feature map size is halved (downsampled)
    by a convolutional layer with strides=2, while the number of filters is
    doubled. Within each stage, the layers have the same number filters and the
    same number of filters.
    Features maps sizes:
    stage 0: 32x32, 16
    stage 1: 16x16, 32
    stage 2:  8x8,  64
    The Number of parameters is approx the same as Table 6 of [a]:
    ResNet20 0.27M
    ResNet32 0.46M
    ResNet44 0.66M
    ResNet56 0.85M
    ResNet110 1.7M

    # Arguments
        input_shape (tensor): shape of input image tensor
        depth (int): number of core convolutional layers
        num_classes (int): number of classes (CIFAR10 has 10)

    # Returns
        model (Model): Keras model instance
    """
    if (depth - 2) % 6 != 0:
        raise ValueError('depth should be 6n+2 (eg 20, 32, 44 in [a])')
    # Start model definition.
    num_filters = 16
    num_res_blocks = int((depth - 2) / 6)
    print('num res blocks:', num_res_blocks)
    act_size = 32.0

    inputs = Input(shape=input_shape, batch_shape=(batch_size, )+input_shape)
    #inputs = Input(shape=input_shape)
    x = resnet_layer(inputs=inputs)
    top = x
    # Instantiate the stack of residual units
    for stack in range(3):
        for res_block in range(num_res_blocks):
            strides = 1
            if stack > 0 and res_block == 0:  # first layer but not first stack
                ratio = 1.23
                x = FractionalPooling2D((1.0,ratio, ratio,1.0), overlap=True)(x)
                act_size = int(act_size / ratio)
                print(act_size)


            if stack > 0 and res_block == 6:  # first layer but not first stack
                ratio = 1.238
                x = FractionalPooling2D((1.0,ratio,ratio,1.0), overlap=True)(x) 
                act_size = int(act_size / ratio)
                print(act_size)


            if stack > 0 and res_block == 12:  # first layer but not first stack
                ratio = 1.25
                x = FractionalPooling2D((1.0,ratio, ratio,1.0), overlap=True)(x)
                act_size = int(act_size / ratio)
                print(act_size)

                print('res now', 32/(2*stack))
                y = MaxPooling2D(2*stack)(top)
                y = Convolution2D(num_filters, (1,1), kernel_initializer='he_normal', kernel_regularizer=l2(1e-4))(y)
                y = BatchNormalization()(y)
                y = Activation('relu')(y)
                x = Add()([x, y])


            y = resnet_layer(inputs=x,
                             num_filters=num_filters,
                             strides=strides)
            y = resnet_layer(inputs=y,
                             num_filters=num_filters,
                             activation=None)
            
            if stack > 0 and res_block == 0:  # first layer but not first stack
                # linear projection residual shortcut connection to match
                # changed dims
                x = resnet_layer(inputs=x,
                                 num_filters=num_filters,
                                 kernel_size=1,
                                 strides=strides,
                                 activation=None,
                                 batch_normalization=False)
            x = keras.layers.add([x, y])
            x = Activation('relu')(x)
        num_filters *= 2

    # Add classifier on top.
    # v1 does not use BN after last shortcut connection-ReLU
    x = AveragePooling2D(pool_size=8)(x)
    y = Flatten()(x)
    outputs = Dense(num_classes,
                    activation='softmax',
                    kernel_initializer='he_normal')(y)

    # Instantiate model.
    model = Model(inputs=inputs, outputs=outputs)
    return model



model = resnet_v1(input_shape=input_shape, depth=depth)

model.compile(loss='categorical_crossentropy',
              optimizer=Adam(lr=lr_schedule(0)),
              metrics=['accuracy'])
model.summary()
print(model_type)
plot_model(model, 'model.png', show_shapes=True)

# Prepare model model saving directory.
save_dir = os.path.join(os.getcwd(), 'saved_models')
model_name = 'cifar10_%s_model.{epoch:03d}.h5' % model_type
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
filepath = os.path.join(save_dir, model_name)

# Prepare callbacks for model saving and for learning rate adjustment.
checkpoint = ModelCheckpoint(filepath=filepath,
                             monitor='val_acc',
                             verbose=1)

lr_scheduler = LearningRateScheduler(lr_schedule)

lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1),
                               cooldown=0,
                               patience=5,
                               min_lr=0.5e-6)

callbacks = [ 
    lr_reducer, 
    lr_scheduler]

# Run training, with or without data augmentation.
if not data_augmentation:
    print('Not using data augmentation.')
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_data=(x_test, y_test),
              shuffle=True,
              callbacks=callbacks)
else:
    print('Using real-time data augmentation.')
    # This will do preprocessing and realtime data augmentation:
    datagen = ImageDataGenerator()

    # Compute quantities required for featurewise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
    # datagen.fit(x_train)
    # Fit the model on the batches generated by datagen.flow().
    model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
                        validation_data=(x_test, y_test),
                        epochs=epochs, verbose=1, workers=4,
                        callbacks=callbacks)
    # model.fit(x_train, y_train, batch_size=batch_size, callbacks=callbacks, epochs=epochs, verbose=1, workers=4)

# Score trained model.
scores = model.evaluate(x_test, y_test, verbose=1)
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])

Using TensorFlow backend.


x_train shape: (50000, 32, 32, 3)
50000 train samples
10000 test samples
y_train shape: (50000, 1)
num res blocks: 18
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
`seed2` and `deterministic` args are deprecated.  Use fractional_avg_pool_v2.
26
21
16
res now 16.0

13
10
8
res now 8.0

Learning rate:  0.001
Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (50, 32, 32, 3)      0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (50, 32, 32, 16)     448         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization

Using real-time data augmentation.

Epoch 1/200
Learning rate:  0.001
Epoch 2/200
Learning rate:  0.001
Epoch 3/200
Learning rate:  0.001
Epoch 4/200
Learning rate:  0.001
Epoch 5/200
Learning rate:  0.001
Epoch 6/200
Learning rate:  0.001
Epoch 7/200
Learning rate:  0.001
Epoch 8/200
Learning rate:  0.001
Epoch 9/200
Learning rate:  0.001
Epoch 10/200
Learning rate:  0.001
Epoch 11/200
Learning rate:  0.001
Epoch 12/200
Learning rate:  0.001
Epoch 13/200
Learning rate:  0.001
Epoch 14/200
Learning rate:  0.001
Epoch 15/200
Learning rate:  0.001
Epoch 16/200
Learning rate:  0.001
Epoch 17/200
Learning rate:  0.001
Epoch 18/200
Learning rate:  0.001
Epoch 19/200
Learning rate:  0.001
Epoch 20/200
Learning rate:  0.001
Epoch 21/200
Learning rate:  0.001
Epoch 22/200
Learning rate:  0.001
Epoch 23/200
Learning rate:  0.001
Epoch 24/200
Learning rate:  0.001
Epoch 25/200
Learning rate:  0.001
Epoch 26/200
Learning rate:  0.001
Epoch 27/200
Learning rate:  0.001
Epoch 28/200
Learning rate: 

Epoch 48/200
Learning rate:  0.001
Epoch 49/200
Learning rate:  0.001
Epoch 50/200
Learning rate:  0.001
Epoch 51/200
Learning rate:  0.001
Epoch 52/200
Learning rate:  0.001
Epoch 53/200
Learning rate:  0.001
Epoch 54/200
Learning rate:  0.001
Epoch 55/200
Learning rate:  0.001
Epoch 56/200
Learning rate:  0.001
Epoch 57/200
Learning rate:  0.001
Epoch 58/200
Learning rate:  0.001
Epoch 59/200
Learning rate:  0.001
Epoch 60/200
Learning rate:  0.001
Epoch 61/200
Learning rate:  0.001
Epoch 62/200
Learning rate:  0.001
Epoch 63/200
Learning rate:  0.001
Epoch 64/200
Learning rate:  0.001
Epoch 65/200
Learning rate:  0.001
Epoch 66/200
Learning rate:  0.001
Epoch 67/200
Learning rate:  0.001
Epoch 68/200
Learning rate:  0.001
Epoch 69/200
Learning rate:  0.001
Epoch 70/200
Learning rate:  0.001
Epoch 71/200
Learning rate:  0.001
Epoch 72/200
Learning rate:  0.001
Epoch 73/200
Learning rate:  0.001
Epoch 74/200
Learning rate:  0.001
Epoch 75/200
Learning rate:  0.001
Epoch 76/200
Learnin

Epoch 95/200
Learning rate:  0.0001
Epoch 96/200
Learning rate:  0.0001
Epoch 97/200
Learning rate:  0.0001
Epoch 98/200
Learning rate:  0.0001
Epoch 99/200
Learning rate:  0.0001
Epoch 100/200
Learning rate:  0.0001
Epoch 101/200
Learning rate:  0.0001
Epoch 102/200
Learning rate:  0.0001
Epoch 103/200
Learning rate:  0.0001
Epoch 104/200
Learning rate:  0.0001
Epoch 105/200
Learning rate:  0.0001
Epoch 106/200
Learning rate:  0.0001
Epoch 107/200
Learning rate:  0.0001
Epoch 108/200
Learning rate:  0.0001
Epoch 109/200
Learning rate:  0.0001
Epoch 110/200
Learning rate:  0.0001
Epoch 111/200
Learning rate:  0.0001
Epoch 112/200
Learning rate:  0.0001
Epoch 113/200
Learning rate:  0.0001
Epoch 114/200
Learning rate:  0.0001
Epoch 115/200
Learning rate:  0.0001
Epoch 116/200
Learning rate:  0.0001
Epoch 117/200
Learning rate:  0.0001
Epoch 118/200
Learning rate:  0.0001
Epoch 119/200
Learning rate:  0.0001
Epoch 120/200
Learning rate:  0.0001
Epoch 121/200
Learning rate:  0.0001
Epoch 

Epoch 143/200
Learning rate:  1e-05
Epoch 144/200
Learning rate:  1e-05
Epoch 145/200
Learning rate:  1e-05
Epoch 146/200
Learning rate:  1e-05
Epoch 147/200
Learning rate:  1e-05
Epoch 148/200
Learning rate:  1e-05
Epoch 149/200
Learning rate:  1e-05
Epoch 150/200
Learning rate:  1e-05
Epoch 151/200
Learning rate:  1e-05
Epoch 152/200
Learning rate:  1e-05
Epoch 153/200
Learning rate:  1e-05
Epoch 154/200
Learning rate:  1e-05
Epoch 155/200
Learning rate:  1e-05
Epoch 156/200
Learning rate:  1e-05
Epoch 157/200
Learning rate:  1e-05
Epoch 158/200
Learning rate:  1e-05
Epoch 159/200
Learning rate:  1e-05
Epoch 160/200
Learning rate:  1e-05
Epoch 161/200
Learning rate:  1e-05
Epoch 162/200
Learning rate:  1e-06
Epoch 163/200
Learning rate:  1e-06
Epoch 164/200
Learning rate:  1e-06
Epoch 165/200
Learning rate:  1e-06
Epoch 166/200
Learning rate:  1e-06
Epoch 167/200
Learning rate:  1e-06
Epoch 168/200
Learning rate:  1e-06
Epoch 169/200
Learning rate:  1e-06
Epoch 170/200
Learning rate:

Epoch 190/200
Learning rate:  5e-07
Epoch 191/200
Learning rate:  5e-07
Epoch 192/200
Learning rate:  5e-07
Epoch 193/200
Learning rate:  5e-07
Epoch 194/200
Learning rate:  5e-07
Epoch 195/200
Learning rate:  5e-07
Epoch 196/200
Learning rate:  5e-07
Epoch 197/200
Learning rate:  5e-07
Epoch 198/200
Learning rate:  5e-07
Epoch 199/200
Learning rate:  5e-07
Epoch 200/200
Learning rate:  5e-07
Test loss: 0.9142847796678543
Test accuracy: 0.516805112361908


In [7]:
model.evaluate(x_test, y_test, batch_size=batch_size)



[1.39825473934412, 0.808899998664856]

In [5]:
 model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
                        validation_data=(x_test, y_test),
                        epochs=202, verbose=1, workers=4,
                        callbacks=callbacks, initial_epoch=201)

Epoch 202/202
Learning rate:  5e-07


<keras.callbacks.callbacks.History at 0x7fa7480023d0>

In [6]:
model.evaluate_generator(datagen.flow(x_test, y_test))

[0.2651097774505615, 0.5168690085411072]