In [1]:
#import os  
#os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   
#os.environ["CUDA_VISIBLE_DEVICES"]="1" 

import keras
from keras.datasets import cifar10
from keras import backend as K
from keras.layers import Input, Conv2D, Dense, BatchNormalization, Activation
from keras.layers import GlobalAveragePooling2D, MaxPooling2D, add
from keras.models import Model
from keras.layers import SeparableConv2D

from keras import optimizers,regularizers
from keras.preprocessing.image import ImageDataGenerator
from keras.initializers import he_normal
from keras.callbacks import LearningRateScheduler, TensorBoard, ModelCheckpoint

# ---- Experiment configuration ----
num_classes        = 10         # number of target classes (labels are one-hot encoded to this width)
batch_size         = 64         # 64 or 32 or other
epochs             = 300
num_train_samples  = 50000      # size of the CIFAR-10 training split
# Steps per epoch = ceil(num_train_samples / batch_size). Derived instead of
# hard-coded so it stays correct when batch_size is changed (782 for 64).
iterations         = (num_train_samples + batch_size - 1) // batch_size
# NOTE(review): the five settings below are not referenced by any cell visible
# in this notebook export -- confirm whether they are still needed.
USE_BN=True
DROPOUT=0.2 # keep 80%
CONCAT_AXIS=3
weight_decay=1e-4
DATA_FORMAT='channels_last' # Theano:'channels_first' Tensorflow:'channels_last'

log_filepath  = './xception_1'  # directory handed to the TensorBoard callback

Using TensorFlow backend.


In [2]:
def color_preprocessing(x_train,x_test):
    """Standardize both image sets channel by channel.

    Each input is first copied to float32 (the arrays passed in are left
    untouched); then, for each of the three channels on the last axis, a
    fixed mean is subtracted and the result is divided by a fixed standard
    deviation.

    Args:
        x_train: 4-D image array indexed as [sample, row, col, channel].
        x_test: 4-D image array with the same layout.

    Returns:
        Tuple ``(x_train, x_test)`` of standardized float32 arrays.
    """
    # (mean, std) per channel, in channel order.
    channel_stats = (
        (125.307, 62.9932),
        (122.95, 62.0887),
        (113.865, 66.7048),
    )
    train_f = x_train.astype('float32')
    test_f = x_test.astype('float32')
    for channel, (mu, sigma) in enumerate(channel_stats):
        for images in (train_f, test_f):
            images[:, :, :, channel] = (images[:, :, :, channel] - mu) / sigma
    return train_f, test_f

def scheduler(epoch):
    """Step learning-rate schedule keyed on the epoch index.

    Returns 0.01 for epochs below 100, 0.001 for epochs below 200 and
    0.0001 from epoch 200 onwards.
    """
    # (exclusive upper epoch bound, learning rate), checked in order.
    for upper_bound, rate in ((100, 0.01), (200, 0.001)):
        if epoch < upper_bound:
            return rate
    return 0.0001

# Load CIFAR-10, one-hot encode both label arrays to num_classes columns,
# then standardize the images per channel.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
y_train, y_test = [keras.utils.to_categorical(labels, num_classes)
                   for labels in (y_train, y_test)]
x_train, x_test = color_preprocessing(x_train, x_test)

In [3]:
# 36 convolutional layers are structured into 14 modules
def entryflow(x,params,top=False):
    """Downsampling residual module (used for modules 2-4 and 13).

    Main branch:  [ReLU] -> SepConv 3x3 -> BN -> ReLU -> SepConv 3x3 -> BN
                  -> 3x3 max-pool with stride 2.
    Shortcut:     1x1 Conv with stride 2 -> BN.
    The two branches are summed element-wise.

    Args:
        x: input tensor.
        params: three filter counts, in order: shortcut conv, first separable
            conv, second separable conv. The first and third must match so
            the two branches can be added.
        top: True for module 2 only. That module sits directly after the
            stem, which already ends in a ReLU, so its leading ReLU is
            skipped. Every other module starts with a ReLU.

    Returns:
        The output tensor of the element-wise sum.
    """
    residual = Conv2D(params[0], (1, 1), strides=(2, 2),padding='same')(x)
    residual = BatchNormalization()(residual)
    # Fix: this condition used to be `if top:`, which is the opposite of the
    # documented intent -- it applied the ReLU only to module 2 and dropped
    # the pre-activation from modules 3, 4 and 13.
    if not top:
        x = Activation('relu')(x)
    x = SeparableConv2D(params[1], (3, 3),padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = SeparableConv2D(params[2], (3, 3),padding='same')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((3, 3), strides=(2, 2),padding='same')(x)
    x = add([x, residual])
    return x
    
def middleflow(x,params):
    """Identity-shortcut residual module (used for modules 5-12).

    Applies three consecutive ReLU -> SepConv 3x3 -> BN stages, all with the
    same filter count, and adds the untouched input back onto the result.

    Args:
        x: input tensor.
        params: int, filter count shared by the three separable convolutions.

    Returns:
        The output tensor of the element-wise sum.
    """
    shortcut = x
    branch = x
    for _ in range(3):
        branch = Activation('relu')(branch)
        branch = SeparableConv2D(params, (3, 3), padding='same')(branch)
        branch = BatchNormalization()(branch)
    return add([branch, shortcut])

def exitflow(x,params):
    """Final convolutional module (module 14).

    Runs two SepConv 3x3 -> BN -> ReLU stages back to back, with no shortcut.

    Args:
        x: input tensor.
        params: two filter counts, one per separable convolution, in order.

    Returns:
        The output tensor of the second ReLU.
    """
    out = x
    for stage in range(2):
        out = SeparableConv2D(params[stage], (3, 3), padding='same')(out)
        out = BatchNormalization()(out)
        out = Activation('relu')(out)
    return out

In [4]:
def xception(img_input,shallow=False, classes=10):
    """Assemble the 14-module Xception-style classifier graph.

    Layout: a two-convolution stem (module 1), three downsampling residual
    modules (2-4), eight identity residual modules (5-12), one more
    downsampling residual module (13), the exit module (14), then global
    average pooling and a softmax dense layer.

    Args:
        img_input: input tensor for the network.
        shallow: accepted but not used anywhere in this function.
        classes: number of units in the final softmax layer.

    Returns:
        The softmax output tensor.
    """
    net = img_input
    # module 1: stem -- (filters, stride) for each Conv -> BN -> ReLU stage
    for filters, stride in ((32, (2, 2)), (64, (1, 1))):
        net = Conv2D(filters, (3, 3), strides=stride, padding='same')(net)
        net = BatchNormalization()(net)
        net = Activation('relu')(net)
    # module 2: first downsampling module, flagged as the top one
    net = entryflow(net, (128, 128, 128), top=True)
    # modules 3-4
    for widths in ((256, 256, 256), (728, 728, 728)):
        net = entryflow(net, widths)
    # modules 5-12
    for _ in range(8):
        net = middleflow(net, 728)
    # module 13
    net = entryflow(net, (1024, 728, 1024))
    # module 14
    net = exitflow(net, (1536, 2048))
    # classifier head
    net = GlobalAveragePooling2D()(net)
    return Dense(classes, activation='softmax')(net)

In [5]:
# Build the network on 32x32 RGB inputs and print its layer table.
img_input = Input(shape=(32, 32, 3))
# Pass the configured class count explicitly so the softmax width always
# matches the width of the one-hot labels produced with num_classes.
output = xception(img_input, classes=num_classes)
model = Model(img_input, output)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 32, 32, 3)    0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 16, 16, 32)   896         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 16, 16, 32)   128         conv2d_1[0][0]                   
__________________________________________________________________________________________________
activation_1 (Activation)       (None, 16, 16, 32)   0           batch_normalization_1[0][0]      
__________________________________________________________________________________________________
conv2d_2 (

In [None]:
# set optimizer
# The LearningRateScheduler callback below overwrites the optimizer's learning
# rate at the start of every epoch, including the first, so the initial value
# is taken from the same schedule instead of an unrelated literal that would
# never actually be used.
sgd = optimizers.SGD(lr=scheduler(0), momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

# set callback
tb_cb = TensorBoard(log_dir=log_filepath, histogram_freq=0)
change_lr = LearningRateScheduler(scheduler)
cbks = [change_lr,tb_cb]

# set data augmentation: random horizontal flips and shifts of up to 12.5% of
# the width/height, with zeros filled in at the exposed border.
datagen = ImageDataGenerator(horizontal_flip=True,
                             width_shift_range=0.125,
                             height_shift_range=0.125,
                             fill_mode='constant',cval=0.)
# No datagen.fit(x_train) here: fitting is only required when
# featurewise_center, featurewise_std_normalization or zca_whitening is
# enabled, and none of them is.

# start training; the test split doubles as the validation set
model.fit_generator(datagen.flow(x_train, y_train,batch_size=batch_size),
                    steps_per_epoch=iterations,
                    epochs=epochs,
                    callbacks=cbks,
                    validation_data=(x_test, y_test))
model.save('xception_1.h5')

Epoch 1/300
 52/782 [>.............................] - ETA: 1:02:24 - loss: 2.0132 - acc: 0.2575