This code is for an image segmentation task. It defines a model, imports the libraries, and structures training.

In [None]:
from __future__ import print_function
from __future__ import absolute_import

import warnings
from keras import optimizers, regularizers

from keras.layers import Input, TimeDistributed, GaussianNoise, GaussianDropout, SeparableConv2D
from keras import layers
from keras.layers import Reshape
from keras.layers import Permute
from keras.layers import Dense
from keras.layers import Activation
from keras.layers import Flatten, Multiply, Add, Concatenate, Maximum, Subtract, Average
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import AveragePooling2D
from keras.layers import GlobalAveragePooling2D
from keras.layers import GlobalMaxPooling2D
from keras.layers import BatchNormalization
from keras.models import Model
from keras import backend as K
from keras.engine.topology import get_source_inputs
from keras.utils import layer_utils
from keras.utils.data_utils import get_file
from keras.layers import merge, Convolution2D, UpSampling2D,Deconvolution2D,AtrousConvolution2D,ZeroPadding2D,multiply,Conv2DTranspose
from keras.losses import binary_crossentropy
from keras import losses
import keras.backend as K
from keras.utils import conv_utils
from keras.engine.topology import Layer
from keras.engine import InputSpec
from keras.callbacks import LearningRateScheduler, ModelCheckpoint
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.preprocessing.image import ImageDataGenerator
import itertools

Next, I added a data generator. The goal is to create transformations of the original data set that teach the model a wider array of patterns, which increases the generalization capacity of the model. I had enormous trouble adding the elastic transforms into the Keras data generator, but if run separately on data sets with lots of variance in the shape of objects, it's a great way to increase the model capacity, particularly near object boundaries.

In [None]:
import random

import numpy as np

# Fixed seed so that shuffling and augmentation are repeatable between runs.
seed = 42
# Call the seeding functions. The previous `random.seed = seed` replaced the
# function with an integer and therefore never seeded anything.
random.seed(seed)
np.random.seed(seed=seed)

# Augmentation settings passed to every ImageDataGenerator built from them.
# NOTE(review): Keras interprets rotation_range in degrees, so 0.2 is a barely
# visible rotation - confirm that this is the intended magnitude.
data_gen_args = dict(
                         #zca_whitening=True,
                         #zca_epsilon=1e-5,
                         #featurewise_std_normalization=True,
                         rotation_range=0.2,
                         width_shift_range=0.33,
                         height_shift_range=0.33,
                         #channel_shift_range=0.0001,
                         shear_range=0.1,
                         zoom_range=0.3,
                         horizontal_flip=True,
                         vertical_flip=True,
                         fill_mode='reflect')  #use 'constant'??
# Train data, provide the same seed and keyword arguments to the fit and flow methods

import numpy as np
from scipy.ndimage.interpolation import map_coordinates
from scipy.ndimage.filters import gaussian_filter
from keras.preprocessing.image import ImageDataGenerator
# input generator with standardization on
def elastic_transform(X, alpha=.001, sigma=.001, random_state=None):
    '''Apply a random elastic deformation to a single 3-D array.

    For each of the two leading axes a displacement in [-1, 1) is drawn for
    every element, smoothed with a Gaussian filter, scaled by `alpha` and then
    used to resample `X` with linear (order=1) interpolation. The third axis
    is never displaced.

    Args:
        X: 3-D numpy array (indexed up to `shape[2]`); presumably
            (rows, cols, channels) - confirm against the caller.
        alpha: scale factor applied to the smoothed displacement fields.
        sigma: standard deviation of the Gaussian smoothing filter.
        random_state: optional `np.random.RandomState`; a fresh, unseeded one
            is created when omitted.
    Returns: the deformed array, same shape as `X`.
    '''
    if random_state is None:
        random_state = np.random.RandomState(None)
    shape = X.shape

    # dx is drawn before dy so a seeded random_state gives a stable result.
    dx = gaussian_filter((random_state.rand(*shape) * 2 - 1), sigma, mode="constant", cval=0) * alpha
    dy = gaussian_filter((random_state.rand(*shape) * 2 - 1), sigma, mode="constant", cval=0) * alpha

    # indexing='ij' makes every grid have exactly X.shape. The default 'xy'
    # swaps the first two axes, which only worked for square inputs.
    rows, cols, chans = np.meshgrid(np.arange(shape[0]), np.arange(shape[1]),
                                    np.arange(shape[2]), indexing='ij')
    indices = (np.reshape(rows + dy, (-1, 1)),
               np.reshape(cols + dx, (-1, 1)),
               np.reshape(chans, (-1, 1)))

    distorted_image = map_coordinates(X, indices, order=1, mode='reflect')
    return distorted_image.reshape(shape)

# Two generators are built from the same arguments and driven with the same
# seed, one for the inputs and one for the targets.
X_datagen = ImageDataGenerator(**data_gen_args)
Y_datagen = ImageDataGenerator(**data_gen_args)
X_datagen.fit(np.asarray(X_train), augment=True, seed=seed)
Y_datagen.fit(np.asarray(Y_train), augment=True, seed=seed)

X_train_augmented = X_datagen.flow(np.asarray(X_train), batch_size=4, shuffle=True, seed=seed)
Y_train_augmented = Y_datagen.flow(np.asarray(Y_train), batch_size=4, shuffle=True, seed=seed)

import itertools
# itertools.izip exists only on Python 2. On Python 3 the built-in zip is
# already lazy, which is required here because the flows never terminate.
_lazy_zip = getattr(itertools, 'izip', zip)
train_generator = _lazy_zip(X_train_augmented, Y_train_augmented)


Below, a few loss functions for image segmentation are defined.

In [None]:
def dice_coef(y_true, y_pred):
    '''Smoothed Dice coefficient of two tensors.

    Both tensors are flattened, so the score is computed over every element
    at once. The constant 1.0 is added to numerator and denominator.
    '''
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)
    overlap = K.sum(truth * pred)
    numerator = 2.0 * overlap + 1.0
    denominator = K.sum(truth) + K.sum(pred) + 1.0
    return numerator / denominator


def jacard_coef(y_true, y_pred):
    '''Smoothed Jaccard (intersection over union) score of two tensors.

    Both tensors are flattened first; 1.0 is added to the intersection and to
    the union before dividing.
    '''
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)
    overlap = K.sum(truth * pred)
    union = K.sum(truth) + K.sum(pred) - overlap
    return (overlap + 1.0) / (union + 1.0)


def jacard_coef_loss(y_true, y_pred):
    '''Loss form of `jacard_coef`: the negated score, so lower is better.'''
    score = jacard_coef(y_true, y_pred)
    return -score


def dice_coef_loss(y_true, y_pred):
    '''Loss form of `dice_coef`: the negated score, so lower is better.'''
    score = dice_coef(y_true, y_pred)
    return -score

def dice_coef(y_true, y_pred):
    '''Smoothed Dice coefficient computed over all elements of both tensors.

    NOTE(review): a function with this name and the same formula is already
    defined earlier in this cell; this definition re-binds the name.
    '''
    flat_true = K.flatten(y_true)
    flat_pred = K.flatten(y_pred)
    shared = K.sum(flat_true * flat_pred)
    top = 2.0 * shared + 1.0
    bottom = K.sum(flat_true) + K.sum(flat_pred) + 1.0
    return top / bottom




Some of the custom layers I used for models are shown. I used squeeze excite blocks, and a few variants of grouped and separable convolution blocks. The code for the superseparable grouped convolution block only seems to work with certain versions of Keras (I couldn't run it with Keras 2.1 or up), but it is basically a grouped convolution, borrowed from a resnext model, with a separable convolution layer instead of a regular convolution layer for the depthwise convolution step. The edis layer is meant to mimic the multiplicative integration method used in some recurrent neural network models. This should allow for self gating and an attention-like mechanism, like the swish activation function, and also allow the merge to take a very flexible form. In theory, it can be used any time there is a merge operation, such as in an LSTM unit, or here, as part of a gating operation. For what it's worth, it didn't seem to improve things for this particular model. Perhaps the expressiveness it gives a recurrent model is already occurring for a convolutional model.

In [3]:
def squeeze_excite_block1(input, ratio=8):
    ''' Create a squeeze-excite block, usually added right before the add merge
    Args:
        input: input tensor; its channel count is read from `_keras_shape`
            (an attribute of standalone Keras 2.x tensors)
        ratio: divisor applied to the channel count to size the first Dense layer
    Returns: a keras tensor, `input` multiplied by the learned per-channel gates
    '''
    init = input
    channels_first = K.image_data_format() == 'channels_first'
    channel_axis = 1 if channels_first else -1
    filters = init._keras_shape[channel_axis]
    se_shape = (1, 1, filters)

    # Squeeze: one value per channel, reshaped so the Dense layers act on the
    # channel dimension.
    se = GlobalAveragePooling2D()(init)
    se = Reshape(se_shape)(se)
    # Excite: bottleneck Dense, light multiplicative noise, sigmoid gates.
    se = Dense(filters // ratio, activation='elu', kernel_initializer='he_normal', use_bias=False)(se)
    se = GaussianDropout(.01)(se)
    se = Dense(filters, activation='sigmoid', kernel_initializer='he_normal', use_bias=False)(se)

    if channels_first:
        # Move the channel dimension to the front to line up with `init`.
        se = Permute((3, 1, 2))(se)

    # The block previously ended with a bare `return`, so callers received
    # None instead of the gated tensor.
    return multiply([init, se])

def __grouped_convolution_block(input, grouped_channels, cardinality, strides, weight_decay=5e-4):
    ''' Adds a grouped convolution block. It is an equivalent block from the paper
    Args:
        input: input tensor
        grouped_channels: grouped number of filters
        cardinality: cardinality factor describing the number of groups
        strides: performs strided convolution for downscaling if > 1
        weight_decay: weight decay term (accepted for interface compatibility;
            this implementation does not apply any regulariser)
    Returns: a keras tensor
    '''
    channels_last = K.image_data_format() == 'channels_last'
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    if cardinality == 1:
        # with cardinality 1, it is a standard convolution
        x = Conv2D(grouped_channels, (3, 3), padding='same', use_bias=False, strides=(strides, strides),
                   kernel_initializer='he_normal')(input)
        x = BatchNormalization(axis=channel_axis)(x)
        x = Activation('relu')(x)
        return x

    group_list = []
    for c in range(cardinality):
        start = c * grouped_channels
        stop = start + grouped_channels
        # The slicing function is chosen up front. The former one-liner
        # `lambda z: A if cond else lambda z: B` parsed as
        # `lambda z: (A if cond else (lambda ...))`, so the channels_first
        # case returned a function instead of a tensor. The bounds are bound
        # as defaults so each layer keeps its own slice (no late binding).
        if channels_last:
            slicer = lambda z, start=start, stop=stop: z[:, :, :, start:stop]
        else:
            slicer = lambda z, start=start, stop=stop: z[:, start:stop, :, :]
        # `layers.Lambda` / `layers.concatenate` are used because only the
        # `layers` module itself is imported at the top of the file.
        x = layers.Lambda(slicer)(input)

        x = Conv2D(grouped_channels, (3, 3), padding='same', use_bias=False, strides=(strides, strides),
                   kernel_initializer='he_normal')(x)

        group_list.append(x)

    group_merge = layers.concatenate(group_list, axis=channel_axis)
    x = BatchNormalization(axis=channel_axis)(group_merge)
    x = Activation('relu')(x)

    return x
def superseparable_convolution(input, width1, grouped_channels, cardinality, name, strides=1):
    ''' Adds a grouped convolution block whose per-group convolution is separable
    Args:
        input: input tensor
        width1: kernel width/height used by every convolution in the block
        grouped_channels: grouped number of filters
        cardinality: cardinality factor describing the number of groups
        name: accepted for interface compatibility; not used by this block
        strides: stride applied in both spatial directions (default 1)
    Returns: a keras tensor
    '''
    channels_last = K.image_data_format() == 'channels_last'
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    if cardinality == 1:
        # with cardinality 1, it is a standard convolution
        # (`strides` used to be an undefined name here -> NameError)
        x = Conv2D(grouped_channels, (width1, width1), padding='same', use_bias=False, strides=(strides, strides),
                   kernel_initializer='he_normal')(input)
        x = BatchNormalization(axis=channel_axis)(x)
        x = Activation('relu')(x)
        return x

    group_list = []
    for c in range(cardinality):   #cardinality should be group 2 or 3, alternating, otherwise no information is exchanged
        start = c * grouped_channels
        stop = start + grouped_channels
        # Choose the slicing function explicitly: the former
        # `lambda z: A if cond else lambda z: B` returned a function rather
        # than a tensor for channels_first. Bounds are bound as defaults to
        # avoid late binding of the loop variable.
        if channels_last:
            slicer = lambda z, start=start, stop=stop: z[:, :, :, start:stop]
        else:
            slicer = lambda z, start=start, stop=stop: z[:, start:stop, :, :]
        x = layers.Lambda(slicer)(input)

        # SeparableConv2D has depthwise/pointwise initialisers only; the extra
        # `kernel_initializer` keyword is not part of its signature and is
        # rejected by Keras >= 2.1, so it is no longer passed.
        x = SeparableConv2D(grouped_channels, (width1, width1), padding='same', strides=(strides, strides),
                            depthwise_initializer='he_normal', pointwise_initializer='he_normal',
                            use_bias=False)(x)

        group_list.append(x)

    group_merge = layers.concatenate(group_list, axis=channel_axis)
    x = BatchNormalization(axis=channel_axis)(group_merge)
    x = Activation('relu')(x)
    return x

    
def grouped_convolution(y, nb_channels, cardinality, _strides):
    ''' Grouped convolution built from per-group separable 3x3 convolutions
    Args:
        y: input tensor; groups are sliced along the last axis, so
            channels_last layout is assumed
        nb_channels: total number of output channels; must be divisible by
            `cardinality`
        cardinality: number of groups
        _strides: strides forwarded to every convolution
    Returns: a keras tensor
    '''
    # when `cardinality` == 1 this is just a standard convolution
    if cardinality == 1:
        return layers.Conv2D(nb_channels, kernel_size=(3, 3), strides=_strides, padding='same')(y)

    assert not nb_channels % cardinality, 'nb_channels must be divisible by cardinality'
    _d = nb_channels // cardinality

    # in a grouped convolution layer, input and output channels are divided into `cardinality` groups,
    # and convolutions are separately performed within each group
    groups = []
    for j in range(cardinality):
        # bind the slice bounds as defaults so each Lambda keeps its own group
        # even if the function is evaluated again after the loop has finished
        group = layers.Lambda(lambda z, lo=j * _d, hi=j * _d + _d: z[:, :, :, lo:hi])(y)
        groups.append(layers.SeparableConv2D(_d, kernel_size=(3, 3), strides=_strides, padding='same')(group))

    # the grouped convolutional layer concatenates them as the outputs of the layer
    y = layers.concatenate(groups)

    # a bare `return` here used to hand None back to the caller
    return y
    
def edis(inputs):
    '''Multiplicative-integration merge of the first two items of `inputs`.

    Computes a*b + .5*a + .5*b + .25, where a = inputs[0] and b = inputs[1].
    '''
    first = inputs[0]
    second = inputs[1]
    return first * second + .5 * first + .5 * second + .25

def euclid_dist(v):
    '''Return the squared difference of v[0] and v[1] (no sum, no square root).'''
    delta = v[0] - v[1]
    return delta ** 2

def out_shape(shapes):
    '''Return the first entry of `shapes`, i.e. the shape of the first input.'''
    first_shape = shapes[0]
    return first_shape


Below are a few of the blocks. These are combinations of the above layers, with different merge functions. Basically, a mix of resnet and densenet architectures, where concatenation, adding, or the multiplicative integration/edis layers are used.

In [4]:
def lblock6(x, filters, width1, name):
    '''Gated residual block of separable convolutions (edis merge + zerolower2).

    Args:
        x: input tensor
        filters: number of filters of every separable convolution
        width1: kernel size of every separable convolution
        name: prefix for the conv layer names ('a'..'d' are appended); also
            forwarded to squeeze_excite_block
    Returns: a keras tensor

    NOTE(review): `zerolower2` and `squeeze_excite_block` are not defined in
    the visible part of this file - confirm they are in scope.
    '''
    def sep_conv(tensor, suffix):
        # every convolution in the block shares the same configuration
        return SeparableConv2D(filters, width1, padding='same',
                               depthwise_initializer='he_normal',
                               pointwise_initializer='he_normal',
                               name=name + suffix)(tensor)

    x1 = sep_conv(x, 'a')
    x1 = BatchNormalization(momentum=.9)(x1)

    # value branch
    x2 = sep_conv(x1, 'b')
    x2 = BatchNormalization(momentum=.9)(x2)

    # gate branch, squashed to (0, 1)
    x3 = sep_conv(x1, 'c')
    x3 = BatchNormalization(momentum=.9)(x3)
    x3 = Activation('sigmoid')(x3)

    # `layers.Lambda`: only the `layers` module is imported at the file top
    x4 = layers.Lambda(edis)([x3, x2])
    x4 = layers.Lambda(zerolower2)(x4)
    x4 = sep_conv(x4, 'd')
    x4 = squeeze_excite_block(x4, name, ratio=8)
    # residual connection back to the first convolution's output
    x4 = Add()([x1, x4])
    x4 = Activation('elu')(x4)
    return x4

def lblock5(x, filters, width1, name):
    '''Gated residual block of separable convolutions (edis merge).

    Args:
        x: input tensor
        filters: number of filters of every separable convolution
        width1: kernel size of every separable convolution
        name: prefix for the conv layer names ('a'..'d' are appended); also
            forwarded to squeeze_excite_block
    Returns: a keras tensor

    NOTE(review): `squeeze_excite_block` is not defined in the visible part
    of this file - confirm it is in scope.
    '''
    def sep_conv(tensor, suffix):
        # every convolution in the block shares the same configuration
        return SeparableConv2D(filters, width1, padding='same',
                               depthwise_initializer='he_normal',
                               pointwise_initializer='he_normal',
                               name=name + suffix)(tensor)

    x1 = sep_conv(x, 'a')
    x1 = BatchNormalization(momentum=.9)(x1)

    # value branch
    x2 = sep_conv(x1, 'b')
    x2 = BatchNormalization(momentum=.9)(x2)

    # gate branch, squashed to (0, 1)
    x3 = sep_conv(x1, 'c')
    x3 = BatchNormalization(momentum=.9)(x3)
    x3 = Activation('sigmoid')(x3)

    # `layers.Lambda`: only the `layers` module is imported at the file top
    x4 = layers.Lambda(edis)([x3, x2])

    x4 = sep_conv(x4, 'd')
    x4 = squeeze_excite_block(x4, name, ratio=8)
    # residual connection back to the first convolution's output
    x4 = Add()([x1, x4])
    x4 = Activation('elu')(x4)
    return x4

def lblock5a(x, filters, width1, name):
    '''Gated residual block (edis merge + zerolower1, batch-normalised output conv).

    Args:
        x: input tensor
        filters: number of filters of every separable convolution
        width1: kernel size of every separable convolution
        name: prefix for the conv layer names ('a'..'d' are appended); also
            forwarded to squeeze_excite_block
    Returns: a keras tensor

    NOTE(review): `zerolower1` and `squeeze_excite_block` are not defined in
    the visible part of this file - confirm they are in scope.
    '''
    def sep_conv(tensor, suffix):
        # every convolution in the block shares the same configuration
        return SeparableConv2D(filters, width1, padding='same',
                               depthwise_initializer='he_normal',
                               pointwise_initializer='he_normal',
                               name=name + suffix)(tensor)

    x1 = sep_conv(x, 'a')
    x1 = BatchNormalization(momentum=.9)(x1)

    # value branch
    x2 = sep_conv(x1, 'b')
    x2 = BatchNormalization(momentum=.9)(x2)

    # gate branch, squashed to (0, 1)
    x3 = sep_conv(x1, 'c')
    x3 = BatchNormalization(momentum=.9)(x3)
    x3 = Activation('sigmoid')(x3)

    # `layers.Lambda`: only the `layers` module is imported at the file top
    x4 = layers.Lambda(edis)([x3, x2])
    x4 = layers.Lambda(zerolower1)(x4)
    x4 = sep_conv(x4, 'd')
    x4 = BatchNormalization(momentum=.9)(x4)
    x4 = squeeze_excite_block(x4, name, ratio=8)
    # residual connection back to the first convolution's output
    x4 = Add()([x1, x4])
    x4 = Activation('elu')(x4)
    return x4

def lblock5c(x, filters, width1, name):
    '''Gated residual block (edis merge + zerolower2, batch-normalised output conv).

    Args:
        x: input tensor
        filters: number of filters of every separable convolution
        width1: kernel size of every separable convolution
        name: prefix for the conv layer names ('a'..'d' are appended); also
            forwarded to squeeze_excite_block
    Returns: a keras tensor

    NOTE(review): `zerolower2` and `squeeze_excite_block` are not defined in
    the visible part of this file - confirm they are in scope.
    '''
    def sep_conv(tensor, suffix):
        # every convolution in the block shares the same configuration
        return SeparableConv2D(filters, width1, padding='same',
                               depthwise_initializer='he_normal',
                               pointwise_initializer='he_normal',
                               name=name + suffix)(tensor)

    x1 = sep_conv(x, 'a')
    x1 = BatchNormalization(momentum=.9)(x1)

    # value branch
    x2 = sep_conv(x1, 'b')
    x2 = BatchNormalization(momentum=.9)(x2)

    # gate branch, squashed to (0, 1)
    x3 = sep_conv(x1, 'c')
    x3 = BatchNormalization(momentum=.9)(x3)
    x3 = Activation('sigmoid')(x3)

    # `layers.Lambda`: only the `layers` module is imported at the file top
    x4 = layers.Lambda(edis)([x3, x2])
    x4 = layers.Lambda(zerolower2)(x4)
    x4 = sep_conv(x4, 'd')
    x4 = BatchNormalization(momentum=.9)(x4)
    x4 = squeeze_excite_block(x4, name, ratio=8)
    # residual connection back to the first convolution's output
    x4 = Add()([x1, x4])
    x4 = Activation('elu')(x4)
    return x4
    
def lblock2(x, filters, width1, name):
    '''Gated residual block of separable convolutions (plain Multiply merge).

    Args:
        x: input tensor
        filters: number of filters of every separable convolution
        width1: kernel size of every separable convolution
        name: prefix for the conv layer names ('a'..'d' are appended); also
            forwarded to squeeze_excite_block
    Returns: a keras tensor

    NOTE(review): `squeeze_excite_block` is not defined in the visible part
    of this file - confirm it is in scope.
    '''
    def sep_conv(tensor, suffix):
        # every convolution in the block shares the same configuration
        return SeparableConv2D(filters, width1, padding='same',
                               depthwise_initializer='he_normal',
                               pointwise_initializer='he_normal',
                               name=name + suffix)(tensor)

    x1 = sep_conv(x, 'a')
    x1 = BatchNormalization(momentum=.9)(x1)

    # value branch
    x2 = sep_conv(x1, 'b')
    x2 = BatchNormalization(momentum=.9)(x2)

    # gate branch, squashed to (0, 1)
    x3 = sep_conv(x1, 'c')
    x3 = BatchNormalization(momentum=.9)(x3)
    x3 = Activation('sigmoid')(x3)

    # element-wise gating of the value branch
    x4 = Multiply()([x2, x3])

    x4 = sep_conv(x4, 'd')
    x4 = squeeze_excite_block(x4, name, ratio=8)
    # residual connection back to the first convolution's output
    x4 = Add()([x1, x4])
    x4 = Activation('elu')(x4)
    return x4


Below, two sample models are shown. The concatenated pooling layer was wildly inefficient both in terms of computation time and overfitting. The idea was to make the pooling layer more flexible by allowing it to learn how to weight max pooling and average pooling operations in a way that better described the data, but at least on a data set towards the small side in terms of the number of samples, it was consistently difficult to prevent overfitting.

In [None]:
def lb_unet12():
    '''Build the U-Net style segmentation model made of lblock5 blocks.

    Takes 256x256x3 inputs and ends in a single-filter 1x1 convolution with a
    sigmoid activation.

    NOTE(review): `concatenated_pooling2` and `activ3` are not defined in the
    visible part of this file. The resolution comments below assume each
    pooling call halves the spatial size - confirm.

    Returns: an uncompiled keras Model
    '''
    Input1 = Input(shape=(256,256,3))
    x = GaussianDropout(.02)(Input1)

    # ---- encoder ----
    x1 = lblock5(x, 24, 3, 'a4')
    x1 = lblock5(x1, 24, 3, 'a1')
    x2 = concatenated_pooling2(x1)  #128

    x3 = lblock5(x2, 48, 3, 'cbc')
    x4 = concatenated_pooling2(x3)  #64

    x5 = lblock5(x4, 96, 3, 'c')
    x6 = concatenated_pooling2(x5) #32

    # ---- bottleneck: parallel branches with kernel widths 1, 7 and 15 ----
    x7a = lblock5(x6, 96, 1, 'k')
    x8 = lblock5(x6, 96, 7, 'e')
    x9 = lblock5(x6, 96, 15,  'f')

    x10 = Concatenate(axis=3)([x7a, x8, x9])
    x11 = SeparableConv2D(96, 1, padding = 'same', depthwise_initializer = 'he_normal', pointwise_initializer = 'he_normal', name = 'atrsum')(x10)
    x11 = BatchNormalization(momentum=.9)(x11)
    x11 = activ3(x11)
    x11 = GaussianDropout(.02)(x11)

    # ---- decoder: concatenate the skip tensor, upsample, refine ----
    x12 = Concatenate(axis=3)([x11,x6])
    x13 = UpSampling2D((2,2))(x12) #64
    x14 = lblock5(x13, 48, 3, 'g')

    x15 = Concatenate(axis=3)([x14,x4])
    x16 = UpSampling2D((2,2))(x15) #128
    x17 = lblock5(x16, 48, 3, 'h')

    x18 = Concatenate(axis=3)([x17,x2])
    x19 = UpSampling2D((2,2))(x18) #256
    x20 = lblock5(x19, 48, 3, 'i')
    x20 = GaussianDropout(.01)(x20)
    x22 = lblock5(x20, 24, 3, 'j')

    x22 = lblock5(x22, 16, 3, 'jj')
    # kernel_initializer used to be `dice_coef_loss`, a loss function that
    # takes (y_true, y_pred) and is not a weight initialiser. 'he_normal'
    # matches every other layer here and the sibling model lb_unet11.
    xout = Conv2D(1,1, kernel_initializer='he_normal', padding='same', activation='sigmoid')(x22)
    model=Model(Input1, xout)
    return model
# Build the model and the optimizer BEFORE compiling. The compile call used
# to come first and referenced `modelt6` / `ADM` before they were assigned.
modelt6 = lb_unet12()
ADM = optimizers.Adam(lr=0.0059, beta_1=0.9, beta_2=0.999, decay=0.0005, clipnorm=.9)
modelt6.compile(optimizer=ADM, loss=dice_coef_loss, metrics=[binary_crossentropy, 'accuracy'])
modelt6.fit_generator(train_generator, steps_per_epoch=600, epochs=3, shuffle=True)
modelt6.save('lb_unet12t_2.h5')  #2 epoches .0985 bince




# Re-compile with dice_coef added to the reported metrics, then save again
# under a second file name.
modelt6.compile(optimizer=ADM, loss = dice_coef_loss, metrics=[binary_crossentropy, dice_coef, 'accuracy'])
modelt6.save('lb_unet12t_1.h5')


def lb_unet11():
    '''Build the U-Net style segmentation model that mixes lblock5 and lblock6a.

    Takes 256x256x3 inputs and ends in a single-filter 1x1 convolution with a
    sigmoid activation.

    NOTE(review): `lblock6a`, `concatenated_pooling2` and `activ3` are not
    defined in the visible part of this file - confirm they are in scope.

    Returns: an uncompiled keras Model
    '''
    inputs = Input(shape=(256, 256, 3))
    net = GaussianDropout(.02)(inputs)

    # ---- encoder: the pooled tensors double as skip connections ----
    stem = lblock5(net, 24, 3, 'a4')
    stem = lblock5(stem, 24, 3, 'a1')
    pooled1 = concatenated_pooling2(stem)

    enc2 = lblock6a(pooled1, 48, 3, 'cbc')
    pooled2 = concatenated_pooling2(enc2)

    enc3 = lblock6a(pooled2, 96, 3, 'c')
    pooled3 = concatenated_pooling2(enc3)

    # ---- bottleneck: parallel branches with kernel widths 1, 7 and 15 ----
    branch_k1 = lblock6a(pooled3, 96, 1, 'k')
    branch_k7 = lblock6a(pooled3, 96, 7, 'e')
    branch_k15 = lblock6a(pooled3, 96, 15, 'f')

    fused = Concatenate(axis=3)([branch_k1, branch_k7, branch_k15])
    fused = SeparableConv2D(96, 1, padding='same',
                            depthwise_initializer='he_normal',
                            pointwise_initializer='he_normal',
                            name='atrsum')(fused)
    fused = BatchNormalization(momentum=.9)(fused)
    fused = activ3(fused)
    fused = GaussianDropout(.02)(fused)

    # ---- decoder: concatenate the skip tensor, upsample, refine ----
    dec = Concatenate(axis=3)([fused, pooled3])
    dec = UpSampling2D((2, 2))(dec)
    dec = lblock6a(dec, 48, 3, 'g')

    dec = Concatenate(axis=3)([dec, pooled2])
    dec = UpSampling2D((2, 2))(dec)
    dec = lblock6a(dec, 48, 3, 'h')

    dec = Concatenate(axis=3)([dec, pooled1])
    dec = UpSampling2D((2, 2))(dec)
    dec = lblock6a(dec, 48, 3, 'i')
    dec = GaussianDropout(.01)(dec)

    # ---- head ----
    dec = lblock5(dec, 24, 3, 'j')
    dec = lblock5(dec, 16, 3, 'jj')
    mask = Conv2D(1, 1, kernel_initializer='he_normal', padding='same', activation='sigmoid')(dec)
    return Model(inputs, mask)
# Build the model and the optimizer BEFORE compiling. With the compile call
# first, it ran against whatever `modelt6` / `ADM` were left over from the
# previous cell (or failed if none existed), and the freshly built model was
# then trained without ever being compiled.
modelt6 = lb_unet11()
ADM = optimizers.Adam(lr=0.0099, beta_1=0.9, beta_2=0.999, decay=0.0005, clipnorm=.9)
modelt6.compile(optimizer=ADM, loss='binary_crossentropy', metrics=[binary_crossentropy, 'accuracy'])
modelt6.fit_generator(train_generator, steps_per_epoch=600, epochs=2, shuffle=True)
modelt6.save('lb_unet11t_1.h5')  #2 epoches .0985 bince
