In [1]:
from keras.models import Model
from keras.layers import Input, merge, ZeroPadding2D
from keras.layers.core import Dense, Dropout, Activation
from keras.layers.convolutional import Convolution2D
from keras.layers.pooling import AveragePooling2D, GlobalAveragePooling2D, MaxPooling2D
from keras.layers.normalization import BatchNormalization
import keras.backend as K

from custom_layers import Scale

def DenseNet(nb_dense_block=2, growth_rate=16, nb_filter=32, reduction=0.0, dropout_rate=0.2, weight_decay=1e-4, classes=1, weights_path=None):
    '''Build and compile a small DenseNet-style model with two extra inputs paths.

    The model takes a 75x75x3 image (input named 'data') and a scalar
    (input named 'angle').  The image goes through a DenseNet-style branch
    and, in parallel, a small residual CNN branch; both pooled feature
    vectors are concatenated with the batch-normalised angle and fed to a
    two-layer dense head with a sigmoid output.

        # Arguments
            nb_dense_block: number of dense blocks (1 or 2; the per-block
                layer counts are fixed to [2, 4] below)
            growth_rate: number of filters added by each conv_block
            nb_filter: number of filters of the initial convolution
            reduction: reduction factor of transition blocks.
            dropout_rate: dropout rate used inside the dense/transition blocks
            weight_decay: weight decay factor (only forwarded to the block helpers)
            classes: number of output units of the final sigmoid layer
            weights_path: optional path to weights loaded after compiling
        # Returns
            A compiled Keras model instance.
        # Raises
            ValueError: if nb_dense_block is outside the supported range.
    '''
    # The block helpers (conv_block, transition_block, dense_block) read this
    # module-level name, so it has to be published as a global.
    global concat_axis

    eps = 1.1e-5
    bn_momentum = 0.99
    activation = 'elu'
    # compute compression factor
    compression = 1.0 - reduction

    # Number of conv_blocks per dense block; the last entry is always used
    # for the final dense block.
    nb_layers = [2, 4]
    if not 1 <= nb_dense_block <= len(nb_layers):
        raise ValueError('nb_dense_block must be between 1 and {} (got {})'.format(len(nb_layers), nb_dense_block))

    # Handle Dimension Ordering for different backends
    if K.image_dim_ordering() == 'tf':
        concat_axis = 3
        img_input = Input(shape=(75, 75, 3), name='data')
    else:
        concat_axis = 1
        img_input = Input(shape=(3, 75, 75), name='data')

    angle_input = Input( shape = [1], name = 'angle' )

    # NOTE: nb_filter is deliberately NOT reset here; the previous hard-coded
    # `nb_filter = 32` silently discarded the caller's argument (the default
    # is still 32, so default behaviour is unchanged).

    # Initial convolution
    x = ZeroPadding2D((1, 1), name='conv1_zeropadding')(img_input)
    x = Convolution2D(nb_filter, 3, 3, subsample=(2, 2), name='conv1', bias=False)(x)
    x = BatchNormalization(epsilon=eps, axis=concat_axis, name='conv1_bn')(x)
    x = Scale(axis=concat_axis, name='conv1_scale')(x)
    x = Activation('relu', name='relu1')(x)
    x = ZeroPadding2D((1, 1), name='pool1_zeropadding')(x)
    x = MaxPooling2D((5, 5), strides=(1, 1), name='pool1')(x)

    # Add dense blocks.  Stages are numbered from 2; `stage` is seeded with 1
    # so that final_stage is well defined even when the loop body never runs
    # (nb_dense_block == 1), which previously raised a NameError.
    stage = 1
    for block_idx in range(nb_dense_block - 1):
        stage = block_idx+2
        x, nb_filter = dense_block(x, stage, nb_layers[block_idx], nb_filter, growth_rate, dropout_rate=dropout_rate, weight_decay=weight_decay)

        # Add transition_block
        x = transition_block(x, stage, nb_filter, compression=compression, dropout_rate=dropout_rate, weight_decay=weight_decay)
        nb_filter = int(nb_filter * compression)

    final_stage = stage + 1
    x, nb_filter = dense_block(x, final_stage, nb_layers[-1], nb_filter, growth_rate, dropout_rate=dropout_rate, weight_decay=weight_decay)

    x = BatchNormalization(epsilon=eps, axis=concat_axis, name='conv'+str(final_stage)+'_blk_bn')(x)
    x = Scale(axis=concat_axis, name='conv'+str(final_stage)+'_blk_scale')(x)
    x = Activation('relu', name='relu'+str(final_stage)+'_blk')(x)
    x = GlobalAveragePooling2D(name='pool'+str(final_stage))(x)

    # Second image branch: small residual CNN fed from the same image input.
    img_1 = Conv2D( 32, kernel_size = (3, 3), activation = activation, padding = 'same' ) ((BatchNormalization(momentum=bn_momentum) ) ( img_input) )
    img_1 = MaxPooling2D( (2,2)) (img_1 )
    img_1 = Dropout( 0.2 )( img_1 )

    img_1 = Conv2D( 64, kernel_size = (3, 3), activation = activation, padding = 'same' ) ( (BatchNormalization(momentum=bn_momentum)) (img_1) )
    img_1 = MaxPooling2D( (2,2) ) ( img_1 )
    img_1 = Dropout( 0.2 )( img_1 )

    # Residual block: 64 -> 128 -> 64 filters so the output can be added
    # back onto img_1.
    img_2 = Conv2D( 128, kernel_size = (3, 3), activation = activation, padding = 'same' ) ( (BatchNormalization(momentum=bn_momentum)) (img_1) )
    img_2 = Dropout(0.2) ( img_2 )
    img_2 = Conv2D( 64, kernel_size = (3, 3), activation = activation, padding = 'same' ) ( (BatchNormalization(momentum=bn_momentum)) (img_2) )
    img_2 = Dropout(0.2) ( img_2 )

    img_res = add( [img_1, img_2] )

    # Filter residual output
    img_res = Conv2D( 128, kernel_size = (3, 3), activation = activation ) ( (BatchNormalization(momentum=bn_momentum)) (img_res) )
    img_res = MaxPooling2D( (2,2) ) ( img_res )
    img_res = Dropout( 0.2 )( img_res )
    img_res = GlobalMaxPooling2D() ( img_res )

    # Merge both image feature vectors with the normalised angle input.
    x = ( Concatenate()( [x, img_res, BatchNormalization(momentum=bn_momentum)(angle_input)]) )

    dense_layer = Dropout( 0.5 ) ( BatchNormalization(momentum=bn_momentum) (Dense(512, activation = 'elu') (x)) )
    dense_layer = Dropout( 0.5 ) ( BatchNormalization(momentum=bn_momentum) (Dense(256, activation = 'elu') (dense_layer)) )
    x = Dense(classes, activation = 'sigmoid', name='fc6')(dense_layer)

    model = Model([img_input, angle_input], x, name='densenet')

    opt = Adam( lr = 1e-3, beta_1 = .9, beta_2 = .999, decay = 1e-3 )
    model.compile( loss = 'binary_crossentropy', optimizer = opt, metrics = ['accuracy'] )

    if weights_path is not None:
        model.load_weights(weights_path)

    model.summary()
    return model


def conv_block(x, stage, branch, nb_filter, dropout_rate=None, weight_decay=1e-4):
    '''Bottleneck unit: BN-Scale-ReLU-1x1 conv followed by BN-Scale-ReLU-3x3 conv.

    Dropout is inserted after each convolution when dropout_rate is truthy.
        # Arguments
            x: input tensor
            stage: index for dense block (used in layer names)
            branch: layer index within each dense block (used in layer names)
            nb_filter: number of output filters; the 1x1 bottleneck uses 4x this
            dropout_rate: dropout rate
            weight_decay: weight decay factor (not used by this function)
        # Returns
            The output tensor of the 3x3 convolution (after optional dropout).
    '''
    epsilon = 1.1e-5
    conv_prefix = 'conv{}_{}'.format(stage, branch)
    relu_prefix = 'relu{}_{}'.format(stage, branch)

    def _norm_scale_relu(tensor, tag):
        # BatchNorm -> Scale -> ReLU, named after the sub-layer tag.
        tensor = BatchNormalization(epsilon=epsilon, axis=concat_axis, name=conv_prefix + tag + '_bn')(tensor)
        tensor = Scale(axis=concat_axis, name=conv_prefix + tag + '_scale')(tensor)
        return Activation('relu', name=relu_prefix + tag)(tensor)

    # 1x1 Convolution (Bottleneck layer)
    bottleneck = _norm_scale_relu(x, '_x1')
    bottleneck = Convolution2D(nb_filter * 4, 1, 1, name=conv_prefix + '_x1', bias=False)(bottleneck)
    if dropout_rate:
        bottleneck = Dropout(dropout_rate)(bottleneck)

    # 3x3 Convolution, zero-padded by one pixel on each side beforehand
    out = _norm_scale_relu(bottleneck, '_x2')
    out = ZeroPadding2D((1, 1), name=conv_prefix + '_x2_zeropadding')(out)
    out = Convolution2D(nb_filter, 3, 3, name=conv_prefix + '_x2', bias=False)(out)
    if dropout_rate:
        out = Dropout(dropout_rate)(out)

    return out


def transition_block(x, stage, nb_filter, compression=1.0, dropout_rate=None, weight_decay=1E-4):
    ''' Transition unit: BN-Scale-ReLU, 1x1 convolution, optional dropout, 2x2 average pooling.
        # Arguments
            x: input tensor
            stage: index for dense block (used in layer names)
            nb_filter: number of filters before compression
            compression: calculated as 1 - reduction; the 1x1 convolution
                outputs int(nb_filter * compression) feature maps.
            dropout_rate: dropout rate
            weight_decay: weight decay factor (not used by this function)
        # Returns
            The average-pooled output tensor.
    '''
    epsilon = 1.1e-5
    block_tag = 'conv{}_blk'.format(stage)

    out = BatchNormalization(epsilon=epsilon, axis=concat_axis, name=block_tag + '_bn')(x)
    out = Scale(axis=concat_axis, name=block_tag + '_scale')(out)
    out = Activation('relu', name='relu{}_blk'.format(stage))(out)
    out = Convolution2D(int(nb_filter * compression), 1, 1, name=block_tag, bias=False)(out)

    if dropout_rate:
        out = Dropout(dropout_rate)(out)

    return AveragePooling2D((2, 2), strides=(2, 2), name='pool{}'.format(stage))(out)


def dense_block(x, stage, nb_layers, nb_filter, growth_rate, dropout_rate=None, weight_decay=1e-4, grow_nb_filters=True):
    ''' Stack conv_blocks, concatenating each block's output onto its own input.
        # Arguments
            x: input tensor
            stage: index for dense block (used in layer names)
            nb_layers: the number of conv_block layers to append
            nb_filter: running filter count on entry
            growth_rate: number of filters each conv_block produces
            dropout_rate: dropout rate passed to conv_block
            weight_decay: weight decay factor passed to conv_block
            grow_nb_filters: when true, nb_filter is increased by growth_rate per layer
        # Returns
            Tuple (concatenated feature tensor, updated nb_filter).
    '''
    features = x

    for branch in range(1, nb_layers + 1):
        new_maps = conv_block(features, stage, branch, growth_rate, dropout_rate, weight_decay)
        features = merge(
            [features, new_maps],
            mode='concat',
            concat_axis=concat_axis,
            name='concat_{}_{}'.format(stage, branch),
        )

        if grow_nb_filters:
            nb_filter += growth_rate

    return features, nb_filter


Using TensorFlow backend.


In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import h5py
import time
import cv2

from sklearn.model_selection import train_test_split
from keras.models import Model, Sequential
from keras.layers import Conv2D, MaxPooling2D, GlobalMaxPooling2D, Dense, Dropout, BatchNormalization, Input, Flatten, Activation
from keras.layers.merge import Concatenate, add
from keras.optimizers import Adam
from keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping
from keras.preprocessing.image import ImageDataGenerator

In [3]:
def get_callbacks( weight_save_path, no_improv_epochs = 10, min_delta = 1e-4 ):
    """Return the training callbacks: early stopping plus best-only checkpointing.

    Both callbacks monitor 'val_loss'.  Early stopping waits
    `no_improv_epochs` epochs without an improvement of at least
    `min_delta`; the checkpoint writes to `weight_save_path`.
    """
    return [
        EarlyStopping( monitor = 'val_loss', min_delta = min_delta,
                       patience = no_improv_epochs, mode = 'min' ),
        ModelCheckpoint( weight_save_path, monitor = 'val_loss', save_best_only = True ),
    ]

def get_scaled_imgs(df):
    """Turn each row's two bands into a mean-centred, range-scaled 75x75x3 image.

    Channel 0 and 1 come from 'band_1' and 'band_2'; channel 2 is their sum.
    Every channel is scaled independently as (v - mean) / (max - min).
    Returns an array with one image per row of `df`.
    """
    def _center_and_scale(band):
        return (band - band.mean()) / (band.max() - band.min())

    scaled = []
    for _, record in df.iterrows():
        first = np.array(record['band_1']).reshape(75, 75)
        second = np.array(record['band_2']).reshape(75, 75)
        # Sum of the bands: the values are log-scaled, and
        # log(x*y) = log(x) + log(y).
        combined = first + second

        channels = tuple(_center_and_scale(band) for band in (first, second, combined))
        scaled.append(np.dstack(channels))

    return np.array(scaled)

def generate_data( data ):
    """Stack band_1, band_2 and their mean into float32 images.

    Each entry of data['band_1'] / data['band_2'] is reshaped to 75x75;
    the result has one trailing channel axis with the order
    (band_1, band_2, average).
    """
    def _as_images( column ):
        return np.array( [np.array(flat).astype(np.float32).reshape(75, 75) for flat in column] )

    first = _as_images( data['band_1'] )
    second = _as_images( data['band_2'] )
    channels = ( first, second, (first + second) / 2 )

    return np.concatenate( [channel[:, :, :, np.newaxis] for channel in channels], axis=-1 )

def augment_data( generator, X1, X2, y, batch_size = 32 ):
    """Yield ([images, X2 batch], labels) batches forever.

    Two flows are created over the same X1 with one shared random seed:
    the first carries `y`, the second carries `X2` in the label slot.
    The images and labels are taken from the first flow and the X2 batch
    from the second.
    """
    shared_seed = np.random.randint( 9999 )
    flow_options = dict( batch_size = batch_size, seed = shared_seed )
    label_flow = generator.flow( X1, y, **flow_options )
    extra_flow = generator.flow( X1, X2, **flow_options )

    while True:
        label_batch = label_flow.next()
        extra_batch = extra_flow.next()

        images, labels = label_batch[0], label_batch[1]
        yield [ images, extra_batch[1] ], labels
    
def plot_band_samples( data, band = 1, title = None ):
    """Show the first nine rows of one band column as a 3x3 image grid.

    `band` is 1-based and selects the column by position
    (`data.iloc[i, band - 1]`), so it assumes the band columns are the
    leading columns of `data` -- TODO confirm against the loaded frame.
    Each cell is reshaped to 75x75 before plotting.
    """
    fig = plt.figure( 1, figsize=(15, 15) )
    for i in range(9):
        ax = fig.add_subplot( 3, 3, i + 1 )
        arr = np.reshape( np.array(data.iloc[i, band - 1]), (75, 75) )
        ax.imshow( arr, cmap='inferno' )

    # The title does not depend on the subplot, so set it once.
    fig.suptitle( title )
    plt.show()

def plot_all_bands( data, title = None ):
    """Plot a 3x3 grid: one row per sample (first three), one column per channel.

    `data` is indexed as data[sample, :, :, channel].  Only the top row
    gets column headings.
    """
    column_titles = ( 'Band 1', 'Band 2', 'Average' )
    fig = plt.figure( 1, figsize = (15, 15) )

    for sample in range(3):
        for channel in range(3):
            ax = fig.add_subplot( 3, 3, 3 * sample + channel + 1 )
            ax.imshow( data[sample, :, :, channel], cmap = 'inferno' )
            if sample == 0:
                ax.set_title( column_titles[channel], fontsize = 12 )

    fig.suptitle( title, fontsize = 14, fontweight = 'bold' )
    plt.show()

def make_plots( data, band_samples = True, all_bands = True ):
    """Draw exploratory plots for nine sampled ships and nine sampled icebergs.

    Rows are sampled per class (is_iceberg == 0 / 1) with a fixed
    random_state.  `band_samples` toggles the band-2 grids, `all_bands`
    toggles the per-channel grids built from generate_data().
    """
    # (class label, band-sample title, all-bands title); ships first.
    class_specs = (
        ( 0, 'Ship image samples', 'Image bands for ships' ),
        ( 1, 'Iceberg image samples', 'Image bands for icebergs' ),
    )

    frames = [ data[ data.is_iceberg == label ].sample( n = 9, random_state = 42 )
               for label, _, _ in class_specs ]
    stacked = [ generate_data( frame ) for frame in frames ]

    if band_samples:
        for frame, ( _, sample_title, _ ) in zip( frames, class_specs ):
            plot_band_samples( frame, band = 2, title = sample_title )

    if all_bands:
        for images, ( _, _, bands_title ) in zip( stacked, class_specs ):
            plot_all_bands( images, bands_title )

 
 
 
TEST = True # Should test data be passed to the model?
DO_PLOT = False # Exploratory data plots
USE_AUGMENTATION = False # Whether or not image augmentations should be made
TRAIN_PATH = 'kaggle_lceberg_data/train.json'
TEST_PATH = 'kaggle_lceberg_data/test.json'
WEIGHT_SAVE_PATH = 'model_weights.hdf5'
PREDICTION_SAVE_PATH = 'kaggle_lceberg_data/submission'

if TEST:
    SEED = np.random.randint( 9999 )
else:
    SEED = 42 # Constant seed for comparability between runs

BATCH_SIZE = 16
EPOCHS = 100 # Increase this

train_data = pd.read_json( TRAIN_PATH )

# Map the 'na' placeholder in inc_angle to 0 and cast the column to float
# BEFORE taking X_a from it.  Previously X_a was captured from the raw
# column, so it stayed an object-dtype Series even after filtering.
train_data[ 'inc_angle' ] = train_data[ 'inc_angle' ].replace( 'na', 0 ).astype( float )

X = get_scaled_imgs( train_data )

# Keep only rows with a positive angle (drops the rows mapped to 0 above).
# One boolean mask is applied to images, angles and labels so they stay aligned.
has_angle = ( train_data[ 'inc_angle' ] > 0 ).values

X = X[ has_angle, ... ]
X_a = train_data[ 'inc_angle' ][ has_angle ]
y = train_data[ 'is_iceberg' ][ has_angle ]

def get_more_images(imgs):
    """Triple an image batch by appending two flipped copies of it.

    `imgs` is indexed as imgs[sample, row, column, channel].  The result is
    the concatenation, along the sample axis, of:
      1. the original images,
      2. the images with the column order reversed (what cv2.flip(..., 1)
         produced per channel in the previous implementation),
      3. the images with the row order reversed (cv2.flip(..., 0)).

    The flips are done with NumPy slicing on the whole batch instead of a
    per-image, per-channel loop.  Unlike the old loop this also works for
    an empty batch and does not assume exactly three channels.
    """
    imgs = np.asarray(imgs)

    column_flipped = imgs[:, :, ::-1, :]
    row_flipped = imgs[:, ::-1, :, :]

    # np.concatenate copies, so the result does not alias `imgs`.
    return np.concatenate((imgs, column_flipped, row_flipped))

X_train, X_val, X_angle_train, X_angle_val, y_train, y_val = train_test_split( X, X_a, y, train_size = .8, random_state = SEED )

# Flip augmentation is applied to the training split only, after the split,
# so flipped copies of validation images never end up in the training set.
# get_more_images returns three stacked copies, hence angles and labels are
# repeated three times in the same order.
X_train = get_more_images(X_train)
X_angle_train = np.concatenate((X_angle_train,X_angle_train,X_angle_train))
y_train = np.concatenate((y_train,y_train,y_train))

if DO_PLOT:
    make_plots( train_data, band_samples = True, all_bands = True )

callback_list = get_callbacks( WEIGHT_SAVE_PATH, 20 )

model = DenseNet()
start_time = time.time()

if USE_AUGMENTATION:
    image_augmentation = ImageDataGenerator( rotation_range = 20,
                                             horizontal_flip = True,
                                             vertical_flip = True,
                                             width_shift_range = .3,
                                             height_shift_range =.3,
                                             zoom_range = .1 )
    input_generator = augment_data( image_augmentation, X_train, X_angle_train, y_train, batch_size = BATCH_SIZE )

    # Keras expects integer step counts; plain `/` yields floats on Python 3.
    # Validation steps are rounded up so the last partial batch is included.
    steps_per_epoch = 4096 // BATCH_SIZE
    validation_steps = int( np.ceil(len(X_val) / float(BATCH_SIZE)) )

    # NOTE(review): the validation generator also applies random
    # augmentation, while the final evaluation below uses the raw
    # validation arrays -- confirm this is intended.
    model.fit_generator( input_generator, steps_per_epoch = steps_per_epoch, epochs = EPOCHS,
                         callbacks = callback_list, verbose = 2,
                         validation_data = augment_data(image_augmentation, X_val, X_angle_val, y_val, batch_size = BATCH_SIZE),
                         validation_steps = validation_steps )

else:
    # Just fit model to the given training data
    model.fit( [X_train, X_angle_train], y_train, batch_size = BATCH_SIZE, epochs = EPOCHS, verbose = 2,
               validation_data = ([X_val, X_angle_val], y_val), callbacks = callback_list )

m, s = divmod( time.time() - start_time, 60 )
print( 'Model fitting done. Total time: {}m {}s'.format(int(m), int(s)) )

# Reload the checkpoint written by ModelCheckpoint (best val_loss) before
# scoring, rather than using the weights from the last epoch.
model.load_weights( WEIGHT_SAVE_PATH )
val_score = model.evaluate( [X_val, X_angle_val], y_val, verbose = 1 )
print( 'Validation score: {}'.format(round(val_score[0], 5)) )
print( 'Validation accuracy: {}%'.format(round(val_score[1]*100, 2)) )
print( '='*20, '\n' )

  name=name)


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
data (InputLayer)               (None, 75, 75, 3)    0                                            
__________________________________________________________________________________________________
conv1_zeropadding (ZeroPadding2 (None, 77, 77, 3)    0           data[0][0]                       
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 38, 38, 32)   864         conv1_zeropadding[0][0]          
__________________________________________________________________________________________________
conv1_bn (BatchNormalization)   (None, 38, 38, 32)   128         conv1[0][0]                      
__________________________________________________________________________________________________
conv1_scal

Train on 3528 samples, validate on 295 samples
Epoch 1/100
 - 177s - loss: 0.5643 - acc: 0.7738 - val_loss: 5.1838 - val_acc: 0.4678
Epoch 2/100
 - 15s - loss: 0.4234 - acc: 0.8223 - val_loss: 1.7047 - val_acc: 0.4847
Epoch 3/100
 - 15s - loss: 0.3800 - acc: 0.8416 - val_loss: 1.2078 - val_acc: 0.5220
Epoch 4/100
 - 15s - loss: 0.3294 - acc: 0.8583 - val_loss: 0.4947 - val_acc: 0.7898
Epoch 5/100
 - 15s - loss: 0.3029 - acc: 0.8722 - val_loss: 0.2772 - val_acc: 0.8746
Epoch 6/100
 - 15s - loss: 0.2823 - acc: 0.8750 - val_loss: 0.2920 - val_acc: 0.8949
Epoch 7/100
 - 15s - loss: 0.2605 - acc: 0.8909 - val_loss: 0.2971 - val_acc: 0.8814
Epoch 8/100
 - 14s - loss: 0.2458 - acc: 0.8980 - val_loss: 0.3607 - val_acc: 0.8814
Epoch 9/100
 - 15s - loss: 0.2282 - acc: 0.9073 - val_loss: 0.2720 - val_acc: 0.8983
Epoch 10/100
 - 15s - loss: 0.2294 - acc: 0.9099 - val_loss: 0.2560 - val_acc: 0.8915
Epoch 11/100
 - 15s - loss: 0.2226 - acc: 0.9090 - val_loss: 0.6753 - val_acc: 0.7458
Epoch 12/100
 -