In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from keras.utils.np_utils import to_categorical
from keras.models import Sequential,load_model
from keras.layers import Dense, Dropout, Flatten, Lambda, Activation,BatchNormalization
from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D, GlobalAveragePooling2D
from keras.layers.normalization import BatchNormalization
from keras.optimizers import Adam

# Load the training and test sets from the JSON files under ../input.
train_df = pd.read_json('../input/train.json')
test_df = pd.read_json('../input/test.json')

Using TensorFlow backend.


In [2]:
def get_image(df):
    '''Create 3-channel 'images'. Return rescale-normalised images.

    Each row of `df` must provide `band_1` and `band_2`, each a flat
    sequence of 75*75 values. A third channel is synthesised as the mean
    of the two bands, and every channel is min-max rescaled independently
    (per image) into [0, 1].

    # Arguments
        df: DataFrame with `band_1` and `band_2` columns.
    # Returns
        Float array of shape (len(df), 75, 75, 3). An empty `df` yields an
        array of shape (0, 75, 75, 3).
    '''
    def _rescale(band):
        # Min-max scale a single band. A constant band has zero range, which
        # would otherwise produce 0/0 = NaN; map it to all zeros instead.
        lo = band.min()
        span = band.max() - lo
        if span == 0:
            return np.zeros_like(band, dtype=float)
        return (band - lo) / span

    images = []
    # Iterate the two columns directly; this avoids building a Series per row
    # the way DataFrame.iterrows() does.
    for raw_1, raw_2 in zip(df['band_1'], df['band_2']):
        # Formulate the bands as 75x75 arrays
        band_1 = np.array(raw_1).reshape(75, 75)
        band_2 = np.array(raw_2).reshape(75, 75)
        band_3 = (band_1 + band_2) / 2

        images.append(np.dstack((_rescale(band_1),
                                 _rescale(band_2),
                                 _rescale(band_3))))

    if not images:
        # Keep the rank stable so downstream code can rely on a 4-D array.
        return np.empty((0, 75, 75, 3))
    return np.array(images)


# Convert both data frames into normalised 3-channel image arrays.
train_x = get_image(train_df)
test_x = get_image(test_df)

# Sanity check: print the shapes of the resulting arrays.
print(train_x.shape,test_x.shape)

(1604, 75, 75, 3) (8424, 75, 75, 3)


In [3]:
# Training targets: the `is_iceberg` column as a NumPy array.
y = train_df.is_iceberg.values
# Peek at the first five labels.
print(y[:5])

[0 0 1 0 0]


In [4]:
def rot_aut(Xtr,ytr):
    """Augment a training set with flipped and rotated copies of each image.

    For every sample the output holds, in order: the original image, its
    left-right flip, its up-down flip and a 90-degree rotation, each paired
    with the sample's label. The number of input samples is printed.

    # Arguments
        Xtr: indexable collection of images.
        ytr: labels; its length determines how many samples are processed.
    # Returns
        Tuple (images, labels) as NumPy arrays, four entries per input sample.
    """
    n_samples = len(ytr)
    print(n_samples)

    # Variants applied to every image, in output order.
    variants = (lambda picture: picture, np.fliplr, np.flipud, np.rot90)

    out_images, out_labels = [], []
    for idx in range(n_samples):
        picture, label = Xtr[idx], ytr[idx]
        for make_variant in variants:
            out_images.append(make_variant(picture))
            out_labels.append(label)
    return np.array(out_images), np.array(out_labels)

In [16]:
from keras.callbacks import ModelCheckpoint
from keras.layers import Input,AveragePooling2D,GlobalMaxPooling2D,GlobalAveragePooling2D,Flatten
from keras import layers
from keras.models import Model

def identity_block(input_tensor, kernel_size, filters, stage, block):
    """Residual block whose shortcut is the unmodified input tensor.

    The main path is 1x1 conv -> kernel_size conv ('same' padding) -> 1x1 conv,
    each followed by batch normalisation, with ReLU after the first two. The
    result is added to `input_tensor` and passed through a final ReLU.
    # Arguments
        input_tensor: input tensor
        kernel_size: kernel size of the middle conv layer on the main path
        filters: three integers, the filter counts of the three main-path convs
        stage: integer stage label, used to build layer names
        block: 'a','b'..., block label, used to build layer names
    # Returns
        Output tensor for the block.
    """
    n_first, n_middle, n_last = filters
    channel_axis = 3
    # Layer names follow 'res<stage><block>_branch...' / 'bn<stage><block>_branch...'.
    tag = str(stage) + block + '_branch'
    conv_prefix = 'res' + tag
    bn_prefix = 'bn' + tag

    # 1x1 convolution
    out = Conv2D(n_first, (1, 1), name=conv_prefix + '2a')(input_tensor)
    out = BatchNormalization(axis=channel_axis, name=bn_prefix + '2a')(out)
    out = Activation('relu')(out)

    # kernel_size convolution, spatial size preserved
    out = Conv2D(n_middle, kernel_size, padding='same',
                 name=conv_prefix + '2b')(out)
    out = BatchNormalization(axis=channel_axis, name=bn_prefix + '2b')(out)
    out = Activation('relu')(out)

    # 1x1 convolution; no activation before the residual add
    out = Conv2D(n_last, (1, 1), name=conv_prefix + '2c')(out)
    out = BatchNormalization(axis=channel_axis, name=bn_prefix + '2c')(out)

    # Residual connection straight from the block input.
    merged = layers.add([out, input_tensor])
    merged = Activation('relu')(merged)
    return merged


def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)):
    """Residual block with a 1x1 convolution on the shortcut path.

    The main path is 1x1 conv (with `strides`) -> kernel_size conv ('same'
    padding) -> 1x1 conv, each followed by batch normalisation, with ReLU
    after the first two. The shortcut applies a 1x1 conv with the same
    `strides` plus batch normalisation. Both paths are added and passed
    through a final ReLU.
    # Arguments
        input_tensor: input tensor
        kernel_size: kernel size of the middle conv layer on the main path
        filters: three integers, the filter counts of the three main-path convs
        stage: integer stage label, used to build layer names
        block: 'a','b'..., block label, used to build layer names
        strides: strides of the first main-path conv and of the shortcut conv
    # Returns
        Output tensor for the block.
    """
    n_first, n_middle, n_last = filters

    channel_axis = 3
    # Layer names follow 'res<stage><block>_branch...' / 'bn<stage><block>_branch...'.
    tag = str(stage) + block + '_branch'
    conv_prefix = 'res' + tag
    bn_prefix = 'bn' + tag

    # Main path: strided 1x1 convolution
    main = Conv2D(n_first, (1, 1), strides=strides,
                  name=conv_prefix + '2a')(input_tensor)
    main = BatchNormalization(axis=channel_axis, name=bn_prefix + '2a')(main)
    main = Activation('relu')(main)

    # Main path: kernel_size convolution, spatial size preserved
    main = Conv2D(n_middle, kernel_size, padding='same',
                  name=conv_prefix + '2b')(main)
    main = BatchNormalization(axis=channel_axis, name=bn_prefix + '2b')(main)
    main = Activation('relu')(main)

    # Main path: 1x1 convolution; no activation before the residual add
    main = Conv2D(n_last, (1, 1), name=conv_prefix + '2c')(main)
    main = BatchNormalization(axis=channel_axis, name=bn_prefix + '2c')(main)

    # Shortcut path: projected with the same strides and n_last filters so it
    # can be added to the main path.
    skip = Conv2D(n_last, (1, 1), strides=strides,
                  name=conv_prefix + '1')(input_tensor)
    skip = BatchNormalization(axis=channel_axis, name=bn_prefix + '1')(skip)

    merged = layers.add([main, skip])
    merged = Activation('relu')(merged)
    return merged

def create_model(input_shape=(75, 75, 3)):
    """Build a small residual CNN with a single sigmoid output.

    Layout: 7x7 stride-2 stem conv + batch norm + ReLU + 3x3 max-pool,
    then stage 2 (one conv_block and two identity_blocks, filters
    [64, 64, 96]) and stage 3 (one conv_block and three identity_blocks,
    filters [96, 96, 128]), followed by a strided 3x3 conv, dropout,
    a 256-unit dense layer, dropout and a 1-unit sigmoid layer.

    # Arguments
        input_shape: shape of one input image, channels last. Defaults to
            (75, 75, 3), the shape previously hard-coded here.
    # Returns
        An uncompiled Keras `Model`.
    """
    img_input = Input(shape=input_shape)
    bn_axis = 3  # channels-last: normalise over the last axis

    # Stem
    x = Conv2D(64, (7, 7), strides=(2, 2), padding='same', name='conv1')(img_input)
    x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x)
    x = Activation('relu')(x)
    x = MaxPooling2D((3, 3), strides=(2, 2))(x)

    # Stage 2: strides=(1, 1) so the first block does not downsample
    x = conv_block(x, 3, [64, 64, 96], stage=2, block='a', strides=(1, 1))
    x = identity_block(x, 3, [64, 64, 96], stage=2, block='b')
    x = identity_block(x, 3, [64, 64, 96], stage=2, block='c')

    # Stage 3: conv_block uses its default strides=(2, 2)
    x = conv_block(x, 3, [96, 96, 128], stage=3, block='a')
    x = identity_block(x, 3, [96, 96, 128], stage=3, block='b')
    x = identity_block(x, 3, [96, 96, 128], stage=3, block='c')
    x = identity_block(x, 3, [96, 96, 128], stage=3, block='d')

    # Classification head
    x = Conv2D(128, 3, strides=2, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = Flatten()(x)
    x = Dense(256,activation='relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(1,activation='sigmoid')(x)

    # NOTE(review): the model name says 'inception_v3' although the network is
    # built from residual blocks; kept unchanged for backward compatibility.
    return Model(img_input, x, name='inception_v3')
print('model model')

# Build one model instance and print its layer-by-layer summary.
test_m = create_model()
test_m.summary()

model model
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_13 (InputLayer)           (None, 75, 75, 3)    0                                            
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 38, 38, 64)   9472        input_13[0][0]                   
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 38, 38, 64)   256         conv1[0][0]                      
__________________________________________________________________________________________________
activation_243 (Activation)     (None, 38, 38, 64)   0           bn_conv1[0][0]                   
_________________________________________________________________________________________________

In [17]:
from sklearn.model_selection import KFold
def kfold_train(fold_cnt=3,rnd=233):
    """Train one model per K-fold split and collect its predictions.

    Uses the module-level `train_x`, `y` and `test_x`. For each fold the
    training part is augmented with `rot_aut`, a fresh model is trained for
    20 epochs, and the checkpoint with the lowest validation loss is reloaded
    before predicting.

    # Arguments
        fold_cnt: number of folds.
        rnd: seed value; the KFold shuffle uses `2*rnd` as its random state.
    # Returns
        Tuple (train_pred, test_pred):
        train_pred has shape (len(train_x), 1) and holds each sample's
        prediction from the fold in which it was held out.
        test_pred has shape (len(test_x), 1) and holds the mean of the
        per-fold test predictions.
    """
    # Size the buffers from the data instead of hard-coding 1604 / 8424 rows,
    # so the function keeps working if the dataset size changes.
    train_pred = np.zeros((len(train_x), 1))
    test_pred = np.zeros((len(test_x), 1))
    kf = KFold(n_splits=fold_cnt, shuffle=True, random_state=2*rnd)
    for train_index, test_index in kf.split(train_x):
        curr_x,curr_y = train_x[train_index],y[train_index]
        # Augment only the training part; validation stays untouched.
        curr_x,curr_y = rot_aut(curr_x,curr_y)
        val_x,val_y = train_x[test_index],y[test_index]

        model = create_model()
        model.compile(loss='binary_crossentropy', optimizer=Adam(0.0005), metrics=['accuracy'])
        # The same checkpoint path is reused (and overwritten) by every fold.
        model_p = 'best_m.h5'
        model_chk = ModelCheckpoint(filepath=model_p, monitor='val_loss', save_best_only=True, verbose=1)
        model.fit(curr_x,curr_y,
                  validation_data=(val_x,val_y),
                  batch_size=32, epochs=20,
                  verbose=2,
                  callbacks=[model_chk]
                 )
        # Predict with the best checkpoint rather than the last-epoch weights.
        model = load_model(model_p)

        train_pred[test_index] = model.predict(val_x)
        # Accumulate the running mean over folds.
        test_pred = test_pred + model.predict(test_x)/fold_cnt
    return train_pred,test_pred

# Run the cross-validated training with 4 folds and keep both prediction arrays.
train_pred,test_pred = kfold_train(fold_cnt=4)

1203
Train on 4812 samples, validate on 401 samples
Epoch 1/20
Epoch 00001: val_loss improved from inf to 0.70610, saving model to best_m.h5
 - 49s - loss: 0.6652 - acc: 0.6752 - val_loss: 0.7061 - val_acc: 0.4514
Epoch 2/20
Epoch 00002: val_loss did not improve
 - 13s - loss: 0.4006 - acc: 0.8059 - val_loss: 0.7092 - val_acc: 0.4613
Epoch 3/20
Epoch 00003: val_loss improved from 0.70610 to 0.46111, saving model to best_m.h5
 - 13s - loss: 0.3469 - acc: 0.8373 - val_loss: 0.4611 - val_acc: 0.7431
Epoch 4/20
Epoch 00004: val_loss improved from 0.46111 to 0.31506, saving model to best_m.h5
 - 13s - loss: 0.3032 - acc: 0.8624 - val_loss: 0.3151 - val_acc: 0.8778
Epoch 5/20
Epoch 00005: val_loss did not improve
 - 12s - loss: 0.2870 - acc: 0.8691 - val_loss: 1.4117 - val_acc: 0.5985
Epoch 6/20
Epoch 00006: val_loss improved from 0.31506 to 0.26735, saving model to best_m.h5
 - 13s - loss: 0.2635 - acc: 0.8788 - val_loss: 0.2674 - val_acc: 0.8678
Epoch 7/20
Epoch 00007: val_loss did not imp

Epoch 2/20
Epoch 00002: val_loss improved from 0.68693 to 0.68515, saving model to best_m.h5
 - 13s - loss: 0.4155 - acc: 0.8022 - val_loss: 0.6852 - val_acc: 0.5362
Epoch 3/20
Epoch 00003: val_loss did not improve
 - 13s - loss: 0.3268 - acc: 0.8495 - val_loss: 0.8829 - val_acc: 0.5761
Epoch 4/20
Epoch 00004: val_loss improved from 0.68515 to 0.41219, saving model to best_m.h5
 - 13s - loss: 0.2931 - acc: 0.8674 - val_loss: 0.4122 - val_acc: 0.8080
Epoch 5/20
Epoch 00005: val_loss did not improve
 - 13s - loss: 0.2682 - acc: 0.8757 - val_loss: 0.8438 - val_acc: 0.6309
Epoch 6/20
Epoch 00006: val_loss did not improve
 - 13s - loss: 0.2424 - acc: 0.8899 - val_loss: 0.4956 - val_acc: 0.8279
Epoch 7/20
Epoch 00007: val_loss did not improve
 - 13s - loss: 0.2166 - acc: 0.8984 - val_loss: 3.1656 - val_acc: 0.5461
Epoch 8/20
Epoch 00008: val_loss did not improve
 - 13s - loss: 0.2024 - acc: 0.9050 - val_loss: 1.3134 - val_acc: 0.5761
Epoch 9/20
Epoch 00009: val_loss did not improve
 - 13s - 

In [18]:
import os
import pickle

# Create the output directories up front so a long training run is not lost
# to a missing folder when the results are written.
os.makedirs('../features', exist_ok=True)
os.makedirs('../results', exist_ok=True)

# Persist the train and test predictions as a pickled two-element list.
with open('../features/resnet_1_feat.pkl','wb') as fout:
    pickle.dump([train_pred,test_pred],fout)

# train feat loss
from sklearn.metrics import log_loss
print(log_loss(y,train_pred))

# Build the submission frame: one row per test id with its predicted probability.
submission = pd.DataFrame()
submission['id']=test_df['id']
# test_pred has shape (n, 1); flatten it explicitly so the column assignment
# does not depend on how pandas treats 2-D single-column arrays.
submission['is_iceberg']=np.ravel(test_pred)
print(submission.head())
submission.to_csv('../results/resnet_1_sub.csv', index=False)

0.338411347687
         id  is_iceberg
0  5941774d    0.370994
1  4023181e    0.159682
2  b20200e4    0.050549
3  e7f018bb    0.993832
4  4371c8c3    0.174863
