In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from keras.utils.np_utils import to_categorical
from keras.models import Sequential,load_model
from keras.layers import Dense, Dropout, Flatten, Lambda, Activation
from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D, GlobalAveragePooling2D
from keras.layers.normalization import BatchNormalization
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint,LearningRateScheduler,EarlyStopping

# Load the training and test sets from JSON into DataFrames.
# The paths are relative to the notebook's working directory.
train_df = pd.read_json('../input/train.json')
test_df = pd.read_json('../input/test.json')

Using TensorFlow backend.


In [2]:
from skimage.exposure import equalize_adapthist

def std_img(x):
    '''Standardise each channel of an image to zero mean and unit variance.

    The array is modified in place and the same object is returned.

    Parameters
    ----------
    x : numpy.ndarray
        Floating-point array indexed as (row, column, channel). Any number
        of channels is accepted.

    Returns
    -------
    numpy.ndarray
        The input array `x`, with every channel standardised.
    '''
    # Use the actual channel count instead of assuming exactly 3, so images
    # with fewer channels do not raise IndexError and extra channels are not
    # silently left untouched.
    for i in range(x.shape[2]):
        x[:, :, i] -= np.mean(x[:, :, i].flatten())
        # The small epsilon avoids a division by zero for a constant channel.
        x[:, :, i] /= np.std(x[:, :, i].flatten()) + 1e-7
    return x

def _rescale_unit(band):
    '''Min-max rescale `band` to [0, 1]; a constant band maps to all zeros.'''
    lowest = band.min()
    span = band.max() - lowest
    if span == 0:
        # A constant band would otherwise be 0/0 and turn into all-NaN.
        return np.zeros_like(band, dtype=float)
    return (band - lowest) / span


def get_image(df):
    '''Create 3-channel 'images'. Return rescale-normalised images.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns 'band_1' and 'band_2'; each cell holds the
        5625 values of one band, which are reshaped to 75x75.

    Returns
    -------
    numpy.ndarray
        One image per row of `df`, stacked along the first axis. The three
        channels are band 1, band 2 and their mean, each min-max rescaled
        and then passed (as one image) through `equalize_adapthist`.
    '''
    images = []
    # Only the two band columns are needed, so iterate over them directly
    # instead of materialising every row with iterrows().
    for raw_1, raw_2 in zip(df['band_1'], df['band_2']):
        # Formulate the bands as 75x75 arrays
        band_1 = np.array(raw_1).reshape(75, 75)
        band_2 = np.array(raw_2).reshape(75, 75)
        band_3 = (band_1 + band_2)/2

        # Rescale every channel to [0, 1] before the histogram equalisation
        img = np.dstack([
            _rescale_unit(band_1),
            _rescale_unit(band_2),
            _rescale_unit(band_3),
        ])
        img = equalize_adapthist(img)
        # Per-channel standardisation via std_img(img) was tried here and is
        # currently disabled.
        images.append(img)
    return np.array(images)


# Turn both data frames into image tensors (training split first).
train_x = get_image(df=train_df)
test_x = get_image(df=test_df)

# Sanity check: show the shapes of the two resulting arrays.
print(train_x.shape, test_x.shape)

  .format(dtypeobj_in, dtypeobj_out))


(1604, 75, 75, 3) (8424, 75, 75, 3)


In [3]:
# Training labels as a plain array, taken from the 'is_iceberg' column.
y = train_df['is_iceberg'].values
# Peek at the first five labels.
print(y[:5])

[0 0 1 0 0]


In [18]:
def create_model():
    '''Build the first CNN variant and return it as an uncompiled keras model.

    Two convolutional blocks (16 and 32 filters, each with two 3x3
    convolutions, max-pooling and dropout) feed a 256-unit dense layer and a
    single sigmoid output. The expected input shape is (75, 75, 3).
    '''
    layer_stack = [
        # Block 1: 16 filters
        Conv2D(16, (3, 3), padding='same', input_shape=(75, 75, 3)),
        Activation('relu'),
        Conv2D(16, (3, 3)),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),

        # Block 2: 32 filters
        Conv2D(32, (3, 3)),
        Activation('relu'),
        Conv2D(32, (3, 3)),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),

        # Classifier head
        Flatten(),
        Dense(256),
        Activation('relu'),
        Dropout(0.5),
        Dense(1),
        Activation('sigmoid'),
    ]

    # Sequential adds the layers in list order, exactly like repeated .add().
    return Sequential(layer_stack)
print('model model')


model model


In [23]:
from sklearn.model_selection import KFold
from keras.preprocessing.image import ImageDataGenerator

def lr_f(epoch):
    '''Piecewise-constant learning-rate schedule keyed on the epoch index.

    Returns 0.001 while `epoch` is below 20, 0.0005 while it is below 70,
    and 0.0001 from then on.
    '''
    # (exclusive upper epoch bound, learning rate), checked in order
    stages = ((20, 0.001), (70, 0.0005))
    for upper_bound, rate in stages:
        if epoch < upper_bound:
            return rate
    return 0.0001
    

def kfold_train(fold_cnt=3,rnd=42):
    '''Train a fresh `create_model()` on every K-fold split and collect predictions.

    Relies on the notebook-level globals `train_x`, `y` and `test_x`, on the
    currently defined `create_model`, and on the `lr_f` schedule.

    Parameters
    ----------
    fold_cnt : int
        Number of cross-validation folds.
    rnd : int
        Seed value; `2*rnd` is passed to `KFold` as `random_state`.

    Returns
    -------
    (train_pred, test_pred)
        `train_pred` holds the out-of-fold prediction for every training
        sample, shape `(len(train_x), 1)`. `test_pred` is the mean of the
        per-fold test predictions, shape `(len(test_x), 1)`.
    '''
    import os  # local import: only needed for the checkpoint clean-up below

    # Size the buffers from the data instead of hard-coding 1604 / 8424 rows.
    train_pred = np.zeros((len(train_x), 1))
    test_pred = np.zeros((len(test_x), 1))
    kf = KFold(n_splits=fold_cnt, shuffle=True, random_state=2*rnd)

    # The augmentation settings are the same for every fold, so the
    # generator is configured once, outside the loop.
    datagen = ImageDataGenerator(
        width_shift_range=0.05,
        height_shift_range=0.05,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        vertical_flip=True
    )
    bat_size = 16
    model_p = 'best_m.h5'

    for train_index, test_index in kf.split(train_x):
        curr_x,curr_y = train_x[train_index],y[train_index]
        val_x,val_y = train_x[test_index],y[test_index]
        steps_train = len(curr_y)//bat_size

        # Drop any checkpoint left behind by an earlier fold or model, so the
        # load_model call below can only pick up weights saved in this fold
        # (a stale file would have been trained on this fold's validation data).
        if os.path.exists(model_p):
            os.remove(model_p)

        model = create_model()
        # NOTE: the LearningRateScheduler sets the rate at the start of each
        # epoch, so lr_f supersedes the 0.0005 passed to Adam here.
        model.compile(loss='binary_crossentropy', optimizer=Adam(0.0005), metrics=['accuracy'])
        # Keep only the weights with the best validation loss seen so far.
        model_chk = ModelCheckpoint(filepath=model_p, monitor='val_loss', save_best_only=True, verbose=1)
        lr_s = LearningRateScheduler(lr_f)
        model.fit_generator(datagen.flow(curr_x, curr_y, batch_size=bat_size),
                  validation_data=(val_x,val_y),
                  steps_per_epoch = steps_train,
                  epochs=120,
                  verbose=2,
                  callbacks=[model_chk,lr_s]
                 )

        # Predict with the best checkpoint of this fold, not the last epoch.
        model = load_model(model_p)
        train_pred[test_index] = model.predict(val_x)
        test_pred = test_pred + model.predict(test_x)/fold_cnt
        print('============================')
    return train_pred,test_pred

train_pred,test_pred = kfold_train(fold_cnt=5)

Epoch 1/120
Epoch 00001: val_loss improved from inf to 0.69126, saving model to best_m.h5
 - 4s - loss: 0.7288 - acc: 0.5216 - val_loss: 0.6913 - val_acc: 0.4984
Epoch 2/120
Epoch 00002: val_loss improved from 0.69126 to 0.68571, saving model to best_m.h5
 - 2s - loss: 0.6900 - acc: 0.5349 - val_loss: 0.6857 - val_acc: 0.4984
Epoch 3/120
Epoch 00003: val_loss improved from 0.68571 to 0.62120, saving model to best_m.h5
 - 2s - loss: 0.6672 - acc: 0.5464 - val_loss: 0.6212 - val_acc: 0.5857
Epoch 4/120
Epoch 00004: val_loss improved from 0.62120 to 0.54488, saving model to best_m.h5
 - 2s - loss: 0.6139 - acc: 0.6427 - val_loss: 0.5449 - val_acc: 0.6978
Epoch 5/120
Epoch 00005: val_loss improved from 0.54488 to 0.53360, saving model to best_m.h5
 - 2s - loss: 0.5737 - acc: 0.6841 - val_loss: 0.5336 - val_acc: 0.7321
Epoch 6/120
Epoch 00006: val_loss did not improve
 - 2s - loss: 0.5753 - acc: 0.6849 - val_loss: 0.5825 - val_acc: 0.6231
Epoch 7/120
Epoch 00007: val_loss improved from 0.53

Epoch 61/120
Epoch 00061: val_loss did not improve
 - 1s - loss: 0.2630 - acc: 0.8836 - val_loss: 0.2815 - val_acc: 0.8785
Epoch 62/120
Epoch 00062: val_loss did not improve
 - 2s - loss: 0.2724 - acc: 0.8781 - val_loss: 0.3437 - val_acc: 0.8349
Epoch 63/120
Epoch 00063: val_loss improved from 0.24085 to 0.22835, saving model to best_m.h5
 - 2s - loss: 0.2712 - acc: 0.8742 - val_loss: 0.2284 - val_acc: 0.8910
Epoch 64/120
Epoch 00064: val_loss did not improve
 - 1s - loss: 0.2811 - acc: 0.8680 - val_loss: 0.2855 - val_acc: 0.8785
Epoch 65/120
Epoch 00065: val_loss did not improve
 - 1s - loss: 0.2520 - acc: 0.8883 - val_loss: 0.2649 - val_acc: 0.8941
Epoch 66/120
Epoch 00066: val_loss did not improve
 - 1s - loss: 0.2601 - acc: 0.8789 - val_loss: 0.2598 - val_acc: 0.8847
Epoch 67/120
Epoch 00067: val_loss did not improve
 - 1s - loss: 0.2584 - acc: 0.8745 - val_loss: 0.2382 - val_acc: 0.9159
Epoch 68/120
Epoch 00068: val_loss did not improve
 - 2s - loss: 0.2412 - acc: 0.8875 - val_los

Epoch 6/120
Epoch 00006: val_loss did not improve
 - 1s - loss: 0.6921 - acc: 0.5255 - val_loss: 0.6927 - val_acc: 0.5389
Epoch 7/120
Epoch 00007: val_loss did not improve
 - 1s - loss: 0.6932 - acc: 0.5222 - val_loss: 0.6908 - val_acc: 0.5358
Epoch 8/120
Epoch 00008: val_loss did not improve
 - 2s - loss: 0.6882 - acc: 0.5318 - val_loss: 0.6906 - val_acc: 0.5358
Epoch 9/120
Epoch 00009: val_loss improved from 0.69059 to 0.68956, saving model to best_m.h5
 - 2s - loss: 0.6872 - acc: 0.5507 - val_loss: 0.6896 - val_acc: 0.5358
Epoch 10/120
Epoch 00010: val_loss improved from 0.68956 to 0.66806, saving model to best_m.h5
 - 2s - loss: 0.6884 - acc: 0.5403 - val_loss: 0.6681 - val_acc: 0.6231
Epoch 11/120
Epoch 00011: val_loss improved from 0.66806 to 0.66182, saving model to best_m.h5
 - 2s - loss: 0.6748 - acc: 0.5542 - val_loss: 0.6618 - val_acc: 0.6012
Epoch 12/120
Epoch 00012: val_loss improved from 0.66182 to 0.61913, saving model to best_m.h5
 - 2s - loss: 0.6663 - acc: 0.5578 - va

Epoch 65/120
Epoch 00065: val_loss did not improve
 - 1s - loss: 0.2851 - acc: 0.8765 - val_loss: 0.3079 - val_acc: 0.8660
Epoch 66/120
Epoch 00066: val_loss did not improve
 - 1s - loss: 0.3043 - acc: 0.8625 - val_loss: 0.3059 - val_acc: 0.8567
Epoch 67/120
Epoch 00067: val_loss did not improve
 - 1s - loss: 0.2768 - acc: 0.8781 - val_loss: 0.2714 - val_acc: 0.8910
Epoch 68/120
Epoch 00068: val_loss did not improve
 - 1s - loss: 0.2960 - acc: 0.8646 - val_loss: 0.2832 - val_acc: 0.8816
Epoch 69/120
Epoch 00069: val_loss did not improve
 - 2s - loss: 0.2610 - acc: 0.8898 - val_loss: 0.2590 - val_acc: 0.9065
Epoch 70/120
Epoch 00070: val_loss did not improve
 - 1s - loss: 0.2979 - acc: 0.8726 - val_loss: 0.3134 - val_acc: 0.8598
Epoch 71/120
Epoch 00071: val_loss did not improve
 - 1s - loss: 0.2723 - acc: 0.8826 - val_loss: 0.2638 - val_acc: 0.9065
Epoch 72/120
Epoch 00072: val_loss did not improve
 - 2s - loss: 0.2593 - acc: 0.8883 - val_loss: 0.2594 - val_acc: 0.9065
Epoch 73/120
Epo

Epoch 00008: val_loss improved from 0.44632 to 0.43657, saving model to best_m.h5
 - 2s - loss: 0.4755 - acc: 0.7726 - val_loss: 0.4366 - val_acc: 0.7757
Epoch 9/120
Epoch 00009: val_loss improved from 0.43657 to 0.35810, saving model to best_m.h5
 - 2s - loss: 0.4236 - acc: 0.7961 - val_loss: 0.3581 - val_acc: 0.8723
Epoch 10/120
Epoch 00010: val_loss did not improve
 - 2s - loss: 0.4342 - acc: 0.8037 - val_loss: 0.4000 - val_acc: 0.8006
Epoch 11/120
Epoch 00011: val_loss did not improve
 - 2s - loss: 0.3959 - acc: 0.8265 - val_loss: 0.3837 - val_acc: 0.8069
Epoch 12/120
Epoch 00012: val_loss improved from 0.35810 to 0.34369, saving model to best_m.h5
 - 2s - loss: 0.3935 - acc: 0.8125 - val_loss: 0.3437 - val_acc: 0.8474
Epoch 13/120
Epoch 00013: val_loss improved from 0.34369 to 0.29911, saving model to best_m.h5
 - 2s - loss: 0.4197 - acc: 0.8044 - val_loss: 0.2991 - val_acc: 0.8505
Epoch 14/120
Epoch 00014: val_loss improved from 0.29911 to 0.29376, saving model to best_m.h5
 - 2s

Epoch 71/120
Epoch 00071: val_loss did not improve
 - 1s - loss: 0.2352 - acc: 0.8974 - val_loss: 0.2585 - val_acc: 0.9003
Epoch 72/120
Epoch 00072: val_loss did not improve
 - 1s - loss: 0.2241 - acc: 0.9039 - val_loss: 0.2483 - val_acc: 0.8972
Epoch 73/120
Epoch 00073: val_loss did not improve
 - 2s - loss: 0.2310 - acc: 0.9091 - val_loss: 0.2299 - val_acc: 0.9003
Epoch 74/120
Epoch 00074: val_loss did not improve
 - 2s - loss: 0.2217 - acc: 0.8984 - val_loss: 0.2411 - val_acc: 0.8941
Epoch 75/120
Epoch 00075: val_loss did not improve
 - 2s - loss: 0.2308 - acc: 0.8914 - val_loss: 0.2583 - val_acc: 0.8879
Epoch 76/120
Epoch 00076: val_loss did not improve
 - 2s - loss: 0.2323 - acc: 0.9086 - val_loss: 0.2259 - val_acc: 0.8972
Epoch 77/120
Epoch 00077: val_loss did not improve
 - 2s - loss: 0.2150 - acc: 0.9101 - val_loss: 0.2348 - val_acc: 0.9065
Epoch 78/120
Epoch 00078: val_loss did not improve
 - 2s - loss: 0.2327 - acc: 0.8966 - val_loss: 0.2494 - val_acc: 0.8879
Epoch 79/120
Epo

Epoch 14/120
Epoch 00014: val_loss improved from 0.35298 to 0.33906, saving model to best_m.h5
 - 2s - loss: 0.4339 - acc: 0.7816 - val_loss: 0.3391 - val_acc: 0.8598
Epoch 15/120
Epoch 00015: val_loss improved from 0.33906 to 0.33351, saving model to best_m.h5
 - 2s - loss: 0.4197 - acc: 0.7961 - val_loss: 0.3335 - val_acc: 0.8536
Epoch 16/120
Epoch 00016: val_loss did not improve
 - 2s - loss: 0.3710 - acc: 0.8211 - val_loss: 0.3909 - val_acc: 0.7913
Epoch 17/120
Epoch 00017: val_loss improved from 0.33351 to 0.33262, saving model to best_m.h5
 - 2s - loss: 0.3672 - acc: 0.8281 - val_loss: 0.3326 - val_acc: 0.8411
Epoch 18/120
Epoch 00018: val_loss improved from 0.33262 to 0.32694, saving model to best_m.h5
 - 2s - loss: 0.3612 - acc: 0.8326 - val_loss: 0.3269 - val_acc: 0.8536
Epoch 19/120
Epoch 00019: val_loss improved from 0.32694 to 0.31466, saving model to best_m.h5
 - 2s - loss: 0.3539 - acc: 0.8383 - val_loss: 0.3147 - val_acc: 0.8598
Epoch 20/120
Epoch 00020: val_loss did not

Epoch 76/120
Epoch 00076: val_loss did not improve
 - 2s - loss: 0.2230 - acc: 0.9008 - val_loss: 0.2347 - val_acc: 0.8972
Epoch 77/120
Epoch 00077: val_loss did not improve
 - 1s - loss: 0.2304 - acc: 0.9016 - val_loss: 0.2582 - val_acc: 0.8629
Epoch 78/120
Epoch 00078: val_loss did not improve
 - 1s - loss: 0.2224 - acc: 0.8937 - val_loss: 0.2731 - val_acc: 0.8660
Epoch 79/120
Epoch 00079: val_loss did not improve
 - 1s - loss: 0.2408 - acc: 0.8930 - val_loss: 0.2408 - val_acc: 0.8972
Epoch 80/120
Epoch 00080: val_loss did not improve
 - 1s - loss: 0.2155 - acc: 0.9070 - val_loss: 0.2360 - val_acc: 0.9003
Epoch 81/120
Epoch 00081: val_loss did not improve
 - 1s - loss: 0.2334 - acc: 0.9047 - val_loss: 0.2336 - val_acc: 0.9003
Epoch 82/120
Epoch 00082: val_loss did not improve
 - 2s - loss: 0.2258 - acc: 0.9000 - val_loss: 0.2402 - val_acc: 0.8816
Epoch 83/120
Epoch 00083: val_loss did not improve
 - 2s - loss: 0.2344 - acc: 0.8951 - val_loss: 0.2534 - val_acc: 0.8692
Epoch 84/120
Epo

Epoch 19/120
Epoch 00019: val_loss did not improve
 - 1s - loss: 0.3588 - acc: 0.8383 - val_loss: 0.3744 - val_acc: 0.8219
Epoch 20/120
Epoch 00020: val_loss did not improve
 - 1s - loss: 0.3844 - acc: 0.8250 - val_loss: 0.3767 - val_acc: 0.7969
Epoch 21/120
Epoch 00021: val_loss did not improve
 - 1s - loss: 0.3340 - acc: 0.8515 - val_loss: 0.4191 - val_acc: 0.7969
Epoch 22/120
Epoch 00022: val_loss did not improve
 - 2s - loss: 0.3091 - acc: 0.8610 - val_loss: 0.4163 - val_acc: 0.8063
Epoch 23/120
Epoch 00023: val_loss did not improve
 - 1s - loss: 0.3092 - acc: 0.8664 - val_loss: 0.3619 - val_acc: 0.8125
Epoch 24/120
Epoch 00024: val_loss did not improve
 - 2s - loss: 0.2892 - acc: 0.8672 - val_loss: 0.3885 - val_acc: 0.8281
Epoch 25/120
Epoch 00025: val_loss did not improve
 - 2s - loss: 0.3078 - acc: 0.8649 - val_loss: 0.3718 - val_acc: 0.8313
Epoch 26/120
Epoch 00026: val_loss did not improve
 - 2s - loss: 0.3123 - acc: 0.8586 - val_loss: 0.4047 - val_acc: 0.8313
Epoch 27/120
Epo

Epoch 85/120
Epoch 00085: val_loss did not improve
 - 1s - loss: 0.2299 - acc: 0.8836 - val_loss: 0.3433 - val_acc: 0.8469
Epoch 86/120
Epoch 00086: val_loss did not improve
 - 1s - loss: 0.2139 - acc: 0.9101 - val_loss: 0.3453 - val_acc: 0.8500
Epoch 87/120
Epoch 00087: val_loss did not improve
 - 1s - loss: 0.2325 - acc: 0.8922 - val_loss: 0.3763 - val_acc: 0.8469
Epoch 88/120
Epoch 00088: val_loss did not improve
 - 1s - loss: 0.2342 - acc: 0.8953 - val_loss: 0.3471 - val_acc: 0.8531
Epoch 89/120
Epoch 00089: val_loss did not improve
 - 1s - loss: 0.2385 - acc: 0.8945 - val_loss: 0.3464 - val_acc: 0.8438
Epoch 90/120
Epoch 00090: val_loss did not improve
 - 1s - loss: 0.2250 - acc: 0.8969 - val_loss: 0.3166 - val_acc: 0.8562
Epoch 91/120
Epoch 00091: val_loss did not improve
 - 1s - loss: 0.2266 - acc: 0.9055 - val_loss: 0.3493 - val_acc: 0.8500
Epoch 92/120
Epoch 00092: val_loss did not improve
 - 1s - loss: 0.2184 - acc: 0.9016 - val_loss: 0.3157 - val_acc: 0.8562
Epoch 93/120
Epo

In [24]:
import pickle
from sklearn.metrics import log_loss

# Store the out-of-fold and test predictions of this model as a pickle.
feat_path = '../features/cnn_1_aug_skimage_preprocess_feat.pkl'
with open(feat_path, 'wb') as fout:
    pickle.dump([train_pred, test_pred], fout)

# Log loss of the out-of-fold predictions against the training labels.
oof_loss = log_loss(y, train_pred)
print(oof_loss)

# Author's notes from earlier runs, kept verbatim (meaning not verifiable here):
#pre 3219
# new 2965
# skimage 2725

# Assemble the submission table (test ids plus predictions) and write it out.
submission = test_df[['id']].copy()
submission['is_iceberg'] = test_pred
print(submission.head())
submission.to_csv('../results/cnn_1_aug_skimage_preprocess.csv', index=False)

0.243188565358
         id  is_iceberg
0  5941774d    0.106803
1  4023181e    0.813736
2  b20200e4    0.712405
3  e7f018bb    0.997245
4  4371c8c3    0.680632


In [25]:
def create_model():
    '''Build the second CNN variant and return it as an uncompiled keras model.

    A 32-filter stem is followed by two zero-padded double-convolution
    stages (32 and 64 filters) and one more 64-filter convolution, each
    ending in 2x2 max-pooling. A 256-unit dense layer with dropout feeds the
    single sigmoid output. The expected input shape is (75, 75, 3).
    '''
    layer_stack = [
        # Stem
        Conv2D(32, (3, 3), input_shape=(75, 75, 3), activation='relu'),
        MaxPooling2D((2, 2), strides=(2, 2)),

        # Stage with 32 filters
        ZeroPadding2D((1, 1)),
        Conv2D(32, (3, 3), activation='relu', strides=1),
        Conv2D(32, (3, 3), activation='relu', strides=1),
        MaxPooling2D((2, 2), strides=(2, 2)),

        # Stage with 64 filters
        ZeroPadding2D((1, 1)),
        Conv2D(64, (3, 3), activation='relu', strides=1),
        Conv2D(64, (3, 3), activation='relu', strides=1),
        MaxPooling2D((2, 2), strides=(2, 2)),

        # Final convolution
        Conv2D(64, (3, 3), activation='relu', strides=1),
        MaxPooling2D((2, 2), strides=(2, 2)),

        # Classifier head
        Flatten(),
        Dense(256, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]

    # Sequential adds the layers in list order, exactly like repeated .add().
    return Sequential(layer_stack)
print('model model')

model model


In [26]:
def kfold_train(fold_cnt=3,rnd=42):
    '''Train a fresh `create_model()` on every K-fold split and collect predictions.

    Relies on the notebook-level globals `train_x`, `y` and `test_x`, on the
    currently defined `create_model`, and on the `lr_f` schedule.

    Parameters
    ----------
    fold_cnt : int
        Number of cross-validation folds.
    rnd : int
        Seed value; `2*rnd` is passed to `KFold` as `random_state`.

    Returns
    -------
    (train_pred, test_pred)
        `train_pred` holds the out-of-fold prediction for every training
        sample, shape `(len(train_x), 1)`. `test_pred` is the mean of the
        per-fold test predictions, shape `(len(test_x), 1)`.
    '''
    import os  # local import: only needed for the checkpoint clean-up below

    # Size the buffers from the data instead of hard-coding 1604 / 8424 rows.
    train_pred = np.zeros((len(train_x), 1))
    test_pred = np.zeros((len(test_x), 1))
    kf = KFold(n_splits=fold_cnt, shuffle=True, random_state=2*rnd)

    # The augmentation settings are the same for every fold, so the
    # generator is configured once, outside the loop.
    datagen = ImageDataGenerator(
        width_shift_range=0.05,
        height_shift_range=0.05,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        vertical_flip=True
    )
    bat_size = 16
    model_p = 'best_m.h5'

    for train_index, test_index in kf.split(train_x):
        curr_x,curr_y = train_x[train_index],y[train_index]
        val_x,val_y = train_x[test_index],y[test_index]
        steps_train = len(curr_y)//bat_size

        # Drop any checkpoint left behind by an earlier fold or model, so the
        # load_model call below can only pick up weights saved in this fold
        # (a stale file would have been trained on this fold's validation data).
        if os.path.exists(model_p):
            os.remove(model_p)

        model = create_model()
        # NOTE: the LearningRateScheduler sets the rate at the start of each
        # epoch, so lr_f supersedes the 0.0005 passed to Adam here.
        model.compile(loss='binary_crossentropy', optimizer=Adam(0.0005), metrics=['accuracy'])
        # Keep only the weights with the best validation loss seen so far.
        model_chk = ModelCheckpoint(filepath=model_p, monitor='val_loss', save_best_only=True, verbose=1)
        lr_s = LearningRateScheduler(lr_f)
        model.fit_generator(datagen.flow(curr_x, curr_y, batch_size=bat_size),
                  validation_data=(val_x,val_y),
                  steps_per_epoch = steps_train,
                  epochs=120,
                  verbose=2,
                  callbacks=[model_chk,lr_s]
                 )

        # Predict with the best checkpoint of this fold, not the last epoch.
        model = load_model(model_p)
        train_pred[test_index] = model.predict(val_x)
        test_pred = test_pred + model.predict(test_x)/fold_cnt
        print('============================')
    return train_pred,test_pred

# Cross-validated training run for the model defined in the previous cell.
train_pred, test_pred = kfold_train(fold_cnt=5)


# Store the out-of-fold and test predictions of this model as a pickle.
feat_path = '../features/cnn_2_aug_skimage_preprocess_feat.pkl'
with open(feat_path, 'wb') as fout:
    pickle.dump([train_pred, test_pred], fout)

# Log loss of the out-of-fold predictions against the training labels.
oof_loss = log_loss(y, train_pred)
print(oof_loss)

# Author's notes from earlier runs, kept verbatim (meaning not verifiable here):
# this 2728
# skimage 2710

# Assemble the submission table (test ids plus predictions) and write it out.
submission = test_df[['id']].copy()
submission['is_iceberg'] = test_pred
print(submission.head())
submission.to_csv('../results/cnn_2_aug_skimage_preprocess.csv', index=False)

Epoch 1/120
Epoch 00001: val_loss improved from inf to 0.69153, saving model to best_m.h5
 - 4s - loss: 0.6867 - acc: 0.5161 - val_loss: 0.6915 - val_acc: 0.5327
Epoch 2/120
Epoch 00002: val_loss improved from 0.69153 to 0.63874, saving model to best_m.h5
 - 1s - loss: 0.6730 - acc: 0.5716 - val_loss: 0.6387 - val_acc: 0.5701
Epoch 3/120
Epoch 00003: val_loss did not improve
 - 1s - loss: 0.6453 - acc: 0.5888 - val_loss: 0.6630 - val_acc: 0.6511
Epoch 4/120
Epoch 00004: val_loss did not improve
 - 1s - loss: 0.6629 - acc: 0.6318 - val_loss: 0.6436 - val_acc: 0.6791
Epoch 5/120
Epoch 00005: val_loss improved from 0.63874 to 0.57448, saving model to best_m.h5
 - 1s - loss: 0.6247 - acc: 0.6587 - val_loss: 0.5745 - val_acc: 0.6978
Epoch 6/120
Epoch 00006: val_loss improved from 0.57448 to 0.49654, saving model to best_m.h5
 - 1s - loss: 0.5860 - acc: 0.6914 - val_loss: 0.4965 - val_acc: 0.7539
Epoch 7/120
Epoch 00007: val_loss improved from 0.49654 to 0.44724, saving model to best_m.h5
 -

Epoch 62/120
Epoch 00062: val_loss improved from 0.22199 to 0.22123, saving model to best_m.h5
 - 1s - loss: 0.2360 - acc: 0.8922 - val_loss: 0.2212 - val_acc: 0.9003
Epoch 63/120
Epoch 00063: val_loss did not improve
 - 1s - loss: 0.2518 - acc: 0.8945 - val_loss: 0.2337 - val_acc: 0.8879
Epoch 64/120
Epoch 00064: val_loss did not improve
 - 1s - loss: 0.2250 - acc: 0.9031 - val_loss: 0.2306 - val_acc: 0.9003
Epoch 65/120
Epoch 00065: val_loss did not improve
 - 1s - loss: 0.2394 - acc: 0.9000 - val_loss: 0.2227 - val_acc: 0.9034
Epoch 66/120
Epoch 00066: val_loss did not improve
 - 1s - loss: 0.2330 - acc: 0.9016 - val_loss: 0.2692 - val_acc: 0.8629
Epoch 67/120
Epoch 00067: val_loss did not improve
 - 1s - loss: 0.2273 - acc: 0.8984 - val_loss: 0.2527 - val_acc: 0.8847
Epoch 68/120
Epoch 00068: val_loss did not improve
 - 1s - loss: 0.2182 - acc: 0.9086 - val_loss: 0.2512 - val_acc: 0.8972
Epoch 69/120
Epoch 00069: val_loss improved from 0.22123 to 0.19931, saving model to best_m.h5


Epoch 6/120
Epoch 00006: val_loss improved from 0.41825 to 0.36533, saving model to best_m.h5
 - 1s - loss: 0.4042 - acc: 0.8138 - val_loss: 0.3653 - val_acc: 0.8100
Epoch 7/120
Epoch 00007: val_loss did not improve
 - 1s - loss: 0.4012 - acc: 0.8133 - val_loss: 0.4092 - val_acc: 0.8318
Epoch 8/120
Epoch 00008: val_loss did not improve
 - 1s - loss: 0.4172 - acc: 0.8154 - val_loss: 0.4214 - val_acc: 0.8318
Epoch 9/120
Epoch 00009: val_loss improved from 0.36533 to 0.31337, saving model to best_m.h5
 - 1s - loss: 0.3776 - acc: 0.8363 - val_loss: 0.3134 - val_acc: 0.8598
Epoch 10/120
Epoch 00010: val_loss improved from 0.31337 to 0.29439, saving model to best_m.h5
 - 1s - loss: 0.3518 - acc: 0.8445 - val_loss: 0.2944 - val_acc: 0.8816
Epoch 11/120
Epoch 00011: val_loss did not improve
 - 1s - loss: 0.3408 - acc: 0.8523 - val_loss: 0.2949 - val_acc: 0.8723
Epoch 12/120
Epoch 00012: val_loss did not improve
 - 1s - loss: 0.3338 - acc: 0.8547 - val_loss: 0.3016 - val_acc: 0.8692
Epoch 13/12

Epoch 68/120
Epoch 00068: val_loss did not improve
 - 1s - loss: 0.2047 - acc: 0.9091 - val_loss: 0.3122 - val_acc: 0.9003
Epoch 69/120
Epoch 00069: val_loss did not improve
 - 1s - loss: 0.2253 - acc: 0.9094 - val_loss: 0.2078 - val_acc: 0.8972
Epoch 70/120
Epoch 00070: val_loss did not improve
 - 1s - loss: 0.2232 - acc: 0.9016 - val_loss: 0.2078 - val_acc: 0.9003
Epoch 71/120
Epoch 00071: val_loss did not improve
 - 1s - loss: 0.2041 - acc: 0.9062 - val_loss: 0.1989 - val_acc: 0.9065
Epoch 72/120
Epoch 00072: val_loss did not improve
 - 1s - loss: 0.2063 - acc: 0.9101 - val_loss: 0.1950 - val_acc: 0.9097
Epoch 73/120
Epoch 00073: val_loss did not improve
 - 1s - loss: 0.2084 - acc: 0.9148 - val_loss: 0.1961 - val_acc: 0.9034
Epoch 74/120
Epoch 00074: val_loss did not improve
 - 1s - loss: 0.1663 - acc: 0.9351 - val_loss: 0.2007 - val_acc: 0.9034
Epoch 75/120
Epoch 00075: val_loss did not improve
 - 1s - loss: 0.2091 - acc: 0.9101 - val_loss: 0.1918 - val_acc: 0.9003
Epoch 76/120
Epo

Epoch 00011: val_loss did not improve
 - 1s - loss: 0.4895 - acc: 0.7771 - val_loss: 0.3954 - val_acc: 0.8131
Epoch 12/120
Epoch 00012: val_loss improved from 0.36094 to 0.34405, saving model to best_m.h5
 - 1s - loss: 0.4126 - acc: 0.8156 - val_loss: 0.3441 - val_acc: 0.8567
Epoch 13/120
Epoch 00013: val_loss did not improve
 - 1s - loss: 0.4110 - acc: 0.8201 - val_loss: 0.3759 - val_acc: 0.8318
Epoch 14/120
Epoch 00014: val_loss improved from 0.34405 to 0.30776, saving model to best_m.h5
 - 1s - loss: 0.4058 - acc: 0.8156 - val_loss: 0.3078 - val_acc: 0.8598
Epoch 15/120
Epoch 00015: val_loss improved from 0.30776 to 0.30084, saving model to best_m.h5
 - 1s - loss: 0.3846 - acc: 0.8156 - val_loss: 0.3008 - val_acc: 0.8660
Epoch 16/120
Epoch 00016: val_loss did not improve
 - 1s - loss: 0.3789 - acc: 0.8398 - val_loss: 0.3215 - val_acc: 0.8567
Epoch 17/120
Epoch 00017: val_loss did not improve
 - 1s - loss: 0.4032 - acc: 0.8203 - val_loss: 0.3318 - val_acc: 0.8318
Epoch 18/120
Epoch 0

Epoch 74/120
Epoch 00074: val_loss did not improve
 - 1s - loss: 0.2285 - acc: 0.8906 - val_loss: 0.2183 - val_acc: 0.9034
Epoch 75/120
Epoch 00075: val_loss improved from 0.21650 to 0.21437, saving model to best_m.h5
 - 1s - loss: 0.2347 - acc: 0.8937 - val_loss: 0.2144 - val_acc: 0.8941
Epoch 76/120
Epoch 00076: val_loss did not improve
 - 1s - loss: 0.2251 - acc: 0.9047 - val_loss: 0.2257 - val_acc: 0.8972
Epoch 77/120
Epoch 00077: val_loss did not improve
 - 1s - loss: 0.2357 - acc: 0.8980 - val_loss: 0.2230 - val_acc: 0.9159
Epoch 78/120
Epoch 00078: val_loss did not improve
 - 1s - loss: 0.2305 - acc: 0.8880 - val_loss: 0.2187 - val_acc: 0.8972
Epoch 79/120
Epoch 00079: val_loss did not improve
 - 1s - loss: 0.2225 - acc: 0.9062 - val_loss: 0.2238 - val_acc: 0.9128
Epoch 80/120
Epoch 00080: val_loss did not improve
 - 1s - loss: 0.2349 - acc: 0.8976 - val_loss: 0.2212 - val_acc: 0.9065
Epoch 81/120
Epoch 00081: val_loss did not improve
 - 1s - loss: 0.2152 - acc: 0.8937 - val_los

Epoch 16/120
Epoch 00016: val_loss improved from 0.33169 to 0.32081, saving model to best_m.h5
 - 1s - loss: 0.3293 - acc: 0.8437 - val_loss: 0.3208 - val_acc: 0.8598
Epoch 17/120
Epoch 00017: val_loss did not improve
 - 1s - loss: 0.3309 - acc: 0.8648 - val_loss: 0.3328 - val_acc: 0.8567
Epoch 18/120
Epoch 00018: val_loss improved from 0.32081 to 0.30864, saving model to best_m.h5
 - 1s - loss: 0.3452 - acc: 0.8484 - val_loss: 0.3086 - val_acc: 0.8847
Epoch 19/120
Epoch 00019: val_loss improved from 0.30864 to 0.29621, saving model to best_m.h5
 - 1s - loss: 0.2731 - acc: 0.8867 - val_loss: 0.2962 - val_acc: 0.8816
Epoch 20/120
Epoch 00020: val_loss improved from 0.29621 to 0.29492, saving model to best_m.h5
 - 1s - loss: 0.3055 - acc: 0.8664 - val_loss: 0.2949 - val_acc: 0.8754
Epoch 21/120
Epoch 00021: val_loss did not improve
 - 1s - loss: 0.2992 - acc: 0.8755 - val_loss: 0.3464 - val_acc: 0.8536
Epoch 22/120
Epoch 00022: val_loss improved from 0.29492 to 0.29125, saving model to b

Epoch 78/120
Epoch 00078: val_loss did not improve
 - 1s - loss: 0.2044 - acc: 0.9216 - val_loss: 0.2813 - val_acc: 0.8972
Epoch 79/120
Epoch 00079: val_loss did not improve
 - 1s - loss: 0.1837 - acc: 0.9281 - val_loss: 0.2623 - val_acc: 0.9065
Epoch 80/120
Epoch 00080: val_loss did not improve
 - 1s - loss: 0.1812 - acc: 0.9289 - val_loss: 0.2757 - val_acc: 0.9003
Epoch 81/120
Epoch 00081: val_loss did not improve
 - 1s - loss: 0.1868 - acc: 0.9203 - val_loss: 0.2764 - val_acc: 0.8941
Epoch 82/120
Epoch 00082: val_loss did not improve
 - 1s - loss: 0.1698 - acc: 0.9234 - val_loss: 0.2848 - val_acc: 0.9003
Epoch 83/120
Epoch 00083: val_loss did not improve
 - 1s - loss: 0.1686 - acc: 0.9234 - val_loss: 0.2743 - val_acc: 0.9034
Epoch 84/120
Epoch 00084: val_loss did not improve
 - 1s - loss: 0.1804 - acc: 0.9234 - val_loss: 0.2774 - val_acc: 0.8972
Epoch 85/120
Epoch 00085: val_loss did not improve
 - 1s - loss: 0.1683 - acc: 0.9281 - val_loss: 0.2792 - val_acc: 0.9065
Epoch 86/120
Epo

Epoch 00021: val_loss did not improve
 - 1s - loss: 0.2779 - acc: 0.8774 - val_loss: 0.3545 - val_acc: 0.8406
Epoch 22/120
Epoch 00022: val_loss improved from 0.34581 to 0.32603, saving model to best_m.h5
 - 1s - loss: 0.2764 - acc: 0.8805 - val_loss: 0.3260 - val_acc: 0.8875
Epoch 23/120
Epoch 00023: val_loss did not improve
 - 1s - loss: 0.2463 - acc: 0.8945 - val_loss: 0.3582 - val_acc: 0.8750
Epoch 24/120
Epoch 00024: val_loss improved from 0.32603 to 0.31439, saving model to best_m.h5
 - 1s - loss: 0.2720 - acc: 0.8789 - val_loss: 0.3144 - val_acc: 0.8750
Epoch 25/120
Epoch 00025: val_loss did not improve
 - 1s - loss: 0.2712 - acc: 0.8750 - val_loss: 0.3182 - val_acc: 0.8688
Epoch 26/120
Epoch 00026: val_loss did not improve
 - 1s - loss: 0.2410 - acc: 0.8868 - val_loss: 0.3330 - val_acc: 0.8750
Epoch 27/120
Epoch 00027: val_loss did not improve
 - 1s - loss: 0.2524 - acc: 0.8836 - val_loss: 0.3280 - val_acc: 0.8750
Epoch 28/120
Epoch 00028: val_loss did not improve
 - 1s - loss:

Epoch 87/120
Epoch 00087: val_loss did not improve
 - 1s - loss: 0.1617 - acc: 0.9258 - val_loss: 0.3151 - val_acc: 0.8875
Epoch 88/120
Epoch 00088: val_loss did not improve
 - 1s - loss: 0.1749 - acc: 0.9227 - val_loss: 0.3299 - val_acc: 0.8875
Epoch 89/120
Epoch 00089: val_loss did not improve
 - 1s - loss: 0.1714 - acc: 0.9336 - val_loss: 0.3252 - val_acc: 0.8812
Epoch 90/120
Epoch 00090: val_loss did not improve
 - 1s - loss: 0.1799 - acc: 0.9289 - val_loss: 0.3218 - val_acc: 0.8844
Epoch 91/120
Epoch 00091: val_loss did not improve
 - 1s - loss: 0.2007 - acc: 0.9165 - val_loss: 0.2989 - val_acc: 0.8938
Epoch 92/120
Epoch 00092: val_loss did not improve
 - 1s - loss: 0.1942 - acc: 0.9250 - val_loss: 0.3094 - val_acc: 0.8844
Epoch 93/120
Epoch 00093: val_loss did not improve
 - 1s - loss: 0.1676 - acc: 0.9320 - val_loss: 0.3135 - val_acc: 0.8844
Epoch 94/120
Epoch 00094: val_loss did not improve
 - 1s - loss: 0.1850 - acc: 0.9203 - val_loss: 0.3198 - val_acc: 0.8875
Epoch 95/120
Epo

In [27]:
def create_model():
    '''Build the third CNN variant and return it as an uncompiled keras model.

    Four convolution / max-pooling / dropout stages (64, 128, 128 and 64
    filters) are followed by two dense layers (512 and 256 units, each with
    dropout) and a single sigmoid output. The expected input shape is
    (75, 75, 3).
    '''
    layer_stack = [
        # CNN 1
        Conv2D(64, kernel_size=(3, 3), activation='relu', input_shape=(75, 75, 3)),
        MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),
        Dropout(0.2),

        # CNN 2
        Conv2D(128, kernel_size=(3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
        Dropout(0.2),

        # CNN 3
        Conv2D(128, kernel_size=(3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
        Dropout(0.2),

        # CNN 4
        Conv2D(64, kernel_size=(3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
        Dropout(0.2),

        # The feature maps are flattened before the dense layers
        Flatten(),

        # Dense 1
        Dense(512, activation='relu'),
        Dropout(0.2),

        # Dense 2
        Dense(256, activation='relu'),
        Dropout(0.2),

        # Output
        Dense(1, activation="sigmoid"),
    ]

    # Sequential adds the layers in list order, exactly like repeated .add().
    return Sequential(layer_stack)
print('model model')

model model


In [28]:
def kfold_train(fold_cnt=3,rnd=42):
    '''Train one model per K-fold split; return out-of-fold and test predictions.

    For every fold a fresh model from ``create_model()`` is compiled and trained
    on augmented batches. The checkpoint with the lowest validation loss is then
    reloaded and used to predict the held-out fold and the whole test set.

    Args:
        fold_cnt: number of K-fold splits; also the divisor used to average
            the per-fold test predictions.
        rnd: seed base; the shuffled ``KFold`` uses ``random_state=2*rnd``.

    Returns:
        ``(train_pred, test_pred)``: arrays of shape ``(len(train_x), 1)`` and
        ``(len(test_x), 1)`` with the out-of-fold predictions and the
        fold-averaged test predictions.

    Reads the notebook globals ``train_x``, ``y``, ``test_x``, ``lr_f`` and
    ``create_model``; writes the checkpoint file ``best_m.h5``.
    '''
    # Size the buffers from the data instead of hard-coding 1604 / 8424 rows,
    # so the function keeps working if the dataset is subsampled or extended.
    train_pred = np.zeros((len(train_x), 1))
    test_pred = np.zeros((len(test_x), 1))

    # Fold-independent settings, built once instead of once per fold.
    bat_size = 16
    model_p = 'best_m.h5'
    datagen = ImageDataGenerator(
        width_shift_range=0.05,
        height_shift_range=0.05,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        vertical_flip=True
    )

    kf = KFold(n_splits=fold_cnt, shuffle=True, random_state=2*rnd)
    for train_index, test_index in kf.split(train_x):
        curr_x,curr_y = train_x[train_index],y[train_index]
        val_x,val_y = train_x[test_index],y[test_index]

        # Floor division: a trailing partial batch is not counted as a step.
        steps_train = len(curr_y)//bat_size

        model = create_model()
        model.compile(loss='binary_crossentropy', optimizer=Adam(0.0005), metrics=['accuracy'])
        # A new checkpoint callback per fold, so "best" restarts for each fold
        # and the shared file is overwritten by the current fold's best epoch.
        model_chk = ModelCheckpoint(filepath=model_p, monitor='val_loss', save_best_only=True, verbose=1)
        lr_s = LearningRateScheduler(lr_f)
        model.fit_generator(datagen.flow(curr_x, curr_y, batch_size=bat_size),
                  validation_data=(val_x,val_y),
                  steps_per_epoch = steps_train,
                  epochs=120, 
                  verbose=2,
                  callbacks=[model_chk,lr_s]
                 )

        # Predict with the best checkpoint rather than the last-epoch weights.
        model = load_model(model_p)
        train_pred[test_index] = model.predict(val_x)
        test_pred = test_pred + model.predict(test_x)/fold_cnt
        print('============================')
    return train_pred,test_pred

# Run 5-fold training with whichever create_model is currently defined in the
# kernel; keeps the out-of-fold and the fold-averaged test predictions.
train_pred,test_pred = kfold_train(fold_cnt=5)

Epoch 1/120
Epoch 00001: val_loss improved from inf to 0.65802, saving model to best_m.h5
 - 6s - loss: 0.7042 - acc: 0.5235 - val_loss: 0.6580 - val_acc: 0.6106
Epoch 2/120
Epoch 00002: val_loss improved from 0.65802 to 0.65578, saving model to best_m.h5
 - 2s - loss: 0.6640 - acc: 0.5622 - val_loss: 0.6558 - val_acc: 0.4984
Epoch 3/120
Epoch 00003: val_loss did not improve
 - 2s - loss: 0.6443 - acc: 0.6122 - val_loss: 0.6873 - val_acc: 0.5109
Epoch 4/120
Epoch 00004: val_loss improved from 0.65578 to 0.54712, saving model to best_m.h5
 - 2s - loss: 0.6012 - acc: 0.6511 - val_loss: 0.5471 - val_acc: 0.7414
Epoch 5/120
Epoch 00005: val_loss improved from 0.54712 to 0.49644, saving model to best_m.h5
 - 2s - loss: 0.5647 - acc: 0.6925 - val_loss: 0.4964 - val_acc: 0.7695
Epoch 6/120
Epoch 00006: val_loss did not improve
 - 2s - loss: 0.5968 - acc: 0.6630 - val_loss: 0.5257 - val_acc: 0.7913
Epoch 7/120
Epoch 00007: val_loss improved from 0.49644 to 0.46008, saving model to best_m.h5
 -

Epoch 64/120
Epoch 00064: val_loss did not improve
 - 2s - loss: 0.2534 - acc: 0.8875 - val_loss: 0.2383 - val_acc: 0.9003
Epoch 65/120
Epoch 00065: val_loss did not improve
 - 2s - loss: 0.2776 - acc: 0.8937 - val_loss: 0.2248 - val_acc: 0.9065
Epoch 66/120
Epoch 00066: val_loss did not improve
 - 2s - loss: 0.2614 - acc: 0.8896 - val_loss: 0.2451 - val_acc: 0.8754
Epoch 67/120
Epoch 00067: val_loss did not improve
 - 2s - loss: 0.2754 - acc: 0.8804 - val_loss: 0.2349 - val_acc: 0.9097
Epoch 68/120
Epoch 00068: val_loss improved from 0.22157 to 0.21677, saving model to best_m.h5
 - 2s - loss: 0.2779 - acc: 0.8781 - val_loss: 0.2168 - val_acc: 0.9097
Epoch 69/120
Epoch 00069: val_loss did not improve
 - 2s - loss: 0.2795 - acc: 0.8805 - val_loss: 0.2409 - val_acc: 0.8847
Epoch 70/120
Epoch 00070: val_loss did not improve
 - 2s - loss: 0.2640 - acc: 0.8914 - val_loss: 0.2545 - val_acc: 0.8816
Epoch 71/120
Epoch 00071: val_loss did not improve
 - 2s - loss: 0.2689 - acc: 0.8953 - val_los

Epoch 8/120
Epoch 00008: val_loss did not improve
 - 2s - loss: 0.5370 - acc: 0.7232 - val_loss: 0.5882 - val_acc: 0.6822
Epoch 9/120
Epoch 00009: val_loss improved from 0.52330 to 0.52288, saving model to best_m.h5
 - 2s - loss: 0.5099 - acc: 0.7419 - val_loss: 0.5229 - val_acc: 0.6791
Epoch 10/120
Epoch 00010: val_loss improved from 0.52288 to 0.44532, saving model to best_m.h5
 - 2s - loss: 0.4867 - acc: 0.7664 - val_loss: 0.4453 - val_acc: 0.8006
Epoch 11/120
Epoch 00011: val_loss improved from 0.44532 to 0.43817, saving model to best_m.h5
 - 2s - loss: 0.4673 - acc: 0.7693 - val_loss: 0.4382 - val_acc: 0.8224
Epoch 12/120
Epoch 00012: val_loss did not improve
 - 2s - loss: 0.4491 - acc: 0.7867 - val_loss: 0.4494 - val_acc: 0.7913
Epoch 13/120
Epoch 00013: val_loss improved from 0.43817 to 0.34810, saving model to best_m.h5
 - 2s - loss: 0.4198 - acc: 0.8101 - val_loss: 0.3481 - val_acc: 0.8567
Epoch 14/120
Epoch 00014: val_loss improved from 0.34810 to 0.30911, saving model to bes

Epoch 70/120
Epoch 00070: val_loss did not improve
 - 2s - loss: 0.2649 - acc: 0.8812 - val_loss: 0.2554 - val_acc: 0.8941
Epoch 71/120
Epoch 00071: val_loss did not improve
 - 2s - loss: 0.2684 - acc: 0.8841 - val_loss: 0.2346 - val_acc: 0.9159
Epoch 72/120
Epoch 00072: val_loss did not improve
 - 2s - loss: 0.2381 - acc: 0.9008 - val_loss: 0.2372 - val_acc: 0.9034
Epoch 73/120
Epoch 00073: val_loss improved from 0.22911 to 0.22527, saving model to best_m.h5
 - 2s - loss: 0.2453 - acc: 0.8976 - val_loss: 0.2253 - val_acc: 0.9283
Epoch 74/120
Epoch 00074: val_loss improved from 0.22527 to 0.22300, saving model to best_m.h5
 - 2s - loss: 0.2534 - acc: 0.8906 - val_loss: 0.2230 - val_acc: 0.9128
Epoch 75/120
Epoch 00075: val_loss improved from 0.22300 to 0.22207, saving model to best_m.h5
 - 2s - loss: 0.2331 - acc: 0.9086 - val_loss: 0.2221 - val_acc: 0.9221
Epoch 76/120
Epoch 00076: val_loss did not improve
 - 2s - loss: 0.2442 - acc: 0.8945 - val_loss: 0.2423 - val_acc: 0.8972
Epoch 7

Epoch 11/120
Epoch 00011: val_loss did not improve
 - 2s - loss: 0.4627 - acc: 0.7675 - val_loss: 0.5049 - val_acc: 0.7975
Epoch 12/120
Epoch 00012: val_loss improved from 0.41136 to 0.32451, saving model to best_m.h5
 - 2s - loss: 0.5066 - acc: 0.7656 - val_loss: 0.3245 - val_acc: 0.8536
Epoch 13/120
Epoch 00013: val_loss did not improve
 - 2s - loss: 0.4342 - acc: 0.8019 - val_loss: 0.3369 - val_acc: 0.8660
Epoch 14/120
Epoch 00014: val_loss did not improve
 - 2s - loss: 0.4111 - acc: 0.8133 - val_loss: 0.3714 - val_acc: 0.8598
Epoch 15/120
Epoch 00015: val_loss improved from 0.32451 to 0.28205, saving model to best_m.h5
 - 2s - loss: 0.4113 - acc: 0.8133 - val_loss: 0.2820 - val_acc: 0.8847
Epoch 16/120
Epoch 00016: val_loss did not improve
 - 2s - loss: 0.3945 - acc: 0.8172 - val_loss: 0.3816 - val_acc: 0.8100
Epoch 17/120
Epoch 00017: val_loss did not improve
 - 2s - loss: 0.4350 - acc: 0.7919 - val_loss: 0.4250 - val_acc: 0.7850
Epoch 18/120
Epoch 00018: val_loss did not improve


Epoch 74/120
Epoch 00074: val_loss did not improve
 - 2s - loss: 0.2797 - acc: 0.8719 - val_loss: 0.2273 - val_acc: 0.8972
Epoch 75/120
Epoch 00075: val_loss did not improve
 - 2s - loss: 0.2666 - acc: 0.8812 - val_loss: 0.2245 - val_acc: 0.9065
Epoch 76/120
Epoch 00076: val_loss did not improve
 - 2s - loss: 0.2658 - acc: 0.8753 - val_loss: 0.2194 - val_acc: 0.9065
Epoch 77/120
Epoch 00077: val_loss did not improve
 - 2s - loss: 0.2509 - acc: 0.8883 - val_loss: 0.2190 - val_acc: 0.8941
Epoch 78/120
Epoch 00078: val_loss did not improve
 - 2s - loss: 0.2612 - acc: 0.8836 - val_loss: 0.2281 - val_acc: 0.8879
Epoch 79/120
Epoch 00079: val_loss did not improve
 - 2s - loss: 0.2801 - acc: 0.8742 - val_loss: 0.2304 - val_acc: 0.8972
Epoch 80/120
Epoch 00080: val_loss did not improve
 - 2s - loss: 0.2703 - acc: 0.8805 - val_loss: 0.2336 - val_acc: 0.8879
Epoch 81/120
Epoch 00081: val_loss did not improve
 - 2s - loss: 0.2535 - acc: 0.8922 - val_loss: 0.2265 - val_acc: 0.8972
Epoch 82/120
Epo

Epoch 16/120
Epoch 00016: val_loss improved from 0.38360 to 0.36666, saving model to best_m.h5
 - 2s - loss: 0.3970 - acc: 0.8227 - val_loss: 0.3667 - val_acc: 0.8349
Epoch 17/120
Epoch 00017: val_loss did not improve
 - 2s - loss: 0.4123 - acc: 0.8187 - val_loss: 0.4018 - val_acc: 0.7944
Epoch 18/120
Epoch 00018: val_loss did not improve
 - 2s - loss: 0.3951 - acc: 0.8191 - val_loss: 0.3946 - val_acc: 0.8131
Epoch 19/120
Epoch 00019: val_loss did not improve
 - 2s - loss: 0.4124 - acc: 0.8008 - val_loss: 0.3856 - val_acc: 0.8287
Epoch 20/120
Epoch 00020: val_loss improved from 0.36666 to 0.33251, saving model to best_m.h5
 - 2s - loss: 0.3568 - acc: 0.8453 - val_loss: 0.3325 - val_acc: 0.8567
Epoch 21/120
Epoch 00021: val_loss did not improve
 - 2s - loss: 0.3727 - acc: 0.8359 - val_loss: 0.3532 - val_acc: 0.8287
Epoch 22/120
Epoch 00022: val_loss improved from 0.33251 to 0.32525, saving model to best_m.h5
 - 2s - loss: 0.3629 - acc: 0.8328 - val_loss: 0.3253 - val_acc: 0.8505
Epoch 2

Epoch 78/120
Epoch 00078: val_loss did not improve
 - 2s - loss: 0.2579 - acc: 0.8922 - val_loss: 0.2686 - val_acc: 0.8754
Epoch 79/120
Epoch 00079: val_loss did not improve
 - 2s - loss: 0.2402 - acc: 0.8984 - val_loss: 0.2880 - val_acc: 0.8598
Epoch 80/120
Epoch 00080: val_loss did not improve
 - 2s - loss: 0.2434 - acc: 0.8910 - val_loss: 0.2762 - val_acc: 0.8660
Epoch 81/120
Epoch 00081: val_loss did not improve
 - 2s - loss: 0.2467 - acc: 0.8937 - val_loss: 0.2720 - val_acc: 0.8660
Epoch 82/120
Epoch 00082: val_loss did not improve
 - 2s - loss: 0.2458 - acc: 0.9016 - val_loss: 0.2685 - val_acc: 0.8723
Epoch 83/120
Epoch 00083: val_loss did not improve
 - 2s - loss: 0.2382 - acc: 0.9047 - val_loss: 0.2709 - val_acc: 0.8754
Epoch 84/120
Epoch 00084: val_loss improved from 0.26813 to 0.26173, saving model to best_m.h5
 - 2s - loss: 0.2448 - acc: 0.8984 - val_loss: 0.2617 - val_acc: 0.8879
Epoch 85/120
Epoch 00085: val_loss did not improve
 - 2s - loss: 0.2341 - acc: 0.8883 - val_los

Epoch 19/120
Epoch 00019: val_loss improved from 0.35981 to 0.34861, saving model to best_m.h5
 - 2s - loss: 0.3972 - acc: 0.8180 - val_loss: 0.3486 - val_acc: 0.8500
Epoch 20/120
Epoch 00020: val_loss did not improve
 - 2s - loss: 0.3654 - acc: 0.8336 - val_loss: 0.3806 - val_acc: 0.8156
Epoch 21/120
Epoch 00021: val_loss improved from 0.34861 to 0.34726, saving model to best_m.h5
 - 2s - loss: 0.3703 - acc: 0.8281 - val_loss: 0.3473 - val_acc: 0.8625
Epoch 22/120
Epoch 00022: val_loss improved from 0.34726 to 0.32293, saving model to best_m.h5
 - 2s - loss: 0.3329 - acc: 0.8477 - val_loss: 0.3229 - val_acc: 0.8688
Epoch 23/120
Epoch 00023: val_loss did not improve
 - 2s - loss: 0.3395 - acc: 0.8492 - val_loss: 0.3249 - val_acc: 0.8688
Epoch 24/120
Epoch 00024: val_loss improved from 0.32293 to 0.31745, saving model to best_m.h5
 - 2s - loss: 0.3350 - acc: 0.8422 - val_loss: 0.3174 - val_acc: 0.8719
Epoch 25/120
Epoch 00025: val_loss did not improve
 - 2s - loss: 0.3225 - acc: 0.8547 

Epoch 82/120
Epoch 00082: val_loss did not improve
 - 2s - loss: 0.2288 - acc: 0.8906 - val_loss: 0.2847 - val_acc: 0.8844
Epoch 83/120
Epoch 00083: val_loss did not improve
 - 2s - loss: 0.2299 - acc: 0.9023 - val_loss: 0.2835 - val_acc: 0.8969
Epoch 84/120
Epoch 00084: val_loss did not improve
 - 2s - loss: 0.2362 - acc: 0.8992 - val_loss: 0.2773 - val_acc: 0.8906
Epoch 85/120
Epoch 00085: val_loss did not improve
 - 2s - loss: 0.2464 - acc: 0.8914 - val_loss: 0.3029 - val_acc: 0.8844
Epoch 86/120
Epoch 00086: val_loss did not improve
 - 2s - loss: 0.2391 - acc: 0.8930 - val_loss: 0.2849 - val_acc: 0.9000
Epoch 87/120
Epoch 00087: val_loss did not improve
 - 2s - loss: 0.2237 - acc: 0.8891 - val_loss: 0.2855 - val_acc: 0.9000
Epoch 88/120
Epoch 00088: val_loss did not improve
 - 2s - loss: 0.2467 - acc: 0.9039 - val_loss: 0.2843 - val_acc: 0.8938
Epoch 89/120
Epoch 00089: val_loss did not improve
 - 2s - loss: 0.2520 - acc: 0.8797 - val_loss: 0.2749 - val_acc: 0.8906
Epoch 90/120
Epo

In [29]:
# Persist the out-of-fold and test predictions as a two-element list.
with open('../features/cnn_3_aug_skimage_preprocess_feat.pkl', 'wb') as fout:
    pickle.dump([train_pred, test_pred], fout)

# Log-loss of the out-of-fold predictions against the training labels.
print(log_loss(y, train_pred))

# Author's score notes, kept verbatim (meaning/units not stated in the notebook):
# this 2737
# new  2509

# Build the submission table: ids from the test frame plus the averaged predictions.
submission = test_df[['id']].copy()
submission['is_iceberg'] = test_pred
print(submission.head())
submission.to_csv('../results/cnn_3_aug_skimage_preprocess.csv', index=False)


0.231176994241
         id  is_iceberg
0  5941774d    0.177906
1  4023181e    0.795293
2  b20200e4    0.580205
3  e7f018bb    0.995355
4  4371c8c3    0.813568


In [30]:
from keras.callbacks import ModelCheckpoint,LearningRateScheduler
def ConvBlock(model, layers, filters):
    '''Append a convolutional block to ``model`` in place.

    The block is ``layers`` repetitions of (1-pixel zero padding -> 3x3 ReLU
    convolution with ``filters`` filters -> batch normalization over axis 3),
    followed by one 2x2 max pooling with stride 2. Nothing is returned.
    '''
    for _ in range(layers):
        unit = (
            ZeroPadding2D((1, 1)),
            Conv2D(filters, (3, 3), activation='relu'),
            BatchNormalization(axis=3),
        )
        for layer in unit:
            model.add(layer)
    # A single pooling step closes the block, whatever the number of repetitions.
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))

def create_model():
    '''Build the ConvBlock-based classifier and return it uncompiled.

    An identity Lambda layer declares the 75x75x3 input, then four single-layer
    ConvBlocks (32, 64, 128, 128 filters) are stacked, and the flattened
    features feed a single sigmoid output unit.
    '''
    net = Sequential()

    # Identity layer whose only job is to fix the input shape: 75x75x3.
    net.add(Lambda(lambda x: x, input_shape=(75, 75, 3)))

    # Each block keeps the spatial size in its padded 3x3 convolution and halves
    # it in the pooling step: 75 -> 37 -> 18 -> 9 -> 4.
    for n_filters in (32, 64, 128, 128):
        ConvBlock(net, 1, n_filters)

    # 4x4x128 feature map -> flat vector -> single probability.
    net.add(Flatten())
    net.add(Dense(1, activation='sigmoid'))

    return net
# Prints a fixed marker string; presumably just a visual cue that this cell
# executed and (re)defined ConvBlock / create_model.
print('model model')


model model


In [33]:
def kfold_train(fold_cnt=3,rnd=42):
    '''Train one model per K-fold split; return out-of-fold and test predictions.

    For every fold a fresh model from ``create_model()`` is compiled and trained
    on augmented batches. The checkpoint with the lowest validation loss is then
    reloaded and used to predict the held-out fold and the whole test set.

    Args:
        fold_cnt: number of K-fold splits; also the divisor used to average
            the per-fold test predictions.
        rnd: seed base; the shuffled ``KFold`` uses ``random_state=2*rnd``.

    Returns:
        ``(train_pred, test_pred)``: arrays of shape ``(len(train_x), 1)`` and
        ``(len(test_x), 1)`` with the out-of-fold predictions and the
        fold-averaged test predictions.

    Reads the notebook globals ``train_x``, ``y``, ``test_x``, ``lr_f`` and
    ``create_model``; writes the checkpoint file ``best_m.h5``.
    '''
    # Size the buffers from the data instead of hard-coding 1604 / 8424 rows,
    # so the function keeps working if the dataset is subsampled or extended.
    train_pred = np.zeros((len(train_x), 1))
    test_pred = np.zeros((len(test_x), 1))

    # Fold-independent settings, built once instead of once per fold.
    bat_size = 16
    model_p = 'best_m.h5'
    datagen = ImageDataGenerator(
        width_shift_range=0.05,
        height_shift_range=0.05,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        vertical_flip=True
    )

    kf = KFold(n_splits=fold_cnt, shuffle=True, random_state=2*rnd)
    for train_index, test_index in kf.split(train_x):
        curr_x,curr_y = train_x[train_index],y[train_index]
        val_x,val_y = train_x[test_index],y[test_index]

        # Floor division: a trailing partial batch is not counted as a step.
        steps_train = len(curr_y)//bat_size

        model = create_model()
        model.compile(loss='binary_crossentropy', optimizer=Adam(0.0005), metrics=['accuracy'])
        # A new checkpoint callback per fold, so "best" restarts for each fold
        # and the shared file is overwritten by the current fold's best epoch.
        model_chk = ModelCheckpoint(filepath=model_p, monitor='val_loss', save_best_only=True, verbose=1)
        lr_s = LearningRateScheduler(lr_f)
        model.fit_generator(datagen.flow(curr_x, curr_y, batch_size=bat_size),
                  validation_data=(val_x,val_y),
                  steps_per_epoch = steps_train,
                  epochs=120, 
                  verbose=2,
                  callbacks=[model_chk,lr_s]
                 )

        # Predict with the best checkpoint rather than the last-epoch weights.
        model = load_model(model_p)
        train_pred[test_index] = model.predict(val_x)
        test_pred = test_pred + model.predict(test_x)/fold_cnt
        print('============================')
    return train_pred,test_pred

# Run 5-fold training with whichever create_model is currently defined in the
# kernel; keeps the out-of-fold and the fold-averaged test predictions.
train_pred,test_pred = kfold_train(fold_cnt=5)

Epoch 1/120
Epoch 00001: val_loss improved from inf to 0.57041, saving model to best_m.h5
 - 9s - loss: 0.6576 - acc: 0.7039 - val_loss: 0.5704 - val_acc: 0.6542
Epoch 2/120
Epoch 00002: val_loss did not improve
 - 3s - loss: 0.5001 - acc: 0.7833 - val_loss: 1.2920 - val_acc: 0.5234
Epoch 3/120
Epoch 00003: val_loss did not improve
 - 3s - loss: 0.4836 - acc: 0.7851 - val_loss: 2.4837 - val_acc: 0.5109
Epoch 4/120
Epoch 00004: val_loss did not improve
 - 2s - loss: 0.4475 - acc: 0.7966 - val_loss: 0.6592 - val_acc: 0.7601
Epoch 5/120
Epoch 00005: val_loss did not improve
 - 2s - loss: 0.4430 - acc: 0.8013 - val_loss: 0.5944 - val_acc: 0.7819
Epoch 6/120
Epoch 00006: val_loss improved from 0.57041 to 0.48112, saving model to best_m.h5
 - 3s - loss: 0.3770 - acc: 0.8320 - val_loss: 0.4811 - val_acc: 0.7944
Epoch 7/120
Epoch 00007: val_loss did not improve
 - 4s - loss: 0.3817 - acc: 0.8209 - val_loss: 0.6820 - val_acc: 0.7352
Epoch 8/120
Epoch 00008: val_loss improved from 0.48112 to 0.3

Epoch 00063: val_loss did not improve
 - 2s - loss: 0.2311 - acc: 0.8988 - val_loss: 0.4481 - val_acc: 0.8442
Epoch 64/120
Epoch 00064: val_loss did not improve
 - 2s - loss: 0.1952 - acc: 0.9141 - val_loss: 0.4157 - val_acc: 0.8380
Epoch 65/120
Epoch 00065: val_loss did not improve
 - 2s - loss: 0.1594 - acc: 0.9312 - val_loss: 0.3564 - val_acc: 0.8567
Epoch 66/120
Epoch 00066: val_loss did not improve
 - 2s - loss: 0.1813 - acc: 0.9206 - val_loss: 0.3109 - val_acc: 0.8972
Epoch 67/120
Epoch 00067: val_loss did not improve
 - 2s - loss: 0.1809 - acc: 0.9240 - val_loss: 0.3541 - val_acc: 0.8754
Epoch 68/120
Epoch 00068: val_loss did not improve
 - 3s - loss: 0.1338 - acc: 0.9484 - val_loss: 0.2942 - val_acc: 0.8910
Epoch 69/120
Epoch 00069: val_loss did not improve
 - 2s - loss: 0.1738 - acc: 0.9234 - val_loss: 0.3565 - val_acc: 0.8754
Epoch 70/120
Epoch 00070: val_loss did not improve
 - 2s - loss: 0.1929 - acc: 0.9219 - val_loss: 0.5899 - val_acc: 0.7788
Epoch 71/120
Epoch 00071: val

Epoch 9/120
Epoch 00009: val_loss did not improve
 - 2s - loss: 0.3887 - acc: 0.8277 - val_loss: 0.5366 - val_acc: 0.7757
Epoch 10/120
Epoch 00010: val_loss improved from 0.44922 to 0.29476, saving model to best_m.h5
 - 3s - loss: 0.3560 - acc: 0.8419 - val_loss: 0.2948 - val_acc: 0.8692
Epoch 11/120
Epoch 00011: val_loss did not improve
 - 2s - loss: 0.3542 - acc: 0.8480 - val_loss: 0.7178 - val_acc: 0.7165
Epoch 12/120
Epoch 00012: val_loss did not improve
 - 2s - loss: 0.3615 - acc: 0.8292 - val_loss: 0.6044 - val_acc: 0.7913
Epoch 13/120
Epoch 00013: val_loss did not improve
 - 2s - loss: 0.3481 - acc: 0.8459 - val_loss: 0.4222 - val_acc: 0.8505
Epoch 14/120
Epoch 00014: val_loss did not improve
 - 2s - loss: 0.3491 - acc: 0.8363 - val_loss: 0.6895 - val_acc: 0.7445
Epoch 15/120
Epoch 00015: val_loss improved from 0.29476 to 0.29466, saving model to best_m.h5
 - 2s - loss: 0.3375 - acc: 0.8422 - val_loss: 0.2947 - val_acc: 0.8660
Epoch 16/120
Epoch 00016: val_loss improved from 0.2

Epoch 74/120
Epoch 00074: val_loss did not improve
 - 2s - loss: 0.1654 - acc: 0.9336 - val_loss: 0.2635 - val_acc: 0.9065
Epoch 75/120
Epoch 00075: val_loss did not improve
 - 2s - loss: 0.1409 - acc: 0.9453 - val_loss: 0.2582 - val_acc: 0.8941
Epoch 76/120
Epoch 00076: val_loss did not improve
 - 2s - loss: 0.1499 - acc: 0.9430 - val_loss: 0.2424 - val_acc: 0.9065
Epoch 77/120
Epoch 00077: val_loss did not improve
 - 2s - loss: 0.1395 - acc: 0.9414 - val_loss: 0.2484 - val_acc: 0.9159
Epoch 78/120
Epoch 00078: val_loss did not improve
 - 2s - loss: 0.1340 - acc: 0.9469 - val_loss: 0.2669 - val_acc: 0.9065
Epoch 79/120
Epoch 00079: val_loss did not improve
 - 2s - loss: 0.1333 - acc: 0.9453 - val_loss: 0.2509 - val_acc: 0.9128
Epoch 80/120
Epoch 00080: val_loss did not improve
 - 2s - loss: 0.1318 - acc: 0.9523 - val_loss: 0.2595 - val_acc: 0.9159
Epoch 81/120
Epoch 00081: val_loss did not improve
 - 2s - loss: 0.1438 - acc: 0.9357 - val_loss: 0.2578 - val_acc: 0.9159
Epoch 82/120
Epo

Epoch 19/120
Epoch 00019: val_loss improved from 0.23430 to 0.21843, saving model to best_m.h5
 - 2s - loss: 0.2946 - acc: 0.8677 - val_loss: 0.2184 - val_acc: 0.9159
Epoch 20/120
Epoch 00020: val_loss did not improve
 - 2s - loss: 0.3440 - acc: 0.8535 - val_loss: 0.2247 - val_acc: 0.8816
Epoch 21/120
Epoch 00021: val_loss improved from 0.21843 to 0.21041, saving model to best_m.h5
 - 2s - loss: 0.2612 - acc: 0.8945 - val_loss: 0.2104 - val_acc: 0.9190
Epoch 22/120
Epoch 00022: val_loss did not improve
 - 2s - loss: 0.2829 - acc: 0.8695 - val_loss: 0.2528 - val_acc: 0.8816
Epoch 23/120
Epoch 00023: val_loss did not improve
 - 2s - loss: 0.2537 - acc: 0.8873 - val_loss: 0.2429 - val_acc: 0.8910
Epoch 24/120
Epoch 00024: val_loss did not improve
 - 2s - loss: 0.2589 - acc: 0.8888 - val_loss: 0.2258 - val_acc: 0.8816
Epoch 25/120
Epoch 00025: val_loss did not improve
 - 2s - loss: 0.2844 - acc: 0.8638 - val_loss: 0.4689 - val_acc: 0.7850
Epoch 26/120
Epoch 00026: val_loss improved from 0.

Epoch 84/120
Epoch 00084: val_loss did not improve
 - 2s - loss: 0.1363 - acc: 0.9492 - val_loss: 0.1994 - val_acc: 0.9034
Epoch 85/120
Epoch 00085: val_loss did not improve
 - 2s - loss: 0.1398 - acc: 0.9437 - val_loss: 0.1951 - val_acc: 0.9003
Epoch 86/120
Epoch 00086: val_loss did not improve
 - 2s - loss: 0.1191 - acc: 0.9537 - val_loss: 0.2002 - val_acc: 0.9034
Epoch 87/120
Epoch 00087: val_loss did not improve
 - 2s - loss: 0.1385 - acc: 0.9516 - val_loss: 0.2106 - val_acc: 0.9065
Epoch 88/120
Epoch 00088: val_loss did not improve
 - 2s - loss: 0.1296 - acc: 0.9461 - val_loss: 0.2329 - val_acc: 0.9034
Epoch 89/120
Epoch 00089: val_loss did not improve
 - 2s - loss: 0.1257 - acc: 0.9469 - val_loss: 0.2009 - val_acc: 0.9097
Epoch 90/120
Epoch 00090: val_loss did not improve
 - 2s - loss: 0.1188 - acc: 0.9594 - val_loss: 0.2220 - val_acc: 0.9065
Epoch 91/120
Epoch 00091: val_loss did not improve
 - 2s - loss: 0.1048 - acc: 0.9609 - val_loss: 0.2182 - val_acc: 0.9128
Epoch 92/120
Epo

Epoch 28/120
Epoch 00028: val_loss did not improve
 - 2s - loss: 0.2312 - acc: 0.9013 - val_loss: 0.3063 - val_acc: 0.8660
Epoch 29/120
Epoch 00029: val_loss did not improve
 - 2s - loss: 0.2312 - acc: 0.8964 - val_loss: 0.6984 - val_acc: 0.7539
Epoch 30/120
Epoch 00030: val_loss did not improve
 - 2s - loss: 0.2390 - acc: 0.8984 - val_loss: 0.5709 - val_acc: 0.7757
Epoch 31/120
Epoch 00031: val_loss did not improve
 - 2s - loss: 0.2246 - acc: 0.9054 - val_loss: 0.3348 - val_acc: 0.8536
Epoch 32/120
Epoch 00032: val_loss improved from 0.24895 to 0.24214, saving model to best_m.h5
 - 2s - loss: 0.2582 - acc: 0.8865 - val_loss: 0.2421 - val_acc: 0.8941
Epoch 33/120
Epoch 00033: val_loss did not improve
 - 2s - loss: 0.2405 - acc: 0.9094 - val_loss: 0.2756 - val_acc: 0.8723
Epoch 34/120
Epoch 00034: val_loss did not improve
 - 3s - loss: 0.2319 - acc: 0.8969 - val_loss: 0.4012 - val_acc: 0.8100
Epoch 35/120
Epoch 00035: val_loss did not improve
 - 2s - loss: 0.2119 - acc: 0.9047 - val_los

Epoch 93/120
Epoch 00093: val_loss did not improve
 - 2s - loss: 0.1252 - acc: 0.9492 - val_loss: 0.2328 - val_acc: 0.9003
Epoch 94/120
Epoch 00094: val_loss did not improve
 - 2s - loss: 0.1155 - acc: 0.9492 - val_loss: 0.2415 - val_acc: 0.9097
Epoch 95/120
Epoch 00095: val_loss did not improve
 - 2s - loss: 0.1083 - acc: 0.9625 - val_loss: 0.2224 - val_acc: 0.9097
Epoch 96/120
Epoch 00096: val_loss improved from 0.21979 to 0.21689, saving model to best_m.h5
 - 2s - loss: 0.1156 - acc: 0.9562 - val_loss: 0.2169 - val_acc: 0.9159
Epoch 97/120
Epoch 00097: val_loss did not improve
 - 2s - loss: 0.1134 - acc: 0.9531 - val_loss: 0.2177 - val_acc: 0.9159
Epoch 98/120
Epoch 00098: val_loss did not improve
 - 2s - loss: 0.0960 - acc: 0.9609 - val_loss: 0.2228 - val_acc: 0.9128
Epoch 99/120
Epoch 00099: val_loss improved from 0.21689 to 0.21026, saving model to best_m.h5
 - 2s - loss: 0.1259 - acc: 0.9531 - val_loss: 0.2103 - val_acc: 0.9283
Epoch 100/120
Epoch 00100: val_loss did not improve

Epoch 36/120
Epoch 00036: val_loss did not improve
 - 2s - loss: 0.1865 - acc: 0.9180 - val_loss: 0.2933 - val_acc: 0.8656
Epoch 37/120
Epoch 00037: val_loss did not improve
 - 2s - loss: 0.1912 - acc: 0.9164 - val_loss: 0.2832 - val_acc: 0.8906
Epoch 38/120
Epoch 00038: val_loss did not improve
 - 2s - loss: 0.1981 - acc: 0.9148 - val_loss: 0.3204 - val_acc: 0.8906
Epoch 39/120
Epoch 00039: val_loss did not improve
 - 2s - loss: 0.2136 - acc: 0.9141 - val_loss: 0.3120 - val_acc: 0.8906
Epoch 40/120
Epoch 00040: val_loss did not improve
 - 2s - loss: 0.1917 - acc: 0.9281 - val_loss: 0.6375 - val_acc: 0.7812
Epoch 41/120
Epoch 00041: val_loss did not improve
 - 2s - loss: 0.1935 - acc: 0.9211 - val_loss: 0.4220 - val_acc: 0.8156
Epoch 42/120
Epoch 00042: val_loss did not improve
 - 2s - loss: 0.2088 - acc: 0.9039 - val_loss: 0.3982 - val_acc: 0.8281
Epoch 43/120
Epoch 00043: val_loss did not improve
 - 2s - loss: 0.2338 - acc: 0.8961 - val_loss: 0.4886 - val_acc: 0.8125
Epoch 44/120
Epo

Epoch 103/120
Epoch 00103: val_loss did not improve
 - 2s - loss: 0.0957 - acc: 0.9641 - val_loss: 0.3414 - val_acc: 0.8875
Epoch 104/120
Epoch 00104: val_loss did not improve
 - 2s - loss: 0.0865 - acc: 0.9680 - val_loss: 0.3738 - val_acc: 0.8906
Epoch 105/120
Epoch 00105: val_loss did not improve
 - 2s - loss: 0.1048 - acc: 0.9570 - val_loss: 0.3448 - val_acc: 0.8906
Epoch 106/120
Epoch 00106: val_loss did not improve
 - 2s - loss: 0.1028 - acc: 0.9657 - val_loss: 0.3145 - val_acc: 0.8844
Epoch 107/120
Epoch 00107: val_loss did not improve
 - 2s - loss: 0.0933 - acc: 0.9688 - val_loss: 0.3092 - val_acc: 0.8844
Epoch 108/120
Epoch 00108: val_loss did not improve
 - 2s - loss: 0.1170 - acc: 0.9586 - val_loss: 0.3030 - val_acc: 0.8969
Epoch 109/120
Epoch 00109: val_loss did not improve
 - 2s - loss: 0.0945 - acc: 0.9609 - val_loss: 0.2973 - val_acc: 0.8969
Epoch 110/120
Epoch 00110: val_loss did not improve
 - 2s - loss: 0.1040 - acc: 0.9594 - val_loss: 0.3430 - val_acc: 0.8688
Epoch 11

In [34]:
# Persist the out-of-fold and test predictions as a two-element list.
with open('../features/cnn_4_aug_skimage_preprocess_feat.pkl', 'wb') as fout:
    pickle.dump([train_pred, test_pred], fout)

# Log-loss of the out-of-fold predictions against the training labels.
print(log_loss(y, train_pred))

# Build the submission table: ids from the test frame plus the averaged predictions.
submission = test_df[['id']].copy()
submission['is_iceberg'] = test_pred
print(submission.head())
submission.to_csv('../results/cnn_4_aug_skimage_preprocess.csv', index=False)

0.214290988413
         id  is_iceberg
0  5941774d    0.005656
1  4023181e    0.515646
2  b20200e4    0.001513
3  e7f018bb    0.994804
4  4371c8c3    0.238940


In [35]:
def create_model():
    '''Build the batch-normalized four-stage CNN classifier; return it uncompiled.

    Each stage is Conv2D 3x3 -> ReLU -> BatchNormalization -> 3x3 max pooling
    with stride 2 -> Dropout. The stages are followed by Flatten, two ReLU dense
    layers (512 and 256 units, each with Dropout 0.2) and a single sigmoid
    output unit. Input is a 75x75x3 image. Note that, because of the dense head,
    this is not a fully convolutional network.
    '''
    # (number of filters, dropout rate) for each convolutional stage.
    stage_specs = ((64, 0.2), (128, 0.2), (128, 0.3), (64, 0.3))

    net = Sequential()
    for stage_idx, (n_filters, drop_rate) in enumerate(stage_specs):
        if stage_idx == 0:
            # The first layer of a Sequential model has to declare the input shape.
            conv = Conv2D(n_filters, kernel_size=(3, 3), input_shape=(75, 75, 3))
        else:
            conv = Conv2D(n_filters, kernel_size=(3, 3))
        net.add(conv)
        # The activation is a separate layer here, applied before the normalization.
        net.add(Activation('relu'))
        net.add(BatchNormalization())
        net.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
        net.add(Dropout(drop_rate))

    # The dense head needs a flat feature vector.
    net.add(Flatten())
    for n_units in (512, 256):
        net.add(Dense(n_units, activation='relu'))
        net.add(Dropout(0.2))

    # Single sigmoid unit for the binary output.
    net.add(Dense(1, activation="sigmoid"))
    return net
# Prints a fixed marker string; presumably just a visual cue that this cell
# executed and (re)defined create_model.
print('model model')


model model


In [36]:
def kfold_train(fold_cnt=3,rnd=42):
    '''Train one CNN per K-fold split and collect its predictions.

    For every fold a fresh model from ``create_model`` is compiled and trained
    on augmented batches of the fold's training part. The checkpoint with the
    lowest validation loss is then reloaded and used to predict both the
    held-out part and the full test set.

    Reads the notebook-level ``train_x``, ``y``, ``test_x`` and the
    learning-rate schedule function ``lr_f``.

    Args:
        fold_cnt: number of folds passed to ``KFold``.
        rnd: seed base; the shuffled ``KFold`` uses ``random_state=2*rnd``.

    Returns:
        ``(train_pred, test_pred)``: ``train_pred`` has shape
        ``(len(train_x), 1)`` and holds each sample's prediction from the fold
        in which it was held out; ``test_pred`` has shape ``(len(test_x), 1)``
        and is the mean of the per-fold test predictions.
    '''
    # Size the buffers from the data instead of hard-coding the sample counts,
    # so the function keeps working if the inputs are subsampled or replaced.
    train_pred = np.zeros((len(train_x), 1))
    test_pred = np.zeros((len(test_x), 1))

    kf = KFold(n_splits=fold_cnt, shuffle=True, random_state=2*rnd)

    # The augmentation settings do not depend on the fold, so build the
    # generator once; a new batch iterator is still created per fold by .flow().
    datagen = ImageDataGenerator(
        width_shift_range=0.05,
        height_shift_range=0.05,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        vertical_flip=True
    )

    bat_size = 16
    # NOTE(review): every fold writes its best checkpoint to this same path,
    # overwriting the previous fold's file. If a fold never saved a checkpoint,
    # load_model below would pick up the previous fold's weights -- confirm
    # this cannot happen in practice.
    model_p = 'best_m.h5'

    for train_index, test_index in kf.split(train_x):
        curr_x,curr_y = train_x[train_index],y[train_index]
        val_x,val_y = train_x[test_index],y[test_index]

        # Whole batches per epoch; a trailing partial batch is not counted.
        steps_train = len(curr_y)//bat_size

        model = create_model()
        model.compile(loss='binary_crossentropy', optimizer=Adam(0.0005), metrics=['accuracy'])
        model_chk = ModelCheckpoint(filepath=model_p, monitor='val_loss', save_best_only=True, verbose=1)
        lr_s = LearningRateScheduler(lr_f)
        model.fit_generator(datagen.flow(curr_x, curr_y, batch_size=bat_size),
                  validation_data=(val_x,val_y),
                  steps_per_epoch = steps_train,
                  epochs=120,
                  verbose=2,
                  callbacks=[model_chk,lr_s]
                 )

        # Predict with the best-val_loss checkpoint, not the last-epoch weights.
        model = load_model(model_p)
        train_pred[test_index] = model.predict(val_x)
        # Accumulate the running mean of the test predictions across folds.
        test_pred = test_pred + model.predict(test_x)/fold_cnt
        print('============================')
    return train_pred,test_pred

# Run the 5-fold training defined above; rebinds the notebook-level train_pred / test_pred.
train_pred,test_pred = kfold_train(fold_cnt=5)

Epoch 1/120
Epoch 00001: val_loss improved from inf to 1.02287, saving model to best_m.h5
 - 11s - loss: 0.7218 - acc: 0.5641 - val_loss: 1.0229 - val_acc: 0.4984
Epoch 2/120
Epoch 00002: val_loss did not improve
 - 4s - loss: 0.5723 - acc: 0.7099 - val_loss: 1.4175 - val_acc: 0.4984
Epoch 3/120
Epoch 00003: val_loss improved from 1.02287 to 0.87617, saving model to best_m.h5
 - 4s - loss: 0.5363 - acc: 0.7277 - val_loss: 0.8762 - val_acc: 0.4984
Epoch 4/120
Epoch 00004: val_loss improved from 0.87617 to 0.50260, saving model to best_m.h5
 - 4s - loss: 0.4882 - acc: 0.7388 - val_loss: 0.5026 - val_acc: 0.7445
Epoch 5/120
Epoch 00005: val_loss did not improve
 - 4s - loss: 0.4651 - acc: 0.7761 - val_loss: 0.6470 - val_acc: 0.6355
Epoch 6/120
Epoch 00006: val_loss did not improve
 - 4s - loss: 0.4550 - acc: 0.7810 - val_loss: 0.6961 - val_acc: 0.5919
Epoch 7/120
Epoch 00007: val_loss improved from 0.50260 to 0.42279, saving model to best_m.h5
 - 4s - loss: 0.4171 - acc: 0.8091 - val_loss

Epoch 64/120
Epoch 00064: val_loss did not improve
 - 3s - loss: 0.2598 - acc: 0.8883 - val_loss: 0.4571 - val_acc: 0.8037
Epoch 65/120
Epoch 00065: val_loss did not improve
 - 3s - loss: 0.2638 - acc: 0.8805 - val_loss: 0.2600 - val_acc: 0.9065
Epoch 66/120
Epoch 00066: val_loss did not improve
 - 3s - loss: 0.2895 - acc: 0.8765 - val_loss: 0.3061 - val_acc: 0.8692
Epoch 67/120
Epoch 00067: val_loss did not improve
 - 4s - loss: 0.2811 - acc: 0.8818 - val_loss: 0.3120 - val_acc: 0.8598
Epoch 68/120
Epoch 00068: val_loss improved from 0.25565 to 0.25213, saving model to best_m.h5
 - 4s - loss: 0.2940 - acc: 0.8777 - val_loss: 0.2521 - val_acc: 0.8847
Epoch 69/120
Epoch 00069: val_loss did not improve
 - 4s - loss: 0.2698 - acc: 0.8849 - val_loss: 0.2653 - val_acc: 0.8972
Epoch 70/120
Epoch 00070: val_loss did not improve
 - 4s - loss: 0.2743 - acc: 0.8664 - val_loss: 0.2896 - val_acc: 0.8910
Epoch 71/120
Epoch 00071: val_loss improved from 0.25213 to 0.25038, saving model to best_m.h5


Epoch 7/120
Epoch 00007: val_loss improved from 0.43617 to 0.39509, saving model to best_m.h5
 - 4s - loss: 0.4466 - acc: 0.8060 - val_loss: 0.3951 - val_acc: 0.8100
Epoch 8/120
Epoch 00008: val_loss did not improve
 - 4s - loss: 0.4416 - acc: 0.7810 - val_loss: 0.4635 - val_acc: 0.7508
Epoch 9/120
Epoch 00009: val_loss did not improve
 - 4s - loss: 0.3818 - acc: 0.8258 - val_loss: 0.4112 - val_acc: 0.7913
Epoch 10/120
Epoch 00010: val_loss did not improve
 - 4s - loss: 0.3897 - acc: 0.8060 - val_loss: 0.4104 - val_acc: 0.8006
Epoch 11/120
Epoch 00011: val_loss improved from 0.39509 to 0.35621, saving model to best_m.h5
 - 4s - loss: 0.3736 - acc: 0.8140 - val_loss: 0.3562 - val_acc: 0.8131
Epoch 12/120
Epoch 00012: val_loss improved from 0.35621 to 0.30322, saving model to best_m.h5
 - 4s - loss: 0.3748 - acc: 0.8162 - val_loss: 0.3032 - val_acc: 0.8536
Epoch 13/120
Epoch 00013: val_loss did not improve
 - 4s - loss: 0.3921 - acc: 0.8074 - val_loss: 0.3650 - val_acc: 0.8224
Epoch 14/1

Epoch 72/120
Epoch 00072: val_loss did not improve
 - 4s - loss: 0.2532 - acc: 0.8937 - val_loss: 0.2398 - val_acc: 0.8910
Epoch 73/120
Epoch 00073: val_loss improved from 0.22788 to 0.22688, saving model to best_m.h5
 - 4s - loss: 0.2240 - acc: 0.9078 - val_loss: 0.2269 - val_acc: 0.9034
Epoch 74/120
Epoch 00074: val_loss improved from 0.22688 to 0.22637, saving model to best_m.h5
 - 4s - loss: 0.2590 - acc: 0.8808 - val_loss: 0.2264 - val_acc: 0.8972
Epoch 75/120
Epoch 00075: val_loss improved from 0.22637 to 0.21840, saving model to best_m.h5
 - 4s - loss: 0.2515 - acc: 0.8945 - val_loss: 0.2184 - val_acc: 0.9065
Epoch 76/120
Epoch 00076: val_loss did not improve
 - 4s - loss: 0.2497 - acc: 0.8930 - val_loss: 0.2198 - val_acc: 0.9065
Epoch 77/120
Epoch 00077: val_loss did not improve
 - 4s - loss: 0.2421 - acc: 0.8984 - val_loss: 0.2219 - val_acc: 0.9065
Epoch 78/120
Epoch 00078: val_loss did not improve
 - 4s - loss: 0.2199 - acc: 0.9055 - val_loss: 0.2206 - val_acc: 0.9097
Epoch 7

Epoch 15/120
Epoch 00015: val_loss did not improve
 - 4s - loss: 0.3810 - acc: 0.8054 - val_loss: 0.2917 - val_acc: 0.8536
Epoch 16/120
Epoch 00016: val_loss did not improve
 - 4s - loss: 0.4090 - acc: 0.8141 - val_loss: 0.2976 - val_acc: 0.8629
Epoch 17/120
Epoch 00017: val_loss did not improve
 - 3s - loss: 0.3990 - acc: 0.8055 - val_loss: 0.3667 - val_acc: 0.7757
Epoch 18/120
Epoch 00018: val_loss did not improve
 - 4s - loss: 0.3557 - acc: 0.8227 - val_loss: 0.3101 - val_acc: 0.8318
Epoch 19/120
Epoch 00019: val_loss improved from 0.28420 to 0.26883, saving model to best_m.h5
 - 4s - loss: 0.3591 - acc: 0.8273 - val_loss: 0.2688 - val_acc: 0.8567
Epoch 20/120
Epoch 00020: val_loss did not improve
 - 3s - loss: 0.3590 - acc: 0.8308 - val_loss: 0.2826 - val_acc: 0.8692
Epoch 21/120
Epoch 00021: val_loss did not improve
 - 4s - loss: 0.3412 - acc: 0.8388 - val_loss: 0.3676 - val_acc: 0.8287
Epoch 22/120
Epoch 00022: val_loss improved from 0.26883 to 0.26529, saving model to best_m.h5


Epoch 79/120
Epoch 00079: val_loss did not improve
 - 4s - loss: 0.2338 - acc: 0.9008 - val_loss: 0.2042 - val_acc: 0.8879
Epoch 80/120
Epoch 00080: val_loss did not improve
 - 4s - loss: 0.2397 - acc: 0.9016 - val_loss: 0.2042 - val_acc: 0.8941
Epoch 81/120
Epoch 00081: val_loss improved from 0.20369 to 0.20367, saving model to best_m.h5
 - 4s - loss: 0.2588 - acc: 0.8849 - val_loss: 0.2037 - val_acc: 0.8910
Epoch 82/120
Epoch 00082: val_loss improved from 0.20367 to 0.20075, saving model to best_m.h5
 - 4s - loss: 0.2313 - acc: 0.9023 - val_loss: 0.2007 - val_acc: 0.8879
Epoch 83/120
Epoch 00083: val_loss did not improve
 - 4s - loss: 0.2230 - acc: 0.9086 - val_loss: 0.2043 - val_acc: 0.8972
Epoch 84/120
Epoch 00084: val_loss did not improve
 - 4s - loss: 0.2281 - acc: 0.8984 - val_loss: 0.2016 - val_acc: 0.8879
Epoch 85/120
Epoch 00085: val_loss did not improve
 - 4s - loss: 0.2410 - acc: 0.8969 - val_loss: 0.2219 - val_acc: 0.8847
Epoch 86/120
Epoch 00086: val_loss improved from 0.

Epoch 19/120
Epoch 00019: val_loss did not improve
 - 4s - loss: 0.3302 - acc: 0.8469 - val_loss: 0.7884 - val_acc: 0.7414
Epoch 20/120
Epoch 00020: val_loss did not improve
 - 4s - loss: 0.3678 - acc: 0.8388 - val_loss: 0.2962 - val_acc: 0.8567
Epoch 21/120
Epoch 00021: val_loss did not improve
 - 4s - loss: 0.3020 - acc: 0.8716 - val_loss: 0.3513 - val_acc: 0.8349
Epoch 22/120
Epoch 00022: val_loss did not improve
 - 4s - loss: 0.3529 - acc: 0.8368 - val_loss: 0.3635 - val_acc: 0.8442
Epoch 23/120
Epoch 00023: val_loss did not improve
 - 4s - loss: 0.2993 - acc: 0.8703 - val_loss: 0.2983 - val_acc: 0.8567
Epoch 24/120
Epoch 00024: val_loss did not improve
 - 4s - loss: 0.3193 - acc: 0.8482 - val_loss: 0.3464 - val_acc: 0.8411
Epoch 25/120
Epoch 00025: val_loss did not improve
 - 4s - loss: 0.3113 - acc: 0.8560 - val_loss: 0.3995 - val_acc: 0.8349
Epoch 26/120
Epoch 00026: val_loss did not improve
 - 4s - loss: 0.3209 - acc: 0.8589 - val_loss: 0.3233 - val_acc: 0.8598
Epoch 27/120
Epo

Epoch 85/120
Epoch 00085: val_loss did not improve
 - 4s - loss: 0.2100 - acc: 0.9133 - val_loss: 0.2857 - val_acc: 0.8879
Epoch 86/120
Epoch 00086: val_loss did not improve
 - 4s - loss: 0.2223 - acc: 0.9070 - val_loss: 0.2894 - val_acc: 0.8847
Epoch 87/120
Epoch 00087: val_loss did not improve
 - 4s - loss: 0.2421 - acc: 0.8959 - val_loss: 0.2800 - val_acc: 0.8785
Epoch 88/120
Epoch 00088: val_loss did not improve
 - 4s - loss: 0.2181 - acc: 0.9039 - val_loss: 0.2881 - val_acc: 0.8941
Epoch 89/120
Epoch 00089: val_loss did not improve
 - 4s - loss: 0.2330 - acc: 0.9055 - val_loss: 0.2934 - val_acc: 0.8847
Epoch 90/120
Epoch 00090: val_loss did not improve
 - 4s - loss: 0.2098 - acc: 0.9094 - val_loss: 0.2963 - val_acc: 0.8847
Epoch 91/120
Epoch 00091: val_loss did not improve
 - 4s - loss: 0.2436 - acc: 0.9070 - val_loss: 0.3048 - val_acc: 0.8785
Epoch 92/120
Epoch 00092: val_loss did not improve
 - 4s - loss: 0.2253 - acc: 0.9060 - val_loss: 0.3077 - val_acc: 0.8723
Epoch 93/120
Epo

Epoch 29/120
Epoch 00029: val_loss did not improve
 - 4s - loss: 0.3061 - acc: 0.8664 - val_loss: 0.3044 - val_acc: 0.8688
Epoch 30/120
Epoch 00030: val_loss did not improve
 - 4s - loss: 0.2832 - acc: 0.8844 - val_loss: 0.2980 - val_acc: 0.8844
Epoch 31/120
Epoch 00031: val_loss did not improve
 - 4s - loss: 0.2910 - acc: 0.8742 - val_loss: 0.2976 - val_acc: 0.8906
Epoch 32/120
Epoch 00032: val_loss did not improve
 - 4s - loss: 0.2501 - acc: 0.8797 - val_loss: 0.3088 - val_acc: 0.8750
Epoch 33/120
Epoch 00033: val_loss did not improve
 - 4s - loss: 0.2917 - acc: 0.8781 - val_loss: 0.3064 - val_acc: 0.8781
Epoch 34/120
Epoch 00034: val_loss did not improve
 - 4s - loss: 0.2647 - acc: 0.8836 - val_loss: 0.4362 - val_acc: 0.7875
Epoch 35/120
Epoch 00035: val_loss did not improve
 - 4s - loss: 0.2637 - acc: 0.8867 - val_loss: 0.4027 - val_acc: 0.8344
Epoch 36/120
Epoch 00036: val_loss did not improve
 - 4s - loss: 0.2643 - acc: 0.8937 - val_loss: 0.3380 - val_acc: 0.8781
Epoch 37/120
Epo

Epoch 95/120
Epoch 00095: val_loss did not improve
 - 4s - loss: 0.2208 - acc: 0.9156 - val_loss: 0.3090 - val_acc: 0.8688
Epoch 96/120
Epoch 00096: val_loss did not improve
 - 4s - loss: 0.2398 - acc: 0.8899 - val_loss: 0.2849 - val_acc: 0.8875
Epoch 97/120
Epoch 00097: val_loss did not improve
 - 4s - loss: 0.1897 - acc: 0.9180 - val_loss: 0.2926 - val_acc: 0.8844
Epoch 98/120
Epoch 00098: val_loss did not improve
 - 4s - loss: 0.1956 - acc: 0.9196 - val_loss: 0.2933 - val_acc: 0.8844
Epoch 99/120
Epoch 00099: val_loss did not improve
 - 4s - loss: 0.2106 - acc: 0.9156 - val_loss: 0.2855 - val_acc: 0.8844
Epoch 100/120
Epoch 00100: val_loss did not improve
 - 4s - loss: 0.2114 - acc: 0.9055 - val_loss: 0.3013 - val_acc: 0.8719
Epoch 101/120
Epoch 00101: val_loss did not improve
 - 4s - loss: 0.2002 - acc: 0.9188 - val_loss: 0.2954 - val_acc: 0.8812
Epoch 102/120
Epoch 00102: val_loss did not improve
 - 4s - loss: 0.2118 - acc: 0.9094 - val_loss: 0.3063 - val_acc: 0.8781
Epoch 103/120

In [37]:
# Persist the k-fold prediction arrays (train_pred, test_pred) as stacking features.
feat_path = '../features/cnn_5_aug_skimage_preprocess_feat.pkl'
with open(feat_path, 'wb') as pkl_out:
    pickle.dump([train_pred, test_pred], pkl_out)

# Log loss of train_pred measured against the training labels y.
oof_loss = log_loss(y, train_pred)
print(oof_loss)
# 2364  (NOTE(review): unexplained number left by the original author -- confirm its meaning)

# Submission frame: test ids plus the predicted iceberg probability.
submission = test_df[['id']].copy()
submission['is_iceberg'] = test_pred
print(submission.head())
submission.to_csv('../results/cnn_5_aug_skimage_preprocess.csv', index=False)

0.230999538138
         id  is_iceberg
0  5941774d    0.183244
1  4023181e    0.469593
2  b20200e4    0.112887
3  e7f018bb    0.998942
4  4371c8c3    0.557149
