In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from keras.utils.np_utils import to_categorical
from keras.models import Sequential,load_model
from keras.layers import Dense, Dropout, Flatten, Lambda, Activation
from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D, GlobalAveragePooling2D
from keras.layers.normalization import BatchNormalization
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint,LearningRateScheduler,EarlyStopping

# Read the train and test sets from JSON files under ../input.
# Later cells use the 'band_1', 'band_2', 'is_iceberg' (train) and 'id' (test) columns.
train_df = pd.read_json('../input/train.json')
test_df = pd.read_json('../input/test.json')

Using TensorFlow backend.


In [2]:
from skimage.exposure import equalize_adapthist
from skimage.restoration import denoise_tv_chambolle

def std_img(x):
    '''Standardise every channel of an image to zero mean and unit variance.

    The array is modified in place and is also returned, so both
    ``std_img(img)`` and ``img = std_img(img)`` work.

    Args:
        x: floating-point array indexed as (row, column, channel). Any number
            of channels is accepted; the channel count is taken from
            ``x.shape[2]`` rather than being fixed at three.

    Returns:
        The same array object ``x`` after per-channel standardisation.
    '''
    for i in range(x.shape[2]):
        channel = x[:, :, i]  # basic slice -> a view, so the updates land in x
        channel -= np.mean(channel)
        # The small epsilon keeps a constant channel from dividing by zero.
        channel /= np.std(channel) + 1e-7
    return x

def _rescale_band(band):
    '''Min-max rescale one band to [0, 1]; a constant band maps to all zeros.'''
    lo = band.min()
    span = band.max() - lo
    if span == 0:
        # A constant band would otherwise divide by zero and yield a NaN image.
        return np.zeros_like(band, dtype=float)
    return (band - lo) / span


def get_image(df):
    '''Create denoised 3-channel 'images' from the two bands of each row.

    For every row the 'band_1' and 'band_2' lists are reshaped to 75x75, a
    third band is formed as their mean, each band is min-max rescaled
    independently, and the stacked image is passed through total-variation
    denoising.

    Args:
        df: DataFrame with 'band_1' and 'band_2' columns, each cell holding
            the 75*75 values of one band.

    Returns:
        A numpy array stacking one image per row of ``df`` (the notebook
        output reports shape (n_rows, 75, 75, 3)).
    '''
    images = []
    for _, row in df.iterrows():
        # Formulate the bands as 75x75 arrays
        band_1 = np.array(row['band_1']).reshape(75, 75)
        band_2 = np.array(row['band_2']).reshape(75, 75)
        band_3 = (band_1 + band_2) / 2

        # Rescale each band on its own range, then stack along a channel axis
        img = np.dstack([_rescale_band(band) for band in (band_1, band_2, band_3)])

        # NOTE(review): newer scikit-image releases replace `multichannel`
        # with `channel_axis` -- confirm against the installed version.
        img = denoise_tv_chambolle(img, weight=0.2, multichannel=True)

        # Per-channel standardisation (std_img) is intentionally not applied here.
        images.append(img)
    return np.array(images)


# Build the denoised 3-channel image arrays for the train and test frames.
train_x = get_image(train_df)
test_x = get_image(test_df)

# Sanity check: print the shapes of both image arrays.
print(train_x.shape,test_x.shape)

(1604, 75, 75, 3) (8424, 75, 75, 3)


In [3]:
# Target vector: the 'is_iceberg' column of the training frame as a numpy array.
y = train_df.is_iceberg.values
# Peek at the first five labels.
print(y[:5])

[0 0 1 0 0]


In [4]:
def create_model():
    '''Build the first CNN variant and return it as an uncompiled keras model.

    Two convolutional stages (16 then 32 filters, each with two 3x3
    convolutions, 2x2 max-pooling and 25% dropout) feed a 256-unit dense
    layer with 50% dropout and a single sigmoid output unit.
    '''
    stage_16 = [
        Conv2D(16, (3, 3), padding='same', input_shape=(75, 75, 3)),
        Activation('relu'),
        Conv2D(16, (3, 3)),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
    ]

    stage_32 = [
        Conv2D(32, (3, 3)),
        Activation('relu'),
        Conv2D(32, (3, 3)),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
    ]

    classifier_head = [
        Flatten(),
        Dense(256),
        Activation('relu'),
        Dropout(0.5),
        Dense(1),
        Activation('sigmoid'),
    ]

    # Sequential adds the layers in list order, the same order the layers were created in.
    return Sequential(stage_16 + stage_32 + classifier_head)
print('model model')


model model


In [5]:
from sklearn.model_selection import KFold
from keras.preprocessing.image import ImageDataGenerator

def lr_f(epoch):
    '''Step learning-rate schedule keyed on the epoch index.

    Returns 0.0008 while epoch < 10, 0.0004 while epoch < 30, and 0.0001
    from then on.
    '''
    # (exclusive upper epoch bound, learning rate) pairs, checked in order
    steps = ((10, 0.0008), (30, 0.0004))
    for upper_bound, rate in steps:
        if epoch < upper_bound:
            return rate
    return 0.0001
    

def kfold_train(fold_cnt=3,rnd=42):
    '''Train one model per CV fold; return out-of-fold and test predictions.

    Reads the notebook-level globals ``train_x``, ``y`` and ``test_x`` and
    builds each fold's network with ``create_model()``. For every fold the
    checkpoint with the lowest validation loss is reloaded before predicting.

    Args:
        fold_cnt: number of KFold splits.
        rnd: seed; the shuffled KFold uses ``random_state=2*rnd``.

    Returns:
        (train_pred, test_pred): ``train_pred`` holds, for every training
        sample, the prediction of the fold model that had it in its
        validation split; ``test_pred`` is the mean of the fold models'
        predictions on ``test_x``. Both have a single column.
    '''
    # Size the buffers from the data rather than hard-coding 1604 / 8424 rows.
    train_pred = np.zeros((len(train_x), 1))
    test_pred = np.zeros((len(test_x), 1))
    kf = KFold(n_splits=fold_cnt, shuffle=True, random_state=2*rnd)

    # Fold-independent settings, created once.
    datagen = ImageDataGenerator(
        rotation_range = 20,
        width_shift_range = 0.1,
        height_shift_range = 0.1,
        zoom_range = 0.2,
        horizontal_flip=True
    )
    bat_size = 16
    model_p = 'best_m.h5'  # overwritten by every fold's checkpoint callback

    for train_index, test_index in kf.split(train_x):
        curr_x,curr_y = train_x[train_index],y[train_index]
        val_x,val_y = train_x[test_index],y[test_index]
        steps_train = len(curr_y)//bat_size

        model = create_model()
        # NOTE: LearningRateScheduler(lr_f) sets the rate at the start of each
        # epoch, so the 0.0005 passed to Adam is only a placeholder.
        model.compile(loss='binary_crossentropy', optimizer=Adam(0.0005), metrics=['accuracy'])
        model_chk = ModelCheckpoint(filepath=model_p, monitor='val_loss', save_best_only=True, verbose=1)
        lr_s = LearningRateScheduler(lr_f)
        model.fit_generator(datagen.flow(curr_x, curr_y, batch_size=bat_size),
                  validation_data=(val_x,val_y),
                  steps_per_epoch = steps_train,
                  epochs=150,
                  verbose=2,
                  callbacks=[model_chk,lr_s]
                 )

        # Predict with the best checkpoint of this fold, not the last epoch.
        model = load_model(model_p)
        train_pred[test_index] = model.predict(val_x)
        test_pred = test_pred + model.predict(test_x)/fold_cnt
        print('============================')
    return train_pred,test_pred

# Run 4-fold training; kfold_train builds each fold's network via create_model().
train_pred,test_pred = kfold_train(fold_cnt=4)

Epoch 1/150
Epoch 00001: val_loss improved from inf to 0.67068, saving model to best_m.h5
 - 2s - loss: 0.6970 - acc: 0.5461 - val_loss: 0.6707 - val_acc: 0.7082
Epoch 2/150
Epoch 00002: val_loss improved from 0.67068 to 0.61228, saving model to best_m.h5
 - 1s - loss: 0.6303 - acc: 0.6558 - val_loss: 0.6123 - val_acc: 0.6883
Epoch 3/150
Epoch 00003: val_loss improved from 0.61228 to 0.59257, saving model to best_m.h5
 - 1s - loss: 0.6132 - acc: 0.6558 - val_loss: 0.5926 - val_acc: 0.6833
Epoch 4/150
Epoch 00004: val_loss did not improve
 - 1s - loss: 0.5987 - acc: 0.6528 - val_loss: 0.6035 - val_acc: 0.6833
Epoch 5/150
Epoch 00005: val_loss improved from 0.59257 to 0.56926, saving model to best_m.h5
 - 1s - loss: 0.5787 - acc: 0.6756 - val_loss: 0.5693 - val_acc: 0.6783
Epoch 6/150
Epoch 00006: val_loss improved from 0.56926 to 0.54395, saving model to best_m.h5
 - 1s - loss: 0.5676 - acc: 0.6737 - val_loss: 0.5440 - val_acc: 0.7007
Epoch 7/150
Epoch 00007: val_loss did not improve
 -

Epoch 61/150
Epoch 00061: val_loss did not improve
 - 1s - loss: 0.3555 - acc: 0.8216 - val_loss: 0.3143 - val_acc: 0.8579
Epoch 62/150
Epoch 00062: val_loss did not improve
 - 1s - loss: 0.3573 - acc: 0.8241 - val_loss: 0.3284 - val_acc: 0.8603
Epoch 63/150
Epoch 00063: val_loss improved from 0.30236 to 0.29815, saving model to best_m.h5
 - 1s - loss: 0.3651 - acc: 0.8133 - val_loss: 0.2981 - val_acc: 0.8703
Epoch 64/150
Epoch 00064: val_loss did not improve
 - 1s - loss: 0.3608 - acc: 0.8203 - val_loss: 0.3166 - val_acc: 0.8628
Epoch 65/150
Epoch 00065: val_loss did not improve
 - 1s - loss: 0.3572 - acc: 0.8150 - val_loss: 0.3147 - val_acc: 0.8579
Epoch 66/150
Epoch 00066: val_loss did not improve
 - 1s - loss: 0.3697 - acc: 0.8106 - val_loss: 0.3091 - val_acc: 0.8628
Epoch 67/150
Epoch 00067: val_loss did not improve
 - 1s - loss: 0.3498 - acc: 0.8300 - val_loss: 0.3261 - val_acc: 0.8504
Epoch 68/150
Epoch 00068: val_loss improved from 0.29815 to 0.29605, saving model to best_m.h5


Epoch 125/150
Epoch 00125: val_loss did not improve
 - 1s - loss: 0.3348 - acc: 0.8383 - val_loss: 0.3135 - val_acc: 0.8653
Epoch 126/150
Epoch 00126: val_loss did not improve
 - 1s - loss: 0.3037 - acc: 0.8458 - val_loss: 0.2980 - val_acc: 0.8753
Epoch 127/150
Epoch 00127: val_loss did not improve
 - 1s - loss: 0.3313 - acc: 0.8347 - val_loss: 0.2946 - val_acc: 0.8628
Epoch 128/150
Epoch 00128: val_loss did not improve
 - 1s - loss: 0.3383 - acc: 0.8317 - val_loss: 0.3184 - val_acc: 0.8653
Epoch 129/150
Epoch 00129: val_loss did not improve
 - 1s - loss: 0.3237 - acc: 0.8416 - val_loss: 0.2919 - val_acc: 0.8703
Epoch 130/150
Epoch 00130: val_loss did not improve
 - 1s - loss: 0.3444 - acc: 0.8339 - val_loss: 0.3151 - val_acc: 0.8554
Epoch 131/150
Epoch 00131: val_loss did not improve
 - 1s - loss: 0.3194 - acc: 0.8391 - val_loss: 0.2827 - val_acc: 0.8653
Epoch 132/150
Epoch 00132: val_loss did not improve
 - 1s - loss: 0.3203 - acc: 0.8331 - val_loss: 0.3089 - val_acc: 0.8678
Epoch 13

Epoch 35/150
Epoch 00035: val_loss improved from 0.33632 to 0.33193, saving model to best_m.h5
 - 1s - loss: 0.3300 - acc: 0.8500 - val_loss: 0.3319 - val_acc: 0.8254
Epoch 36/150
Epoch 00036: val_loss improved from 0.33193 to 0.33093, saving model to best_m.h5
 - 1s - loss: 0.3291 - acc: 0.8464 - val_loss: 0.3309 - val_acc: 0.8304
Epoch 37/150
Epoch 00037: val_loss did not improve
 - 1s - loss: 0.3169 - acc: 0.8358 - val_loss: 0.3467 - val_acc: 0.8180
Epoch 38/150
Epoch 00038: val_loss did not improve
 - 1s - loss: 0.3232 - acc: 0.8450 - val_loss: 0.3421 - val_acc: 0.8180
Epoch 39/150
Epoch 00039: val_loss improved from 0.33093 to 0.32457, saving model to best_m.h5
 - 1s - loss: 0.3410 - acc: 0.8381 - val_loss: 0.3246 - val_acc: 0.8429
Epoch 40/150
Epoch 00040: val_loss did not improve
 - 1s - loss: 0.3278 - acc: 0.8442 - val_loss: 0.3406 - val_acc: 0.8229
Epoch 41/150
Epoch 00041: val_loss did not improve
 - 1s - loss: 0.3238 - acc: 0.8439 - val_loss: 0.3274 - val_acc: 0.8429
Epoch 4

Epoch 97/150
Epoch 00097: val_loss did not improve
 - 1s - loss: 0.2951 - acc: 0.8741 - val_loss: 0.3134 - val_acc: 0.8304
Epoch 98/150
Epoch 00098: val_loss did not improve
 - 2s - loss: 0.3061 - acc: 0.8542 - val_loss: 0.3179 - val_acc: 0.8429
Epoch 99/150
Epoch 00099: val_loss did not improve
 - 1s - loss: 0.3075 - acc: 0.8551 - val_loss: 0.3031 - val_acc: 0.8429
Epoch 100/150
Epoch 00100: val_loss did not improve
 - 1s - loss: 0.2935 - acc: 0.8583 - val_loss: 0.3074 - val_acc: 0.8479
Epoch 101/150
Epoch 00101: val_loss improved from 0.30058 to 0.29895, saving model to best_m.h5
 - 2s - loss: 0.3198 - acc: 0.8583 - val_loss: 0.2990 - val_acc: 0.8454
Epoch 102/150
Epoch 00102: val_loss did not improve
 - 2s - loss: 0.2912 - acc: 0.8675 - val_loss: 0.2996 - val_acc: 0.8454
Epoch 103/150
Epoch 00103: val_loss did not improve
 - 1s - loss: 0.2916 - acc: 0.8591 - val_loss: 0.3064 - val_acc: 0.8404
Epoch 104/150
Epoch 00104: val_loss did not improve
 - 1s - loss: 0.3068 - acc: 0.8464 - va

Epoch 10/150
Epoch 00010: val_loss improved from 0.51874 to 0.49306, saving model to best_m.h5
 - 1s - loss: 0.5362 - acc: 0.7041 - val_loss: 0.4931 - val_acc: 0.7232
Epoch 11/150
Epoch 00011: val_loss improved from 0.49306 to 0.47987, saving model to best_m.h5
 - 2s - loss: 0.5152 - acc: 0.7116 - val_loss: 0.4799 - val_acc: 0.7182
Epoch 12/150
Epoch 00012: val_loss improved from 0.47987 to 0.47704, saving model to best_m.h5
 - 1s - loss: 0.5262 - acc: 0.7008 - val_loss: 0.4770 - val_acc: 0.7282
Epoch 13/150
Epoch 00013: val_loss improved from 0.47704 to 0.46425, saving model to best_m.h5
 - 1s - loss: 0.4908 - acc: 0.7406 - val_loss: 0.4642 - val_acc: 0.7656
Epoch 14/150
Epoch 00014: val_loss improved from 0.46425 to 0.46073, saving model to best_m.h5
 - 1s - loss: 0.5050 - acc: 0.7291 - val_loss: 0.4607 - val_acc: 0.7606
Epoch 15/150
Epoch 00015: val_loss improved from 0.46073 to 0.45917, saving model to best_m.h5
 - 1s - loss: 0.4917 - acc: 0.7381 - val_loss: 0.4592 - val_acc: 0.740

Epoch 00068: val_loss improved from 0.32972 to 0.32696, saving model to best_m.h5
 - 1s - loss: 0.3649 - acc: 0.8158 - val_loss: 0.3270 - val_acc: 0.8504
Epoch 69/150
Epoch 00069: val_loss did not improve
 - 1s - loss: 0.3584 - acc: 0.8314 - val_loss: 0.3295 - val_acc: 0.8304
Epoch 70/150
Epoch 00070: val_loss improved from 0.32696 to 0.32263, saving model to best_m.h5
 - 1s - loss: 0.3597 - acc: 0.8264 - val_loss: 0.3226 - val_acc: 0.8379
Epoch 71/150
Epoch 00071: val_loss did not improve
 - 1s - loss: 0.3742 - acc: 0.8156 - val_loss: 0.3326 - val_acc: 0.8454
Epoch 72/150
Epoch 00072: val_loss did not improve
 - 1s - loss: 0.3764 - acc: 0.8091 - val_loss: 0.3309 - val_acc: 0.8329
Epoch 73/150
Epoch 00073: val_loss did not improve
 - 1s - loss: 0.3602 - acc: 0.8200 - val_loss: 0.3467 - val_acc: 0.8404
Epoch 74/150
Epoch 00074: val_loss did not improve
 - 1s - loss: 0.3630 - acc: 0.8250 - val_loss: 0.3284 - val_acc: 0.8479
Epoch 75/150
Epoch 00075: val_loss did not improve
 - 1s - loss:

Epoch 131/150
Epoch 00131: val_loss did not improve
 - 1s - loss: 0.3361 - acc: 0.8366 - val_loss: 0.3222 - val_acc: 0.8653
Epoch 132/150
Epoch 00132: val_loss did not improve
 - 1s - loss: 0.3360 - acc: 0.8392 - val_loss: 0.3102 - val_acc: 0.8579
Epoch 133/150
Epoch 00133: val_loss improved from 0.30730 to 0.30710, saving model to best_m.h5
 - 1s - loss: 0.3259 - acc: 0.8508 - val_loss: 0.3071 - val_acc: 0.8479
Epoch 134/150
Epoch 00134: val_loss did not improve
 - 1s - loss: 0.3415 - acc: 0.8239 - val_loss: 0.3077 - val_acc: 0.8504
Epoch 135/150
Epoch 00135: val_loss did not improve
 - 1s - loss: 0.3268 - acc: 0.8508 - val_loss: 0.3085 - val_acc: 0.8479
Epoch 136/150
Epoch 00136: val_loss did not improve
 - 1s - loss: 0.3246 - acc: 0.8508 - val_loss: 0.3165 - val_acc: 0.8554
Epoch 137/150
Epoch 00137: val_loss did not improve
 - 1s - loss: 0.3684 - acc: 0.8206 - val_loss: 0.3155 - val_acc: 0.8603
Epoch 138/150
Epoch 00138: val_loss did not improve
 - 1s - loss: 0.3182 - acc: 0.8375 -

Epoch 39/150
Epoch 00039: val_loss did not improve
 - 1s - loss: 0.3593 - acc: 0.8264 - val_loss: 0.3869 - val_acc: 0.8080
Epoch 40/150
Epoch 00040: val_loss did not improve
 - 1s - loss: 0.3638 - acc: 0.8306 - val_loss: 0.3670 - val_acc: 0.8080
Epoch 41/150
Epoch 00041: val_loss improved from 0.36576 to 0.36359, saving model to best_m.h5
 - 1s - loss: 0.3761 - acc: 0.8200 - val_loss: 0.3636 - val_acc: 0.8155
Epoch 42/150
Epoch 00042: val_loss did not improve
 - 1s - loss: 0.3781 - acc: 0.8224 - val_loss: 0.3645 - val_acc: 0.8180
Epoch 43/150
Epoch 00043: val_loss did not improve
 - 1s - loss: 0.3717 - acc: 0.8267 - val_loss: 0.3778 - val_acc: 0.7930
Epoch 44/150
Epoch 00044: val_loss did not improve
 - 1s - loss: 0.3718 - acc: 0.8178 - val_loss: 0.3653 - val_acc: 0.8105
Epoch 45/150
Epoch 00045: val_loss did not improve
 - 1s - loss: 0.3793 - acc: 0.8062 - val_loss: 0.3785 - val_acc: 0.8005
Epoch 46/150
Epoch 00046: val_loss improved from 0.36359 to 0.36213, saving model to best_m.h5


Epoch 99/150
Epoch 00099: val_loss did not improve
 - 1s - loss: 0.3103 - acc: 0.8516 - val_loss: 0.3451 - val_acc: 0.8379
Epoch 100/150
Epoch 00100: val_loss improved from 0.34316 to 0.34194, saving model to best_m.h5
 - 1s - loss: 0.3358 - acc: 0.8406 - val_loss: 0.3419 - val_acc: 0.8454
Epoch 101/150
Epoch 00101: val_loss did not improve
 - 1s - loss: 0.3421 - acc: 0.8325 - val_loss: 0.3495 - val_acc: 0.8354
Epoch 102/150
Epoch 00102: val_loss did not improve
 - 1s - loss: 0.3035 - acc: 0.8600 - val_loss: 0.3500 - val_acc: 0.8279
Epoch 103/150
Epoch 00103: val_loss did not improve
 - 1s - loss: 0.3581 - acc: 0.8370 - val_loss: 0.3462 - val_acc: 0.8304
Epoch 104/150
Epoch 00104: val_loss did not improve
 - 1s - loss: 0.3530 - acc: 0.8303 - val_loss: 0.3429 - val_acc: 0.8354
Epoch 105/150
Epoch 00105: val_loss did not improve
 - 1s - loss: 0.3104 - acc: 0.8616 - val_loss: 0.3514 - val_acc: 0.8329
Epoch 106/150
Epoch 00106: val_loss did not improve
 - 2s - loss: 0.3414 - acc: 0.8275 - 

In [6]:
import pickle
# Persist [train_pred, test_pred] from the k-fold run as a pickled feature file.
with open('../features/cnn_1_aug_skimage_denoise_feat.pkl','wb') as fout:
    pickle.dump([train_pred,test_pred],fout)

# Log loss of the k-fold train predictions against the training labels.
from sklearn.metrics import log_loss
print(log_loss(y,train_pred))

# NOTE(review): the figures below look like scores noted from earlier runs
# (presumably log loss x 1e4) -- confirm with the author.
#pre 3219
# new 2965

# skimage 2725

# Build the submission frame (test ids + predicted is_iceberg value) and write it as CSV.
submission = pd.DataFrame()
submission['id']=test_df['id']
submission['is_iceberg']=test_pred
print(submission.head())
submission.to_csv('../results/cnn_1_aug_denoise_preprocess.csv', index=False)

0.301663637817
         id  is_iceberg
0  5941774d    0.489373
1  4023181e    0.358485
2  b20200e4    0.000005
3  e7f018bb    0.974313
4  4371c8c3    0.366174


In [7]:
def create_model():
    '''Build the second CNN variant and return it as an uncompiled keras model.

    A 32-filter stem, two zero-padded double-convolution stages (32 then 64
    filters) and a final 64-filter convolution, each followed by 2x2
    max-pooling, feed a 256-unit dense layer with 50% dropout and a single
    sigmoid output unit.
    '''
    stem = [
        Conv2D(32, (3, 3), input_shape=(75, 75, 3), activation='relu'),
        MaxPooling2D((2, 2), strides=(2, 2)),
    ]

    double_conv_32 = [
        ZeroPadding2D((1, 1)),
        Conv2D(32, (3, 3), activation='relu', strides=1),
        Conv2D(32, (3, 3), activation='relu', strides=1),
        MaxPooling2D((2, 2), strides=(2, 2)),
    ]

    double_conv_64 = [
        ZeroPadding2D((1, 1)),
        Conv2D(64, (3, 3), activation='relu', strides=1),
        Conv2D(64, (3, 3), activation='relu', strides=1),
        MaxPooling2D((2, 2), strides=(2, 2)),
    ]

    last_conv = [
        Conv2D(64, (3, 3), activation='relu', strides=1),
        MaxPooling2D((2, 2), strides=(2, 2)),
    ]

    classifier_head = [
        Flatten(),
        Dense(256, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]

    # Sequential adds the layers in list order, the same order the layers were created in.
    return Sequential(stem + double_conv_32 + double_conv_64 + last_conv + classifier_head)
print('model model')

model model


In [8]:
def kfold_train(fold_cnt=3,rnd=42):
    '''Train one model per CV fold; return out-of-fold and test predictions.

    Reads the notebook-level globals ``train_x``, ``y`` and ``test_x`` and
    builds each fold's network with ``create_model()``. For every fold the
    checkpoint with the lowest validation loss is reloaded before predicting.

    NOTE(review): this cell repeats the kfold_train definition from an earlier
    cell. create_model is resolved at call time, so the earlier definition
    would pick up a newly defined architecture as well.

    Args:
        fold_cnt: number of KFold splits.
        rnd: seed; the shuffled KFold uses ``random_state=2*rnd``.

    Returns:
        (train_pred, test_pred): ``train_pred`` holds, for every training
        sample, the prediction of the fold model that had it in its
        validation split; ``test_pred`` is the mean of the fold models'
        predictions on ``test_x``. Both have a single column.
    '''
    # Size the buffers from the data rather than hard-coding 1604 / 8424 rows.
    train_pred = np.zeros((len(train_x), 1))
    test_pred = np.zeros((len(test_x), 1))
    kf = KFold(n_splits=fold_cnt, shuffle=True, random_state=2*rnd)

    # Fold-independent settings, created once.
    datagen = ImageDataGenerator(
        rotation_range = 20,
        width_shift_range = 0.1,
        height_shift_range = 0.1,
        zoom_range = 0.2,
        horizontal_flip=True
    )
    bat_size = 16
    model_p = 'best_m.h5'  # overwritten by every fold's checkpoint callback

    for train_index, test_index in kf.split(train_x):
        curr_x,curr_y = train_x[train_index],y[train_index]
        val_x,val_y = train_x[test_index],y[test_index]
        steps_train = len(curr_y)//bat_size

        model = create_model()
        # NOTE: LearningRateScheduler(lr_f) sets the rate at the start of each
        # epoch, so the 0.0005 passed to Adam is only a placeholder.
        model.compile(loss='binary_crossentropy', optimizer=Adam(0.0005), metrics=['accuracy'])
        model_chk = ModelCheckpoint(filepath=model_p, monitor='val_loss', save_best_only=True, verbose=1)
        lr_s = LearningRateScheduler(lr_f)
        model.fit_generator(datagen.flow(curr_x, curr_y, batch_size=bat_size),
                  validation_data=(val_x,val_y),
                  steps_per_epoch = steps_train,
                  epochs=150,
                  verbose=2,
                  callbacks=[model_chk,lr_s]
                 )

        # Predict with the best checkpoint of this fold, not the last epoch.
        model = load_model(model_p)
        train_pred[test_index] = model.predict(val_x)
        test_pred = test_pred + model.predict(test_x)/fold_cnt
        print('============================')
    return train_pred,test_pred

# Run 4-fold training again; kfold_train calls create_model(), which resolves
# to whichever definition was executed most recently.
train_pred,test_pred = kfold_train(fold_cnt=4)

Epoch 1/150
Epoch 00001: val_loss improved from inf to 0.69085, saving model to best_m.h5
 - 2s - loss: 0.6936 - acc: 0.5278 - val_loss: 0.6908 - val_acc: 0.5087
Epoch 2/150
Epoch 00002: val_loss improved from 0.69085 to 0.68873, saving model to best_m.h5
 - 1s - loss: 0.6546 - acc: 0.5747 - val_loss: 0.6887 - val_acc: 0.5012
Epoch 3/150
Epoch 00003: val_loss did not improve
 - 1s - loss: 0.6904 - acc: 0.5401 - val_loss: 0.6963 - val_acc: 0.5087
Epoch 4/150
Epoch 00004: val_loss improved from 0.68873 to 0.59644, saving model to best_m.h5
 - 1s - loss: 0.6671 - acc: 0.5795 - val_loss: 0.5964 - val_acc: 0.6733
Epoch 5/150
Epoch 00005: val_loss did not improve
 - 2s - loss: 0.6467 - acc: 0.5933 - val_loss: 0.6023 - val_acc: 0.6758
Epoch 6/150
Epoch 00006: val_loss improved from 0.59644 to 0.54163, saving model to best_m.h5
 - 2s - loss: 0.5884 - acc: 0.6478 - val_loss: 0.5416 - val_acc: 0.7007
Epoch 7/150
Epoch 00007: val_loss improved from 0.54163 to 0.51105, saving model to best_m.h5
 -

Epoch 60/150
Epoch 00060: val_loss did not improve
 - 1s - loss: 0.3137 - acc: 0.8600 - val_loss: 0.2867 - val_acc: 0.8603
Epoch 61/150
Epoch 00061: val_loss improved from 0.26836 to 0.26764, saving model to best_m.h5
 - 1s - loss: 0.3232 - acc: 0.8458 - val_loss: 0.2676 - val_acc: 0.8653
Epoch 62/150
Epoch 00062: val_loss did not improve
 - 1s - loss: 0.3310 - acc: 0.8533 - val_loss: 0.2690 - val_acc: 0.8728
Epoch 63/150
Epoch 00063: val_loss improved from 0.26764 to 0.26661, saving model to best_m.h5
 - 1s - loss: 0.3322 - acc: 0.8339 - val_loss: 0.2666 - val_acc: 0.8828
Epoch 64/150
Epoch 00064: val_loss did not improve
 - 1s - loss: 0.3172 - acc: 0.8633 - val_loss: 0.3101 - val_acc: 0.8504
Epoch 65/150
Epoch 00065: val_loss did not improve
 - 1s - loss: 0.3395 - acc: 0.8414 - val_loss: 0.2746 - val_acc: 0.8603
Epoch 66/150
Epoch 00066: val_loss did not improve
 - 1s - loss: 0.3199 - acc: 0.8550 - val_loss: 0.2723 - val_acc: 0.8653
Epoch 67/150
Epoch 00067: val_loss did not improve


Epoch 124/150
Epoch 00124: val_loss did not improve
 - 1s - loss: 0.3106 - acc: 0.8608 - val_loss: 0.2674 - val_acc: 0.8878
Epoch 125/150
Epoch 00125: val_loss did not improve
 - 1s - loss: 0.2942 - acc: 0.8572 - val_loss: 0.2847 - val_acc: 0.8778
Epoch 126/150
Epoch 00126: val_loss did not improve
 - 1s - loss: 0.3111 - acc: 0.8575 - val_loss: 0.2514 - val_acc: 0.8878
Epoch 127/150
Epoch 00127: val_loss did not improve
 - 1s - loss: 0.2880 - acc: 0.8716 - val_loss: 0.2474 - val_acc: 0.8903
Epoch 128/150
Epoch 00128: val_loss did not improve
 - 1s - loss: 0.3126 - acc: 0.8508 - val_loss: 0.2646 - val_acc: 0.8828
Epoch 129/150
Epoch 00129: val_loss did not improve
 - 1s - loss: 0.2848 - acc: 0.8754 - val_loss: 0.2714 - val_acc: 0.8828
Epoch 130/150
Epoch 00130: val_loss did not improve
 - 1s - loss: 0.3275 - acc: 0.8381 - val_loss: 0.2756 - val_acc: 0.8828
Epoch 131/150
Epoch 00131: val_loss did not improve
 - 1s - loss: 0.3067 - acc: 0.8616 - val_loss: 0.2519 - val_acc: 0.8953
Epoch 13

Epoch 36/150
Epoch 00036: val_loss did not improve
 - 1s - loss: 0.2820 - acc: 0.8733 - val_loss: 0.3616 - val_acc: 0.8454
Epoch 37/150
Epoch 00037: val_loss did not improve
 - 1s - loss: 0.3080 - acc: 0.8616 - val_loss: 0.3517 - val_acc: 0.8304
Epoch 38/150
Epoch 00038: val_loss did not improve
 - 1s - loss: 0.2772 - acc: 0.8717 - val_loss: 0.3430 - val_acc: 0.8454
Epoch 39/150
Epoch 00039: val_loss did not improve
 - 1s - loss: 0.3143 - acc: 0.8650 - val_loss: 0.3529 - val_acc: 0.8279
Epoch 40/150
Epoch 00040: val_loss did not improve
 - 1s - loss: 0.2765 - acc: 0.8716 - val_loss: 0.3535 - val_acc: 0.8354
Epoch 41/150
Epoch 00041: val_loss did not improve
 - 1s - loss: 0.3012 - acc: 0.8625 - val_loss: 0.3389 - val_acc: 0.8304
Epoch 42/150
Epoch 00042: val_loss improved from 0.33322 to 0.32866, saving model to best_m.h5
 - 1s - loss: 0.3099 - acc: 0.8591 - val_loss: 0.3287 - val_acc: 0.8329
Epoch 43/150
Epoch 00043: val_loss did not improve
 - 1s - loss: 0.2846 - acc: 0.8789 - val_los

Epoch 102/150
Epoch 00102: val_loss did not improve
 - 1s - loss: 0.2752 - acc: 0.8867 - val_loss: 0.3488 - val_acc: 0.8379
Epoch 103/150
Epoch 00103: val_loss did not improve
 - 1s - loss: 0.2618 - acc: 0.8925 - val_loss: 0.3338 - val_acc: 0.8379
Epoch 104/150
Epoch 00104: val_loss did not improve
 - 1s - loss: 0.2748 - acc: 0.8700 - val_loss: 0.3424 - val_acc: 0.8279
Epoch 105/150
Epoch 00105: val_loss did not improve
 - 1s - loss: 0.2710 - acc: 0.8825 - val_loss: 0.3316 - val_acc: 0.8354
Epoch 106/150
Epoch 00106: val_loss improved from 0.31460 to 0.31292, saving model to best_m.h5
 - 1s - loss: 0.3035 - acc: 0.8604 - val_loss: 0.3129 - val_acc: 0.8354
Epoch 107/150
Epoch 00107: val_loss did not improve
 - 1s - loss: 0.2525 - acc: 0.8825 - val_loss: 0.3307 - val_acc: 0.8304
Epoch 108/150
Epoch 00108: val_loss did not improve
 - 1s - loss: 0.2788 - acc: 0.8783 - val_loss: 0.3220 - val_acc: 0.8254
Epoch 109/150
Epoch 00109: val_loss did not improve
 - 1s - loss: 0.2792 - acc: 0.8781 -

Epoch 14/150
Epoch 00014: val_loss improved from 0.49143 to 0.48694, saving model to best_m.h5
 - 1s - loss: 0.5023 - acc: 0.7206 - val_loss: 0.4869 - val_acc: 0.7132
Epoch 15/150
Epoch 00015: val_loss improved from 0.48694 to 0.47662, saving model to best_m.h5
 - 1s - loss: 0.4973 - acc: 0.7241 - val_loss: 0.4766 - val_acc: 0.7431
Epoch 16/150
Epoch 00016: val_loss did not improve
 - 1s - loss: 0.4871 - acc: 0.7350 - val_loss: 0.4797 - val_acc: 0.7132
Epoch 17/150
Epoch 00017: val_loss did not improve
 - 1s - loss: 0.5036 - acc: 0.7205 - val_loss: 0.5077 - val_acc: 0.7032
Epoch 18/150
Epoch 00018: val_loss did not improve
 - 1s - loss: 0.4854 - acc: 0.7400 - val_loss: 0.5862 - val_acc: 0.6534
Epoch 19/150
Epoch 00019: val_loss improved from 0.47662 to 0.47083, saving model to best_m.h5
 - 1s - loss: 0.4767 - acc: 0.7339 - val_loss: 0.4708 - val_acc: 0.7955
Epoch 20/150
Epoch 00020: val_loss did not improve
 - 1s - loss: 0.4732 - acc: 0.7550 - val_loss: 0.5130 - val_acc: 0.7032
Epoch 2

Epoch 75/150
Epoch 00075: val_loss did not improve
 - 1s - loss: 0.3686 - acc: 0.8247 - val_loss: 0.3365 - val_acc: 0.8603
Epoch 76/150
Epoch 00076: val_loss did not improve
 - 1s - loss: 0.3581 - acc: 0.8166 - val_loss: 0.3420 - val_acc: 0.8529
Epoch 77/150
Epoch 00077: val_loss did not improve
 - 1s - loss: 0.3786 - acc: 0.8056 - val_loss: 0.3389 - val_acc: 0.8554
Epoch 78/150
Epoch 00078: val_loss did not improve
 - 1s - loss: 0.3393 - acc: 0.8483 - val_loss: 0.3374 - val_acc: 0.8554
Epoch 79/150
Epoch 00079: val_loss did not improve
 - 1s - loss: 0.3548 - acc: 0.8400 - val_loss: 0.3368 - val_acc: 0.8728
Epoch 80/150
Epoch 00080: val_loss improved from 0.33483 to 0.33459, saving model to best_m.h5
 - 1s - loss: 0.3651 - acc: 0.8139 - val_loss: 0.3346 - val_acc: 0.8579
Epoch 81/150
Epoch 00081: val_loss did not improve
 - 1s - loss: 0.3443 - acc: 0.8331 - val_loss: 0.3355 - val_acc: 0.8529
Epoch 82/150
Epoch 00082: val_loss improved from 0.33459 to 0.32963, saving model to best_m.h5


Epoch 138/150
Epoch 00138: val_loss did not improve
 - 1s - loss: 0.3070 - acc: 0.8575 - val_loss: 0.3033 - val_acc: 0.8628
Epoch 139/150
Epoch 00139: val_loss did not improve
 - 1s - loss: 0.3323 - acc: 0.8350 - val_loss: 0.3138 - val_acc: 0.8703
Epoch 140/150
Epoch 00140: val_loss did not improve
 - 1s - loss: 0.3006 - acc: 0.8575 - val_loss: 0.3303 - val_acc: 0.8678
Epoch 141/150
Epoch 00141: val_loss did not improve
 - 1s - loss: 0.3351 - acc: 0.8416 - val_loss: 0.3122 - val_acc: 0.8504
Epoch 142/150
Epoch 00142: val_loss did not improve
 - 1s - loss: 0.3148 - acc: 0.8508 - val_loss: 0.3049 - val_acc: 0.8678
Epoch 143/150
Epoch 00143: val_loss did not improve
 - 1s - loss: 0.3212 - acc: 0.8433 - val_loss: 0.3019 - val_acc: 0.8728
Epoch 144/150
Epoch 00144: val_loss did not improve
 - 1s - loss: 0.3050 - acc: 0.8625 - val_loss: 0.3272 - val_acc: 0.8628
Epoch 145/150
Epoch 00145: val_loss did not improve
 - 1s - loss: 0.3173 - acc: 0.8383 - val_loss: 0.3171 - val_acc: 0.8579
Epoch 14

Epoch 46/150
Epoch 00046: val_loss did not improve
 - 1s - loss: 0.2881 - acc: 0.8748 - val_loss: 0.3464 - val_acc: 0.8603
Epoch 47/150
Epoch 00047: val_loss did not improve
 - 1s - loss: 0.3058 - acc: 0.8683 - val_loss: 0.3461 - val_acc: 0.8479
Epoch 48/150
Epoch 00048: val_loss did not improve
 - 1s - loss: 0.2810 - acc: 0.8683 - val_loss: 0.3615 - val_acc: 0.8254
Epoch 49/150
Epoch 00049: val_loss did not improve
 - 1s - loss: 0.2796 - acc: 0.8775 - val_loss: 0.3595 - val_acc: 0.8329
Epoch 50/150
Epoch 00050: val_loss did not improve
 - 1s - loss: 0.2746 - acc: 0.8700 - val_loss: 0.3489 - val_acc: 0.8404
Epoch 51/150
Epoch 00051: val_loss improved from 0.34580 to 0.34518, saving model to best_m.h5
 - 1s - loss: 0.2785 - acc: 0.8575 - val_loss: 0.3452 - val_acc: 0.8429
Epoch 52/150
Epoch 00052: val_loss improved from 0.34518 to 0.34437, saving model to best_m.h5
 - 1s - loss: 0.2737 - acc: 0.8833 - val_loss: 0.3444 - val_acc: 0.8504
Epoch 53/150
Epoch 00053: val_loss did not improve


Epoch 111/150
Epoch 00111: val_loss did not improve
 - 1s - loss: 0.2660 - acc: 0.8808 - val_loss: 0.3594 - val_acc: 0.8379
Epoch 112/150
Epoch 00112: val_loss did not improve
 - 1s - loss: 0.2546 - acc: 0.8816 - val_loss: 0.3384 - val_acc: 0.8529
Epoch 113/150
Epoch 00113: val_loss improved from 0.33824 to 0.33725, saving model to best_m.h5
 - 1s - loss: 0.2471 - acc: 0.8841 - val_loss: 0.3372 - val_acc: 0.8529
Epoch 114/150
Epoch 00114: val_loss did not improve
 - 1s - loss: 0.2379 - acc: 0.8950 - val_loss: 0.3495 - val_acc: 0.8479
Epoch 115/150
Epoch 00115: val_loss did not improve
 - 1s - loss: 0.2457 - acc: 0.8967 - val_loss: 0.3399 - val_acc: 0.8529
Epoch 116/150
Epoch 00116: val_loss did not improve
 - 1s - loss: 0.2526 - acc: 0.8883 - val_loss: 0.3418 - val_acc: 0.8454
Epoch 117/150
Epoch 00117: val_loss did not improve
 - 1s - loss: 0.2560 - acc: 0.8800 - val_loss: 0.3707 - val_acc: 0.8254
Epoch 118/150
Epoch 00118: val_loss did not improve
 - 1s - loss: 0.2519 - acc: 0.8808 -

In [9]:
# Persist [train_pred, test_pred] from the k-fold run as a pickled feature file.
# Relies on `pickle` and `log_loss` imported in an earlier cell.
with open('../features/cnn_2_aug_denoise_preprocess_feat.pkl','wb') as fout:
    pickle.dump([train_pred,test_pred],fout)

# Log loss of the k-fold train predictions against the training labels.
print(log_loss(y,train_pred))

# NOTE(review): the figures below look like scores noted from earlier runs
# (presumably log loss x 1e4) -- confirm with the author.
# this 2728
# skimage 2710

# Build the submission frame (test ids + predicted is_iceberg value) and write it as CSV.
submission = pd.DataFrame()
submission['id']=test_df['id']
submission['is_iceberg']=test_pred
print(submission.head())
submission.to_csv('../results/cnn_2_aug_denoise_preprocess.csv', index=False)

0.298207797385
         id  is_iceberg
0  5941774d    0.620852
1  4023181e    0.405667
2  b20200e4    0.110021
3  e7f018bb    0.974256
4  4371c8c3    0.332398


In [10]:
def create_model():
    '''Build the third CNN variant and return it as an uncompiled keras model.

    Four stages of 3x3 convolution + max-pooling + 20% dropout (64, 128, 128
    and 64 filters) are flattened into two dense layers (512 and 256 units,
    each with 20% dropout) and a single sigmoid output unit.
    '''
    # (filters, pool window) for CNN 1..4; only the first stage pools with 3x3.
    conv_stages = (
        (64, (3, 3)),
        (128, (2, 2)),
        (128, (2, 2)),
        (64, (2, 2)),
    )

    model = Sequential()
    for stage_no, (n_filters, pool_window) in enumerate(conv_stages):
        # Only the very first layer declares the input shape.
        first_layer_args = {'input_shape': (75, 75, 3)} if stage_no == 0 else {}
        model.add(Conv2D(n_filters, kernel_size=(3, 3), activation='relu', **first_layer_args))
        model.add(MaxPooling2D(pool_size=pool_window, strides=(2, 2)))
        model.add(Dropout(0.2))

    # Dense layers need a flat feature vector
    model.add(Flatten())

    # Dense 1 and Dense 2
    for n_units in (512, 256):
        model.add(Dense(n_units, activation='relu'))
        model.add(Dropout(0.2))

    # Output
    model.add(Dense(1, activation="sigmoid"))
    return model
print('model model')

model model


In [11]:
def kfold_train(fold_cnt=3,rnd=42):
    '''Run K-fold training with augmentation; return out-of-fold and test predictions.

    Operates on the notebook-level globals train_x, y and test_x. For every
    fold a fresh network from create_model() is fitted on augmented batches,
    the checkpoint with the lowest validation loss is reloaded, and that model
    predicts both the held-out fold and the full test set. create_model and
    lr_f are looked up when this function runs, so the most recently executed
    definitions are the ones used.

    Args:
        fold_cnt: number of KFold splits; one model is trained per split.
        rnd: seed base; the shuffled KFold uses random_state=2*rnd.

    Returns:
        (train_pred, test_pred): train_pred has one row per training sample,
        holding the prediction made while that sample was held out; test_pred
        is the average of the per-fold predictions on test_x.
    '''
    # Size the buffers from the data instead of hard-coding 1604 / 8424 rows,
    # so the function also works on a subsample or a different dataset.
    train_pred = np.zeros((len(train_x), 1))
    test_pred = np.zeros((len(test_x), 1))
    kf = KFold(n_splits=fold_cnt, shuffle=True, random_state=2*rnd)

    # Augmentation settings, batch size and checkpoint path are the same for every fold
    datagen = ImageDataGenerator(
        rotation_range = 20,
        width_shift_range = 0.1,
        height_shift_range = 0.1,
        zoom_range = 0.2,
        horizontal_flip=True
    )
    bat_size = 16
    model_p = 'best_m.h5'

    for train_index, test_index in kf.split(train_x):
        curr_x,curr_y = train_x[train_index],y[train_index]
        val_x,val_y = train_x[test_index],y[test_index]

        # Whole batches only: a trailing partial batch is not counted towards an epoch
        steps_train = len(curr_y)//bat_size

        model = create_model()
        model.compile(loss='binary_crossentropy', optimizer=Adam(0.0005), metrics=['accuracy'])
        # New checkpoint callback per fold, so "best val_loss so far" starts over each time
        model_chk = ModelCheckpoint(filepath=model_p, monitor='val_loss', save_best_only=True, verbose=1)
        lr_s = LearningRateScheduler(lr_f)
        model.fit_generator(datagen.flow(curr_x, curr_y, batch_size=bat_size),
                  validation_data=(val_x,val_y),
                  steps_per_epoch = steps_train,
                  epochs=150,
                  verbose=2,
                  callbacks=[model_chk,lr_s]
                 )

        # Predict with the best checkpoint rather than the last-epoch weights
        model = load_model(model_p)
        train_pred[test_index] = model.predict(val_x)
        # Each fold contributes 1/fold_cnt, so the sum over folds is the mean
        test_pred += model.predict(test_x)/fold_cnt
        print('============================')
    return train_pred,test_pred

# Run 4-fold cross-validation: train_pred receives the out-of-fold predictions,
# test_pred the test-set predictions averaged over the four fold models.
train_pred,test_pred = kfold_train(fold_cnt=4)

Epoch 1/150
Epoch 00001: val_loss improved from inf to 0.67610, saving model to best_m.h5
 - 3s - loss: 0.6945 - acc: 0.5239 - val_loss: 0.6761 - val_acc: 0.5761
Epoch 2/150
Epoch 00002: val_loss did not improve
 - 2s - loss: 0.6895 - acc: 0.5503 - val_loss: 0.6921 - val_acc: 0.5087
Epoch 3/150
Epoch 00003: val_loss improved from 0.67610 to 0.60292, saving model to best_m.h5
 - 2s - loss: 0.6480 - acc: 0.5780 - val_loss: 0.6029 - val_acc: 0.4813
Epoch 4/150
Epoch 00004: val_loss improved from 0.60292 to 0.58214, saving model to best_m.h5
 - 2s - loss: 0.6411 - acc: 0.6153 - val_loss: 0.5821 - val_acc: 0.7057
Epoch 5/150
Epoch 00005: val_loss did not improve
 - 2s - loss: 0.6062 - acc: 0.6722 - val_loss: 0.5940 - val_acc: 0.6608
Epoch 6/150
Epoch 00006: val_loss improved from 0.58214 to 0.55652, saving model to best_m.h5
 - 2s - loss: 0.5908 - acc: 0.6662 - val_loss: 0.5565 - val_acc: 0.7357
Epoch 7/150
Epoch 00007: val_loss did not improve
 - 2s - loss: 0.5760 - acc: 0.6800 - val_loss:

Epoch 59/150
Epoch 00059: val_loss did not improve
 - 2s - loss: 0.3535 - acc: 0.8325 - val_loss: 0.2743 - val_acc: 0.8803
Epoch 60/150
Epoch 00060: val_loss did not improve
 - 2s - loss: 0.3630 - acc: 0.8197 - val_loss: 0.2840 - val_acc: 0.8628
Epoch 61/150
Epoch 00061: val_loss did not improve
 - 2s - loss: 0.3372 - acc: 0.8372 - val_loss: 0.2785 - val_acc: 0.8628
Epoch 62/150
Epoch 00062: val_loss did not improve
 - 2s - loss: 0.3426 - acc: 0.8266 - val_loss: 0.2927 - val_acc: 0.8554
Epoch 63/150
Epoch 00063: val_loss did not improve
 - 2s - loss: 0.3545 - acc: 0.8272 - val_loss: 0.2842 - val_acc: 0.8753
Epoch 64/150
Epoch 00064: val_loss did not improve
 - 2s - loss: 0.3435 - acc: 0.8370 - val_loss: 0.3114 - val_acc: 0.8404
Epoch 65/150
Epoch 00065: val_loss did not improve
 - 2s - loss: 0.3476 - acc: 0.8291 - val_loss: 0.2907 - val_acc: 0.8678
Epoch 66/150
Epoch 00066: val_loss did not improve
 - 2s - loss: 0.3497 - acc: 0.8347 - val_loss: 0.2848 - val_acc: 0.8653
Epoch 67/150
Epo

Epoch 124/150
Epoch 00124: val_loss did not improve
 - 2s - loss: 0.3258 - acc: 0.8403 - val_loss: 0.2676 - val_acc: 0.8678
Epoch 125/150
Epoch 00125: val_loss did not improve
 - 2s - loss: 0.3051 - acc: 0.8583 - val_loss: 0.2675 - val_acc: 0.8703
Epoch 126/150
Epoch 00126: val_loss did not improve
 - 2s - loss: 0.3188 - acc: 0.8483 - val_loss: 0.2606 - val_acc: 0.8753
Epoch 127/150
Epoch 00127: val_loss did not improve
 - 2s - loss: 0.2939 - acc: 0.8650 - val_loss: 0.2597 - val_acc: 0.8603
Epoch 128/150
Epoch 00128: val_loss did not improve
 - 2s - loss: 0.3184 - acc: 0.8547 - val_loss: 0.2599 - val_acc: 0.8728
Epoch 129/150
Epoch 00129: val_loss did not improve
 - 2s - loss: 0.3184 - acc: 0.8625 - val_loss: 0.2751 - val_acc: 0.8678
Epoch 130/150
Epoch 00130: val_loss did not improve
 - 2s - loss: 0.3186 - acc: 0.8525 - val_loss: 0.2816 - val_acc: 0.8703
Epoch 131/150
Epoch 00131: val_loss did not improve
 - 2s - loss: 0.3047 - acc: 0.8414 - val_loss: 0.2619 - val_acc: 0.8703
Epoch 13

Epoch 35/150
Epoch 00035: val_loss did not improve
 - 2s - loss: 0.3708 - acc: 0.8225 - val_loss: 0.3734 - val_acc: 0.8180
Epoch 36/150
Epoch 00036: val_loss did not improve
 - 2s - loss: 0.3232 - acc: 0.8422 - val_loss: 0.3809 - val_acc: 0.8180
Epoch 37/150
Epoch 00037: val_loss did not improve
 - 2s - loss: 0.3661 - acc: 0.8300 - val_loss: 0.3879 - val_acc: 0.8180
Epoch 38/150
Epoch 00038: val_loss improved from 0.37067 to 0.35551, saving model to best_m.h5
 - 2s - loss: 0.3509 - acc: 0.8208 - val_loss: 0.3555 - val_acc: 0.8180
Epoch 39/150
Epoch 00039: val_loss did not improve
 - 2s - loss: 0.3399 - acc: 0.8425 - val_loss: 0.3573 - val_acc: 0.8254
Epoch 40/150
Epoch 00040: val_loss improved from 0.35551 to 0.35373, saving model to best_m.h5
 - 2s - loss: 0.3361 - acc: 0.8341 - val_loss: 0.3537 - val_acc: 0.8329
Epoch 41/150
Epoch 00041: val_loss did not improve
 - 2s - loss: 0.3433 - acc: 0.8391 - val_loss: 0.3824 - val_acc: 0.8204
Epoch 42/150
Epoch 00042: val_loss did not improve


Epoch 99/150
Epoch 00099: val_loss did not improve
 - 2s - loss: 0.2958 - acc: 0.8648 - val_loss: 0.3539 - val_acc: 0.8279
Epoch 100/150
Epoch 00100: val_loss did not improve
 - 2s - loss: 0.3232 - acc: 0.8639 - val_loss: 0.3612 - val_acc: 0.8130
Epoch 101/150
Epoch 00101: val_loss did not improve
 - 2s - loss: 0.2953 - acc: 0.8466 - val_loss: 0.3306 - val_acc: 0.8304
Epoch 102/150
Epoch 00102: val_loss did not improve
 - 2s - loss: 0.3059 - acc: 0.8633 - val_loss: 0.3439 - val_acc: 0.8329
Epoch 103/150
Epoch 00103: val_loss did not improve
 - 2s - loss: 0.2991 - acc: 0.8614 - val_loss: 0.3499 - val_acc: 0.8329
Epoch 104/150
Epoch 00104: val_loss did not improve
 - 2s - loss: 0.3020 - acc: 0.8547 - val_loss: 0.3285 - val_acc: 0.8454
Epoch 105/150
Epoch 00105: val_loss did not improve
 - 2s - loss: 0.3088 - acc: 0.8500 - val_loss: 0.3357 - val_acc: 0.8404
Epoch 106/150
Epoch 00106: val_loss did not improve
 - 2s - loss: 0.3053 - acc: 0.8518 - val_loss: 0.3364 - val_acc: 0.8379
Epoch 107

Epoch 11/150
Epoch 00011: val_loss did not improve
 - 2s - loss: 0.4967 - acc: 0.7470 - val_loss: 0.4671 - val_acc: 0.7731
Epoch 12/150
Epoch 00012: val_loss improved from 0.44868 to 0.42830, saving model to best_m.h5
 - 2s - loss: 0.4764 - acc: 0.7781 - val_loss: 0.4283 - val_acc: 0.7980
Epoch 13/150
Epoch 00013: val_loss improved from 0.42830 to 0.41352, saving model to best_m.h5
 - 2s - loss: 0.4806 - acc: 0.7717 - val_loss: 0.4135 - val_acc: 0.8055
Epoch 14/150
Epoch 00014: val_loss improved from 0.41352 to 0.39920, saving model to best_m.h5
 - 2s - loss: 0.4471 - acc: 0.7872 - val_loss: 0.3992 - val_acc: 0.8055
Epoch 15/150
Epoch 00015: val_loss did not improve
 - 2s - loss: 0.4601 - acc: 0.7714 - val_loss: 0.4403 - val_acc: 0.7980
Epoch 16/150
Epoch 00016: val_loss did not improve
 - 2s - loss: 0.4451 - acc: 0.7941 - val_loss: 0.4090 - val_acc: 0.8155
Epoch 17/150
Epoch 00017: val_loss did not improve
 - 2s - loss: 0.4543 - acc: 0.7878 - val_loss: 0.4034 - val_acc: 0.7955
Epoch 1

Epoch 74/150
Epoch 00074: val_loss did not improve
 - 2s - loss: 0.3391 - acc: 0.8414 - val_loss: 0.2974 - val_acc: 0.8504
Epoch 75/150
Epoch 00075: val_loss did not improve
 - 2s - loss: 0.3230 - acc: 0.8500 - val_loss: 0.2972 - val_acc: 0.8628
Epoch 76/150
Epoch 00076: val_loss did not improve
 - 2s - loss: 0.3161 - acc: 0.8489 - val_loss: 0.2910 - val_acc: 0.8728
Epoch 77/150
Epoch 00077: val_loss improved from 0.28616 to 0.28503, saving model to best_m.h5
 - 2s - loss: 0.3349 - acc: 0.8370 - val_loss: 0.2850 - val_acc: 0.8703
Epoch 78/150
Epoch 00078: val_loss did not improve
 - 2s - loss: 0.3313 - acc: 0.8350 - val_loss: 0.2859 - val_acc: 0.8653
Epoch 79/150
Epoch 00079: val_loss did not improve
 - 2s - loss: 0.3470 - acc: 0.8425 - val_loss: 0.2900 - val_acc: 0.8603
Epoch 80/150
Epoch 00080: val_loss did not improve
 - 2s - loss: 0.3193 - acc: 0.8475 - val_loss: 0.2903 - val_acc: 0.8703
Epoch 81/150
Epoch 00081: val_loss did not improve
 - 2s - loss: 0.3357 - acc: 0.8458 - val_los

Epoch 139/150
Epoch 00139: val_loss did not improve
 - 2s - loss: 0.3068 - acc: 0.8475 - val_loss: 0.3079 - val_acc: 0.8504
Epoch 140/150
Epoch 00140: val_loss did not improve
 - 2s - loss: 0.2911 - acc: 0.8691 - val_loss: 0.2955 - val_acc: 0.8728
Epoch 141/150
Epoch 00141: val_loss did not improve
 - 2s - loss: 0.3228 - acc: 0.8550 - val_loss: 0.2841 - val_acc: 0.8778
Epoch 142/150
Epoch 00142: val_loss did not improve
 - 2s - loss: 0.3205 - acc: 0.8481 - val_loss: 0.2870 - val_acc: 0.8778
Epoch 143/150
Epoch 00143: val_loss did not improve
 - 2s - loss: 0.2956 - acc: 0.8591 - val_loss: 0.2925 - val_acc: 0.8628
Epoch 144/150
Epoch 00144: val_loss did not improve
 - 2s - loss: 0.2920 - acc: 0.8658 - val_loss: 0.2938 - val_acc: 0.8653
Epoch 145/150
Epoch 00145: val_loss did not improve
 - 2s - loss: 0.2992 - acc: 0.8491 - val_loss: 0.2855 - val_acc: 0.8753
Epoch 146/150
Epoch 00146: val_loss did not improve
 - 2s - loss: 0.2915 - acc: 0.8508 - val_loss: 0.2885 - val_acc: 0.8678
Epoch 14

Epoch 48/150
Epoch 00048: val_loss did not improve
 - 2s - loss: 0.3784 - acc: 0.8142 - val_loss: 0.3669 - val_acc: 0.8180
Epoch 49/150
Epoch 00049: val_loss improved from 0.35708 to 0.35660, saving model to best_m.h5
 - 2s - loss: 0.3881 - acc: 0.8131 - val_loss: 0.3566 - val_acc: 0.8379
Epoch 50/150
Epoch 00050: val_loss improved from 0.35660 to 0.35022, saving model to best_m.h5
 - 2s - loss: 0.3620 - acc: 0.8350 - val_loss: 0.3502 - val_acc: 0.8254
Epoch 51/150
Epoch 00051: val_loss did not improve
 - 2s - loss: 0.3909 - acc: 0.8222 - val_loss: 0.3542 - val_acc: 0.8304
Epoch 52/150
Epoch 00052: val_loss improved from 0.35022 to 0.34633, saving model to best_m.h5
 - 2s - loss: 0.3588 - acc: 0.8356 - val_loss: 0.3463 - val_acc: 0.8329
Epoch 53/150
Epoch 00053: val_loss did not improve
 - 2s - loss: 0.3657 - acc: 0.8331 - val_loss: 0.3587 - val_acc: 0.8279
Epoch 54/150
Epoch 00054: val_loss did not improve
 - 2s - loss: 0.3622 - acc: 0.8331 - val_loss: 0.3767 - val_acc: 0.8130
Epoch 5

Epoch 112/150
Epoch 00112: val_loss improved from 0.33540 to 0.33535, saving model to best_m.h5
 - 2s - loss: 0.3229 - acc: 0.8450 - val_loss: 0.3354 - val_acc: 0.8279
Epoch 113/150
Epoch 00113: val_loss did not improve
 - 2s - loss: 0.3144 - acc: 0.8533 - val_loss: 0.3471 - val_acc: 0.8254
Epoch 114/150
Epoch 00114: val_loss did not improve
 - 2s - loss: 0.3036 - acc: 0.8516 - val_loss: 0.3357 - val_acc: 0.8429
Epoch 115/150
Epoch 00115: val_loss did not improve
 - 2s - loss: 0.2968 - acc: 0.8675 - val_loss: 0.3518 - val_acc: 0.8180
Epoch 116/150
Epoch 00116: val_loss did not improve
 - 2s - loss: 0.2958 - acc: 0.8589 - val_loss: 0.3640 - val_acc: 0.8379
Epoch 117/150
Epoch 00117: val_loss did not improve
 - 2s - loss: 0.3260 - acc: 0.8508 - val_loss: 0.3417 - val_acc: 0.8254
Epoch 118/150
Epoch 00118: val_loss did not improve
 - 2s - loss: 0.3030 - acc: 0.8641 - val_loss: 0.3376 - val_acc: 0.8304
Epoch 119/150
Epoch 00119: val_loss improved from 0.33535 to 0.33263, saving model to be

In [12]:
# Persist the [out-of-fold, test] prediction pair for later reuse as features
feature_path = '../features/cnn_3_aug_denoise_preprocess_feat.pkl'
with open(feature_path, 'wb') as fout:
    pickle.dump([train_pred, test_pred], fout)

# Log loss of the held-out (out-of-fold) predictions against the training labels
oof_loss = log_loss(y, train_pred)
print(oof_loss)

# Author's score notes, kept verbatim:
# this 2737
# new  2509

# Submission frame: test ids plus the fold-averaged predictions
submission = test_df[['id']].copy()
submission['is_iceberg'] = test_pred
print(submission.head())
submission.to_csv('../results/cnn_3_aug_denoise_preprocess.csv', index=False)


0.29239329972
         id  is_iceberg
0  5941774d    0.556787
1  4023181e    0.368750
2  b20200e4    0.013724
3  e7f018bb    0.988462
4  4371c8c3    0.335887


In [13]:
from keras.callbacks import ModelCheckpoint,LearningRateScheduler
def ConvBlock(model, layers, filters):
    '''Append a padded-convolution block to `model` in place.

    Adds `layers` repetitions of ZeroPadding2D((1, 1)) -> Conv2D(filters, 3x3,
    relu) -> BatchNormalization(axis=3), followed by a single 2x2 max pool
    with stride 2. Nothing is returned; the model is modified directly.
    '''
    for _ in range(layers):
        # Build the three layers in order, then attach them in that same order
        stage = (
            ZeroPadding2D((1, 1)),
            Conv2D(filters, (3, 3), activation='relu'),
            BatchNormalization(axis=3),
        )
        for layer in stage:
            model.add(layer)
    # One pooling step closes the block, however many conv layers precede it
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))

def create_model():
    '''Build (without compiling) the ConvBlock-based CNN for 75x75x3 inputs.

    An identity Lambda layer declares the input shape, four single-convolution
    ConvBlocks follow (32, 64, 128 and 128 filters), and the flattened result
    feeds a one-unit sigmoid output.

    Returns:
        The assembled keras Sequential model.
    '''
    net = Sequential()

    # Pass-through layer whose only job is to carry the 75x75x3 input shape
    net.add(Lambda(lambda x: x, input_shape=(75, 75, 3)))

    # Feature-map sizes noted by the author after each block:
    # 37x37x32, 18x18x64, 9x9x128, 4x4x128
    for n_filters in (32, 64, 128, 128):
        ConvBlock(net, 1, n_filters)

    net.add(Flatten())
    net.add(Dense(1,activation='sigmoid'))

    return net
# Marker output: shows that this cell ran, i.e. ConvBlock and create_model now refer to the definitions above.
print('model model')


model model


In [14]:
def kfold_train(fold_cnt=3,rnd=42):
    '''Run K-fold training with augmentation; return out-of-fold and test predictions.

    Operates on the notebook-level globals train_x, y and test_x. For every
    fold a fresh network from create_model() is fitted on augmented batches,
    the checkpoint with the lowest validation loss is reloaded, and that model
    predicts both the held-out fold and the full test set. create_model and
    lr_f are looked up when this function runs, so the most recently executed
    definitions are the ones used.

    Args:
        fold_cnt: number of KFold splits; one model is trained per split.
        rnd: seed base; the shuffled KFold uses random_state=2*rnd.

    Returns:
        (train_pred, test_pred): train_pred has one row per training sample,
        holding the prediction made while that sample was held out; test_pred
        is the average of the per-fold predictions on test_x.
    '''
    # Size the buffers from the data instead of hard-coding 1604 / 8424 rows,
    # so the function also works on a subsample or a different dataset.
    train_pred = np.zeros((len(train_x), 1))
    test_pred = np.zeros((len(test_x), 1))
    kf = KFold(n_splits=fold_cnt, shuffle=True, random_state=2*rnd)

    # Augmentation settings, batch size and checkpoint path are the same for every fold
    datagen = ImageDataGenerator(
        rotation_range = 20,
        width_shift_range = 0.1,
        height_shift_range = 0.1,
        zoom_range = 0.2,
        horizontal_flip=True
    )
    bat_size = 16
    model_p = 'best_m.h5'

    for train_index, test_index in kf.split(train_x):
        curr_x,curr_y = train_x[train_index],y[train_index]
        val_x,val_y = train_x[test_index],y[test_index]

        # Whole batches only: a trailing partial batch is not counted towards an epoch
        steps_train = len(curr_y)//bat_size

        model = create_model()
        model.compile(loss='binary_crossentropy', optimizer=Adam(0.0005), metrics=['accuracy'])
        # New checkpoint callback per fold, so "best val_loss so far" starts over each time
        model_chk = ModelCheckpoint(filepath=model_p, monitor='val_loss', save_best_only=True, verbose=1)
        lr_s = LearningRateScheduler(lr_f)
        model.fit_generator(datagen.flow(curr_x, curr_y, batch_size=bat_size),
                  validation_data=(val_x,val_y),
                  steps_per_epoch = steps_train,
                  epochs=150,
                  verbose=2,
                  callbacks=[model_chk,lr_s]
                 )

        # Predict with the best checkpoint rather than the last-epoch weights
        model = load_model(model_p)
        train_pred[test_index] = model.predict(val_x)
        # Each fold contributes 1/fold_cnt, so the sum over folds is the mean
        test_pred += model.predict(test_x)/fold_cnt
        print('============================')
    return train_pred,test_pred

# Run 4-fold cross-validation: train_pred receives the out-of-fold predictions,
# test_pred the test-set predictions averaged over the four fold models.
train_pred,test_pred = kfold_train(fold_cnt=4)

Epoch 1/150
Epoch 00001: val_loss improved from inf to 0.73040, saving model to best_m.h5
 - 4s - loss: 0.7496 - acc: 0.6822 - val_loss: 0.7304 - val_acc: 0.4913
Epoch 2/150
Epoch 00002: val_loss did not improve
 - 2s - loss: 0.4875 - acc: 0.7658 - val_loss: 0.8919 - val_acc: 0.4913
Epoch 3/150
Epoch 00003: val_loss improved from 0.73040 to 0.67290, saving model to best_m.h5
 - 2s - loss: 0.5298 - acc: 0.7456 - val_loss: 0.6729 - val_acc: 0.5586
Epoch 4/150
Epoch 00004: val_loss did not improve
 - 2s - loss: 0.4553 - acc: 0.7775 - val_loss: 0.7394 - val_acc: 0.4913
Epoch 5/150
Epoch 00005: val_loss improved from 0.67290 to 0.56563, saving model to best_m.h5
 - 2s - loss: 0.4693 - acc: 0.7767 - val_loss: 0.5656 - val_acc: 0.7232
Epoch 6/150
Epoch 00006: val_loss improved from 0.56563 to 0.47938, saving model to best_m.h5
 - 2s - loss: 0.4882 - acc: 0.7545 - val_loss: 0.4794 - val_acc: 0.7980
Epoch 7/150
Epoch 00007: val_loss improved from 0.47938 to 0.40398, saving model to best_m.h5
 -

Epoch 62/150
Epoch 00062: val_loss did not improve
 - 2s - loss: 0.2385 - acc: 0.8983 - val_loss: 0.2717 - val_acc: 0.8878
Epoch 63/150
Epoch 00063: val_loss did not improve
 - 2s - loss: 0.2173 - acc: 0.9033 - val_loss: 0.2530 - val_acc: 0.8903
Epoch 64/150
Epoch 00064: val_loss did not improve
 - 2s - loss: 0.2319 - acc: 0.9058 - val_loss: 0.2360 - val_acc: 0.8903
Epoch 65/150
Epoch 00065: val_loss did not improve
 - 2s - loss: 0.2233 - acc: 0.9042 - val_loss: 0.2423 - val_acc: 0.8928
Epoch 66/150
Epoch 00066: val_loss did not improve
 - 2s - loss: 0.2246 - acc: 0.8989 - val_loss: 0.2584 - val_acc: 0.8903
Epoch 67/150
Epoch 00067: val_loss did not improve
 - 2s - loss: 0.2468 - acc: 0.8906 - val_loss: 0.2362 - val_acc: 0.9027
Epoch 68/150
Epoch 00068: val_loss did not improve
 - 2s - loss: 0.2378 - acc: 0.8973 - val_loss: 0.2697 - val_acc: 0.8828
Epoch 69/150
Epoch 00069: val_loss did not improve
 - 2s - loss: 0.2319 - acc: 0.9050 - val_loss: 0.2494 - val_acc: 0.8728
Epoch 70/150
Epo

Epoch 129/150
Epoch 00129: val_loss did not improve
 - 2s - loss: 0.1440 - acc: 0.9458 - val_loss: 0.2580 - val_acc: 0.8903
Epoch 130/150
Epoch 00130: val_loss did not improve
 - 2s - loss: 0.1567 - acc: 0.9392 - val_loss: 0.2826 - val_acc: 0.8703
Epoch 131/150
Epoch 00131: val_loss did not improve
 - 2s - loss: 0.1540 - acc: 0.9450 - val_loss: 0.2562 - val_acc: 0.8903
Epoch 132/150
Epoch 00132: val_loss did not improve
 - 2s - loss: 0.1542 - acc: 0.9342 - val_loss: 0.2714 - val_acc: 0.8853
Epoch 133/150
Epoch 00133: val_loss did not improve
 - 2s - loss: 0.1579 - acc: 0.9373 - val_loss: 0.2863 - val_acc: 0.8753
Epoch 134/150
Epoch 00134: val_loss did not improve
 - 2s - loss: 0.1401 - acc: 0.9458 - val_loss: 0.3368 - val_acc: 0.8753
Epoch 135/150
Epoch 00135: val_loss did not improve
 - 2s - loss: 0.1421 - acc: 0.9475 - val_loss: 0.2849 - val_acc: 0.8828
Epoch 136/150
Epoch 00136: val_loss did not improve
 - 2s - loss: 0.1368 - acc: 0.9431 - val_loss: 0.2707 - val_acc: 0.8703
Epoch 13

Epoch 40/150
Epoch 00040: val_loss improved from 0.27382 to 0.27370, saving model to best_m.h5
 - 2s - loss: 0.2568 - acc: 0.8906 - val_loss: 0.2737 - val_acc: 0.8653
Epoch 41/150
Epoch 00041: val_loss did not improve
 - 2s - loss: 0.2092 - acc: 0.9158 - val_loss: 0.2815 - val_acc: 0.8653
Epoch 42/150
Epoch 00042: val_loss did not improve
 - 2s - loss: 0.2502 - acc: 0.8892 - val_loss: 0.2985 - val_acc: 0.8579
Epoch 43/150
Epoch 00043: val_loss did not improve
 - 2s - loss: 0.2354 - acc: 0.8925 - val_loss: 0.3006 - val_acc: 0.8554
Epoch 44/150
Epoch 00044: val_loss did not improve
 - 2s - loss: 0.2142 - acc: 0.9081 - val_loss: 0.3088 - val_acc: 0.8579
Epoch 45/150
Epoch 00045: val_loss did not improve
 - 2s - loss: 0.2298 - acc: 0.9039 - val_loss: 0.2806 - val_acc: 0.8703
Epoch 46/150
Epoch 00046: val_loss did not improve
 - 2s - loss: 0.2542 - acc: 0.8914 - val_loss: 0.2873 - val_acc: 0.8753
Epoch 47/150
Epoch 00047: val_loss did not improve
 - 2s - loss: 0.2256 - acc: 0.9008 - val_los

Epoch 106/150
Epoch 00106: val_loss did not improve
 - 2s - loss: 0.2034 - acc: 0.9220 - val_loss: 0.3054 - val_acc: 0.8579
Epoch 107/150
Epoch 00107: val_loss did not improve
 - 2s - loss: 0.1599 - acc: 0.9367 - val_loss: 0.2933 - val_acc: 0.8653
Epoch 108/150
Epoch 00108: val_loss did not improve
 - 2s - loss: 0.1678 - acc: 0.9323 - val_loss: 0.3017 - val_acc: 0.8628
Epoch 109/150
Epoch 00109: val_loss did not improve
 - 2s - loss: 0.1627 - acc: 0.9308 - val_loss: 0.3213 - val_acc: 0.8554
Epoch 110/150
Epoch 00110: val_loss did not improve
 - 2s - loss: 0.1821 - acc: 0.9300 - val_loss: 0.2983 - val_acc: 0.8678
Epoch 111/150
Epoch 00111: val_loss did not improve
 - 2s - loss: 0.1773 - acc: 0.9195 - val_loss: 0.3367 - val_acc: 0.8703
Epoch 112/150
Epoch 00112: val_loss did not improve
 - 2s - loss: 0.1607 - acc: 0.9300 - val_loss: 0.3269 - val_acc: 0.8504
Epoch 113/150
Epoch 00113: val_loss did not improve
 - 2s - loss: 0.1620 - acc: 0.9317 - val_loss: 0.3161 - val_acc: 0.8628
Epoch 11

Epoch 20/150
Epoch 00020: val_loss did not improve
 - 2s - loss: 0.3176 - acc: 0.8608 - val_loss: 0.4125 - val_acc: 0.7955
Epoch 21/150
Epoch 00021: val_loss did not improve
 - 2s - loss: 0.3352 - acc: 0.8483 - val_loss: 0.3007 - val_acc: 0.8603
Epoch 22/150
Epoch 00022: val_loss did not improve
 - 2s - loss: 0.2868 - acc: 0.8683 - val_loss: 0.2785 - val_acc: 0.8753
Epoch 23/150
Epoch 00023: val_loss did not improve
 - 2s - loss: 0.3055 - acc: 0.8525 - val_loss: 0.2826 - val_acc: 0.8529
Epoch 24/150
Epoch 00024: val_loss improved from 0.27589 to 0.27482, saving model to best_m.h5
 - 2s - loss: 0.3246 - acc: 0.8545 - val_loss: 0.2748 - val_acc: 0.8753
Epoch 25/150
Epoch 00025: val_loss did not improve
 - 2s - loss: 0.3073 - acc: 0.8539 - val_loss: 0.3255 - val_acc: 0.8479
Epoch 26/150
Epoch 00026: val_loss did not improve
 - 2s - loss: 0.2943 - acc: 0.8600 - val_loss: 0.2887 - val_acc: 0.8753
Epoch 27/150
Epoch 00027: val_loss did not improve
 - 2s - loss: 0.3368 - acc: 0.8533 - val_los

Epoch 85/150
Epoch 00085: val_loss did not improve
 - 2s - loss: 0.1805 - acc: 0.9325 - val_loss: 0.2924 - val_acc: 0.8753
Epoch 86/150
Epoch 00086: val_loss did not improve
 - 2s - loss: 0.1839 - acc: 0.9292 - val_loss: 0.2704 - val_acc: 0.8628
Epoch 87/150
Epoch 00087: val_loss did not improve
 - 2s - loss: 0.1874 - acc: 0.9233 - val_loss: 0.2445 - val_acc: 0.8853
Epoch 88/150
Epoch 00088: val_loss did not improve
 - 2s - loss: 0.2307 - acc: 0.8981 - val_loss: 0.3005 - val_acc: 0.8703
Epoch 89/150
Epoch 00089: val_loss did not improve
 - 2s - loss: 0.1952 - acc: 0.9242 - val_loss: 0.2458 - val_acc: 0.8928
Epoch 90/150
Epoch 00090: val_loss did not improve
 - 2s - loss: 0.1855 - acc: 0.9242 - val_loss: 0.2545 - val_acc: 0.8828
Epoch 91/150
Epoch 00091: val_loss did not improve
 - 2s - loss: 0.1975 - acc: 0.9183 - val_loss: 0.2748 - val_acc: 0.8878
Epoch 92/150
Epoch 00092: val_loss did not improve
 - 2s - loss: 0.1940 - acc: 0.9133 - val_loss: 0.2984 - val_acc: 0.8778
Epoch 93/150
Epo

Epoch 1/150
Epoch 00001: val_loss improved from inf to 0.79452, saving model to best_m.h5
 - 5s - loss: 0.6620 - acc: 0.7189 - val_loss: 0.7945 - val_acc: 0.4938
Epoch 2/150
Epoch 00002: val_loss improved from 0.79452 to 0.68565, saving model to best_m.h5
 - 2s - loss: 0.4628 - acc: 0.7966 - val_loss: 0.6857 - val_acc: 0.5761
Epoch 3/150
Epoch 00003: val_loss improved from 0.68565 to 0.66360, saving model to best_m.h5
 - 3s - loss: 0.4651 - acc: 0.8006 - val_loss: 0.6636 - val_acc: 0.6135
Epoch 4/150
Epoch 00004: val_loss did not improve
 - 2s - loss: 0.4339 - acc: 0.7933 - val_loss: 0.7271 - val_acc: 0.5436
Epoch 5/150
Epoch 00005: val_loss improved from 0.66360 to 0.61360, saving model to best_m.h5
 - 2s - loss: 0.4325 - acc: 0.8031 - val_loss: 0.6136 - val_acc: 0.6983
Epoch 6/150
Epoch 00006: val_loss did not improve
 - 2s - loss: 0.4422 - acc: 0.8006 - val_loss: 0.6632 - val_acc: 0.6185
Epoch 7/150
Epoch 00007: val_loss improved from 0.61360 to 0.58417, saving model to best_m.h5
 -

Epoch 64/150
Epoch 00064: val_loss did not improve
 - 2s - loss: 0.1831 - acc: 0.9258 - val_loss: 0.3667 - val_acc: 0.8454
Epoch 65/150
Epoch 00065: val_loss did not improve
 - 2s - loss: 0.1885 - acc: 0.9167 - val_loss: 0.3887 - val_acc: 0.8404
Epoch 66/150
Epoch 00066: val_loss did not improve
 - 2s - loss: 0.1824 - acc: 0.9181 - val_loss: 0.3651 - val_acc: 0.8554
Epoch 67/150
Epoch 00067: val_loss did not improve
 - 2s - loss: 0.1943 - acc: 0.9200 - val_loss: 0.3777 - val_acc: 0.8354
Epoch 68/150
Epoch 00068: val_loss did not improve
 - 2s - loss: 0.1837 - acc: 0.9283 - val_loss: 0.4066 - val_acc: 0.8254
Epoch 69/150
Epoch 00069: val_loss did not improve
 - 2s - loss: 0.2019 - acc: 0.9164 - val_loss: 0.3641 - val_acc: 0.8603
Epoch 70/150
Epoch 00070: val_loss did not improve
 - 2s - loss: 0.1750 - acc: 0.9317 - val_loss: 0.3866 - val_acc: 0.8304
Epoch 71/150
Epoch 00071: val_loss did not improve
 - 2s - loss: 0.1945 - acc: 0.9231 - val_loss: 0.4137 - val_acc: 0.8354
Epoch 72/150
Epo

Epoch 131/150
Epoch 00131: val_loss did not improve
 - 2s - loss: 0.1271 - acc: 0.9458 - val_loss: 0.4352 - val_acc: 0.8454
Epoch 132/150
Epoch 00132: val_loss did not improve
 - 2s - loss: 0.1260 - acc: 0.9523 - val_loss: 0.3925 - val_acc: 0.8554
Epoch 133/150
Epoch 00133: val_loss did not improve
 - 2s - loss: 0.1336 - acc: 0.9492 - val_loss: 0.4157 - val_acc: 0.8454
Epoch 134/150
Epoch 00134: val_loss did not improve
 - 2s - loss: 0.1136 - acc: 0.9506 - val_loss: 0.4977 - val_acc: 0.8354
Epoch 135/150
Epoch 00135: val_loss did not improve
 - 3s - loss: 0.1217 - acc: 0.9425 - val_loss: 0.4168 - val_acc: 0.8678
Epoch 136/150
Epoch 00136: val_loss did not improve
 - 3s - loss: 0.1201 - acc: 0.9542 - val_loss: 0.4240 - val_acc: 0.8504
Epoch 137/150
Epoch 00137: val_loss did not improve
 - 2s - loss: 0.1095 - acc: 0.9592 - val_loss: 0.4173 - val_acc: 0.8554
Epoch 138/150
Epoch 00138: val_loss did not improve
 - 2s - loss: 0.1330 - acc: 0.9542 - val_loss: 0.4068 - val_acc: 0.8479
Epoch 13

In [15]:
# Persist the [out-of-fold, test] prediction pair for later reuse as features
feature_path = '../features/cnn_4_aug_denoise_preprocess_feat.pkl'
with open(feature_path, 'wb') as fout:
    pickle.dump([train_pred, test_pred], fout)

# Log loss of the held-out (out-of-fold) predictions against the training labels
oof_loss = log_loss(y, train_pred)
print(oof_loss)

# Author's score note, kept verbatim:
# this 2146

# Submission frame: test ids plus the fold-averaged predictions
submission = test_df[['id']].copy()
submission['is_iceberg'] = test_pred
print(submission.head())
submission.to_csv('../results/cnn_4_aug_denoise_preprocess.csv', index=False)

0.264009776768
         id  is_iceberg
0  5941774d    0.498247
1  4023181e    0.635284
2  b20200e4    0.017909
3  e7f018bb    0.945060
4  4371c8c3    0.639482


In [16]:
def create_model():
    '''Build (without compiling) the batch-normalised four-stage CNN for 75x75x3 inputs.

    Every stage is Conv2D(3x3) -> relu -> BatchNormalization ->
    MaxPooling2D(3x3, stride 2) -> Dropout, with dropout 0.2 in the first two
    stages and 0.3 in the last two. The flattened features then pass through
    two relu Dense layers (512 and 256 units), each followed by Dropout(0.2),
    and a one-unit sigmoid output.

    Returns:
        The assembled keras Sequential model.
    '''
    net = Sequential()

    # (filters, dropout rate) for conv stages 1-4
    stage_specs = ((64, 0.2), (128, 0.2), (128, 0.3), (64, 0.3))
    for stage_no, (n_filters, drop_rate) in enumerate(stage_specs):
        # Only the very first layer declares the input shape
        extra = {'input_shape': (75, 75, 3)} if stage_no == 0 else {}
        net.add(Conv2D(n_filters, kernel_size=(3, 3), **extra))
        net.add(Activation('relu'))
        net.add(BatchNormalization())
        net.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
        net.add(Dropout(drop_rate))

    # The dense layers need a flat feature vector
    net.add(Flatten())

    # Two hidden dense layers, each regularised with dropout
    for n_units in (512, 256):
        net.add(Dense(n_units, activation='relu'))
        net.add(Dropout(0.2))

    # Single sigmoid unit for the binary target
    net.add(Dense(1, activation="sigmoid"))
    return net
# Marker output: shows that this cell ran, i.e. create_model now refers to the definition above.
print('model model')


model model


In [17]:
def kfold_train(fold_cnt=3,rnd=42):
    '''Train one model per K-fold split and collect its predictions.

    Uses the notebook globals ``train_x``, ``y`` and ``test_x``. For every
    fold a fresh model from ``create_model()`` is trained on augmented
    batches of the training part, the checkpoint with the lowest validation
    loss is reloaded, and that model predicts both the held-out part and the
    whole test set.

    Parameters
    ----------
    fold_cnt : int
        Number of folds passed to ``KFold``.
    rnd : int
        Seed value; ``KFold`` is created with ``random_state=2*rnd``.

    Returns
    -------
    (train_pred, test_pred) : tuple of np.ndarray
        ``train_pred`` has shape ``(len(train_x), 1)`` and holds, for each
        training row, the prediction made by the fold in which that row was
        held out. ``test_pred`` has shape ``(len(test_x), 1)`` and is the
        mean of the per-fold test predictions.
    '''
    # Size the buffers from the data rather than hard-coding the row counts,
    # so the function keeps working if the datasets change.
    train_pred = np.zeros((len(train_x), 1))
    test_pred = np.zeros((len(test_x), 1))
    kf = KFold(n_splits=fold_cnt, shuffle=True, random_state=2*rnd)

    # The augmentation settings, batch size and checkpoint path are the same
    # for every fold, so they are created once outside the loop.
    datagen = ImageDataGenerator(
        rotation_range = 20,
        width_shift_range = 0.1,
        height_shift_range = 0.1,
        zoom_range = 0.2,
        horizontal_flip=True
    )
    bat_size = 16
    model_p = 'best_m.h5'

    for train_index, test_index in kf.split(train_x):
        curr_x,curr_y = train_x[train_index],y[train_index]
        val_x,val_y = train_x[test_index],y[test_index]

        # At least one step per epoch, even if a fold is smaller than a batch.
        steps_train = max(1, len(curr_y)//bat_size)

        model = create_model()
        model.compile(loss='binary_crossentropy', optimizer=Adam(0.0005), metrics=['accuracy'])
        # A new checkpoint callback per fold: its "best so far" starts fresh,
        # so the file is overwritten by this fold's first epoch.
        model_chk = ModelCheckpoint(filepath=model_p, monitor='val_loss', save_best_only=True, verbose=1)
        lr_s = LearningRateScheduler(lr_f)
        model.fit_generator(datagen.flow(curr_x, curr_y, batch_size=bat_size),
                  validation_data=(val_x,val_y),
                  steps_per_epoch = steps_train,
                  epochs=150,
                  verbose=2,
                  callbacks=[model_chk,lr_s]
                 )

        # Predict with the best checkpoint, not the last-epoch weights.
        model = load_model(model_p)
        train_pred[test_index] = model.predict(val_x)
        test_pred = test_pred + model.predict(test_x)/fold_cnt
        print('============================')
    return train_pred,test_pred

train_pred,test_pred = kfold_train(fold_cnt=4)

Epoch 1/150
Epoch 00001: val_loss improved from inf to 0.72417, saving model to best_m.h5
 - 8s - loss: 0.6554 - acc: 0.6481 - val_loss: 0.7242 - val_acc: 0.5087
Epoch 2/150
Epoch 00002: val_loss did not improve
 - 4s - loss: 0.5498 - acc: 0.7108 - val_loss: 0.8595 - val_acc: 0.5087
Epoch 3/150
Epoch 00003: val_loss did not improve
 - 4s - loss: 0.5210 - acc: 0.7495 - val_loss: 0.9133 - val_acc: 0.5087
Epoch 4/150
Epoch 00004: val_loss did not improve
 - 4s - loss: 0.5100 - acc: 0.7333 - val_loss: 0.8515 - val_acc: 0.5087
Epoch 5/150
Epoch 00005: val_loss did not improve
 - 4s - loss: 0.4680 - acc: 0.7864 - val_loss: 0.9414 - val_acc: 0.5087
Epoch 6/150
Epoch 00006: val_loss improved from 0.72417 to 0.68834, saving model to best_m.h5
 - 4s - loss: 0.4799 - acc: 0.7658 - val_loss: 0.6883 - val_acc: 0.5187
Epoch 7/150
Epoch 00007: val_loss improved from 0.68834 to 0.58382, saving model to best_m.h5
 - 4s - loss: 0.4825 - acc: 0.7731 - val_loss: 0.5838 - val_acc: 0.6958
Epoch 8/150
Epoch 

Epoch 63/150
Epoch 00063: val_loss did not improve
 - 3s - loss: 0.3120 - acc: 0.8600 - val_loss: 0.2979 - val_acc: 0.8603
Epoch 64/150
Epoch 00064: val_loss did not improve
 - 3s - loss: 0.3279 - acc: 0.8456 - val_loss: 0.2648 - val_acc: 0.8828
Epoch 65/150
Epoch 00065: val_loss did not improve
 - 3s - loss: 0.3397 - acc: 0.8416 - val_loss: 0.2908 - val_acc: 0.8653
Epoch 66/150
Epoch 00066: val_loss did not improve
 - 3s - loss: 0.3381 - acc: 0.8481 - val_loss: 0.3057 - val_acc: 0.8579
Epoch 67/150
Epoch 00067: val_loss did not improve
 - 3s - loss: 0.3469 - acc: 0.8278 - val_loss: 0.3002 - val_acc: 0.8579
Epoch 68/150
Epoch 00068: val_loss did not improve
 - 3s - loss: 0.3105 - acc: 0.8675 - val_loss: 0.2602 - val_acc: 0.8778
Epoch 69/150
Epoch 00069: val_loss did not improve
 - 3s - loss: 0.3154 - acc: 0.8642 - val_loss: 0.2883 - val_acc: 0.8678
Epoch 70/150
Epoch 00070: val_loss did not improve
 - 3s - loss: 0.3167 - acc: 0.8570 - val_loss: 0.2786 - val_acc: 0.8728
Epoch 71/150
Epo

Epoch 00129: val_loss did not improve
 - 3s - loss: 0.2821 - acc: 0.8670 - val_loss: 0.2683 - val_acc: 0.8653
Epoch 130/150
Epoch 00130: val_loss did not improve
 - 3s - loss: 0.2751 - acc: 0.8773 - val_loss: 0.2666 - val_acc: 0.8703
Epoch 131/150
Epoch 00131: val_loss did not improve
 - 3s - loss: 0.3079 - acc: 0.8575 - val_loss: 0.2808 - val_acc: 0.8678
Epoch 132/150
Epoch 00132: val_loss did not improve
 - 3s - loss: 0.2941 - acc: 0.8673 - val_loss: 0.3061 - val_acc: 0.8504
Epoch 133/150
Epoch 00133: val_loss did not improve
 - 3s - loss: 0.3035 - acc: 0.8612 - val_loss: 0.2843 - val_acc: 0.8678
Epoch 134/150
Epoch 00134: val_loss did not improve
 - 3s - loss: 0.2929 - acc: 0.8541 - val_loss: 0.2965 - val_acc: 0.8603
Epoch 135/150
Epoch 00135: val_loss did not improve
 - 3s - loss: 0.3141 - acc: 0.8603 - val_loss: 0.3038 - val_acc: 0.8504
Epoch 136/150
Epoch 00136: val_loss did not improve
 - 3s - loss: 0.3006 - acc: 0.8633 - val_loss: 0.3228 - val_acc: 0.8379
Epoch 137/150
Epoch 00

Epoch 42/150
Epoch 00042: val_loss did not improve
 - 3s - loss: 0.3315 - acc: 0.8508 - val_loss: 0.3641 - val_acc: 0.8379
Epoch 43/150
Epoch 00043: val_loss did not improve
 - 3s - loss: 0.3405 - acc: 0.8503 - val_loss: 0.3163 - val_acc: 0.8379
Epoch 44/150
Epoch 00044: val_loss improved from 0.31522 to 0.31153, saving model to best_m.h5
 - 3s - loss: 0.3343 - acc: 0.8608 - val_loss: 0.3115 - val_acc: 0.8429
Epoch 45/150
Epoch 00045: val_loss did not improve
 - 3s - loss: 0.2969 - acc: 0.8514 - val_loss: 0.3797 - val_acc: 0.8379
Epoch 46/150
Epoch 00046: val_loss did not improve
 - 3s - loss: 0.2939 - acc: 0.8683 - val_loss: 0.3309 - val_acc: 0.8628
Epoch 47/150
Epoch 00047: val_loss did not improve
 - 3s - loss: 0.3115 - acc: 0.8539 - val_loss: 0.3405 - val_acc: 0.8454
Epoch 48/150
Epoch 00048: val_loss did not improve
 - 3s - loss: 0.2842 - acc: 0.8775 - val_loss: 0.3297 - val_acc: 0.8479
Epoch 49/150
Epoch 00049: val_loss did not improve
 - 3s - loss: 0.3336 - acc: 0.8420 - val_los

Epoch 108/150
Epoch 00108: val_loss did not improve
 - 3s - loss: 0.2823 - acc: 0.8641 - val_loss: 0.3178 - val_acc: 0.8554
Epoch 109/150
Epoch 00109: val_loss did not improve
 - 3s - loss: 0.2780 - acc: 0.8723 - val_loss: 0.3617 - val_acc: 0.8279
Epoch 110/150
Epoch 00110: val_loss did not improve
 - 3s - loss: 0.2663 - acc: 0.8789 - val_loss: 0.4062 - val_acc: 0.8080
Epoch 111/150
Epoch 00111: val_loss did not improve
 - 3s - loss: 0.2756 - acc: 0.8783 - val_loss: 0.3565 - val_acc: 0.8354
Epoch 112/150
Epoch 00112: val_loss did not improve
 - 3s - loss: 0.2934 - acc: 0.8739 - val_loss: 0.3287 - val_acc: 0.8653
Epoch 113/150
Epoch 00113: val_loss did not improve
 - 3s - loss: 0.3026 - acc: 0.8781 - val_loss: 0.3246 - val_acc: 0.8579
Epoch 114/150
Epoch 00114: val_loss did not improve
 - 3s - loss: 0.2881 - acc: 0.8768 - val_loss: 0.3508 - val_acc: 0.8404
Epoch 115/150
Epoch 00115: val_loss did not improve
 - 3s - loss: 0.2970 - acc: 0.8642 - val_loss: 0.3745 - val_acc: 0.8404
Epoch 11

Epoch 22/150
Epoch 00022: val_loss improved from 0.36460 to 0.34732, saving model to best_m.h5
 - 3s - loss: 0.3913 - acc: 0.8256 - val_loss: 0.3473 - val_acc: 0.8703
Epoch 23/150
Epoch 00023: val_loss did not improve
 - 3s - loss: 0.3745 - acc: 0.8283 - val_loss: 0.3561 - val_acc: 0.8628
Epoch 24/150
Epoch 00024: val_loss improved from 0.34732 to 0.33845, saving model to best_m.h5
 - 3s - loss: 0.3569 - acc: 0.8350 - val_loss: 0.3385 - val_acc: 0.8554
Epoch 25/150
Epoch 00025: val_loss did not improve
 - 3s - loss: 0.4051 - acc: 0.8095 - val_loss: 0.3700 - val_acc: 0.8628
Epoch 26/150
Epoch 00026: val_loss did not improve
 - 3s - loss: 0.3733 - acc: 0.8297 - val_loss: 0.4035 - val_acc: 0.8030
Epoch 27/150
Epoch 00027: val_loss did not improve
 - 3s - loss: 0.3512 - acc: 0.8333 - val_loss: 0.4875 - val_acc: 0.7631
Epoch 28/150
Epoch 00028: val_loss improved from 0.33845 to 0.29383, saving model to best_m.h5
 - 3s - loss: 0.3721 - acc: 0.8350 - val_loss: 0.2938 - val_acc: 0.8753
Epoch 2

Epoch 86/150
Epoch 00086: val_loss did not improve
 - 3s - loss: 0.3007 - acc: 0.8583 - val_loss: 0.3088 - val_acc: 0.8678
Epoch 87/150
Epoch 00087: val_loss did not improve
 - 3s - loss: 0.3158 - acc: 0.8545 - val_loss: 0.3326 - val_acc: 0.8504
Epoch 88/150
Epoch 00088: val_loss did not improve
 - 3s - loss: 0.3365 - acc: 0.8456 - val_loss: 0.3036 - val_acc: 0.8778
Epoch 89/150
Epoch 00089: val_loss did not improve
 - 3s - loss: 0.3072 - acc: 0.8700 - val_loss: 0.2797 - val_acc: 0.8803
Epoch 90/150
Epoch 00090: val_loss improved from 0.26643 to 0.26634, saving model to best_m.h5
 - 3s - loss: 0.2822 - acc: 0.8725 - val_loss: 0.2663 - val_acc: 0.8803
Epoch 91/150
Epoch 00091: val_loss improved from 0.26634 to 0.25308, saving model to best_m.h5
 - 3s - loss: 0.3073 - acc: 0.8541 - val_loss: 0.2531 - val_acc: 0.8928
Epoch 92/150
Epoch 00092: val_loss did not improve
 - 3s - loss: 0.2784 - acc: 0.8725 - val_loss: 0.2653 - val_acc: 0.8753
Epoch 93/150
Epoch 00093: val_loss did not improve


 - 7s - loss: 0.6519 - acc: 0.6489 - val_loss: 0.7788 - val_acc: 0.5062
Epoch 2/150
Epoch 00002: val_loss did not improve
 - 3s - loss: 0.5713 - acc: 0.7195 - val_loss: 1.1601 - val_acc: 0.5062
Epoch 3/150
Epoch 00003: val_loss did not improve
 - 3s - loss: 0.5154 - acc: 0.7414 - val_loss: 0.9320 - val_acc: 0.5062
Epoch 4/150
Epoch 00004: val_loss did not improve
 - 3s - loss: 0.5001 - acc: 0.7468 - val_loss: 0.8117 - val_acc: 0.5062
Epoch 5/150
Epoch 00005: val_loss improved from 0.77880 to 0.65022, saving model to best_m.h5
 - 3s - loss: 0.4542 - acc: 0.7606 - val_loss: 0.6502 - val_acc: 0.6484
Epoch 6/150
Epoch 00006: val_loss did not improve
 - 3s - loss: 0.4489 - acc: 0.7770 - val_loss: 0.6539 - val_acc: 0.6060
Epoch 7/150
Epoch 00007: val_loss did not improve
 - 3s - loss: 0.4035 - acc: 0.8050 - val_loss: 0.8220 - val_acc: 0.5636
Epoch 8/150
Epoch 00008: val_loss did not improve
 - 3s - loss: 0.4524 - acc: 0.7937 - val_loss: 0.7278 - val_acc: 0.5736
Epoch 9/150
Epoch 00009: val_l

Epoch 66/150
Epoch 00066: val_loss did not improve
 - 3s - loss: 0.2836 - acc: 0.8681 - val_loss: 0.3655 - val_acc: 0.8229
Epoch 67/150
Epoch 00067: val_loss did not improve
 - 3s - loss: 0.2846 - acc: 0.8700 - val_loss: 0.3651 - val_acc: 0.8180
Epoch 68/150
Epoch 00068: val_loss did not improve
 - 3s - loss: 0.2912 - acc: 0.8691 - val_loss: 0.3775 - val_acc: 0.8180
Epoch 69/150
Epoch 00069: val_loss did not improve
 - 3s - loss: 0.3073 - acc: 0.8579 - val_loss: 0.3517 - val_acc: 0.8229
Epoch 70/150
Epoch 00070: val_loss improved from 0.34742 to 0.34002, saving model to best_m.h5
 - 3s - loss: 0.2901 - acc: 0.8741 - val_loss: 0.3400 - val_acc: 0.8354
Epoch 71/150
Epoch 00071: val_loss did not improve
 - 3s - loss: 0.2767 - acc: 0.8741 - val_loss: 0.3534 - val_acc: 0.8229
Epoch 72/150
Epoch 00072: val_loss did not improve
 - 3s - loss: 0.2887 - acc: 0.8791 - val_loss: 0.4292 - val_acc: 0.7980
Epoch 73/150
Epoch 00073: val_loss did not improve
 - 3s - loss: 0.2882 - acc: 0.8639 - val_los

Epoch 132/150
Epoch 00132: val_loss did not improve
 - 3s - loss: 0.2566 - acc: 0.8756 - val_loss: 0.3741 - val_acc: 0.8254
Epoch 133/150
Epoch 00133: val_loss did not improve
 - 3s - loss: 0.2697 - acc: 0.8764 - val_loss: 0.3383 - val_acc: 0.8379
Epoch 134/150
Epoch 00134: val_loss did not improve
 - 3s - loss: 0.2391 - acc: 0.8858 - val_loss: 0.3339 - val_acc: 0.8379
Epoch 135/150
Epoch 00135: val_loss did not improve
 - 3s - loss: 0.2609 - acc: 0.8875 - val_loss: 0.3480 - val_acc: 0.8379
Epoch 136/150
Epoch 00136: val_loss did not improve
 - 3s - loss: 0.3024 - acc: 0.8645 - val_loss: 0.3969 - val_acc: 0.8204
Epoch 137/150
Epoch 00137: val_loss did not improve
 - 3s - loss: 0.2717 - acc: 0.8814 - val_loss: 0.3498 - val_acc: 0.8329
Epoch 138/150
Epoch 00138: val_loss did not improve
 - 3s - loss: 0.2470 - acc: 0.8917 - val_loss: 0.3797 - val_acc: 0.8155
Epoch 139/150
Epoch 00139: val_loss improved from 0.33026 to 0.32699, saving model to best_m.h5
 - 3s - loss: 0.2656 - acc: 0.8748 -

In [18]:
# Persist the out-of-fold train predictions and the fold-averaged test
# predictions so they can be reloaded later.
with open('../features/cnn_5_aug_denoise_preprocess_feat.pkl','wb') as fout:
    pickle.dump([train_pred, test_pred], fout)

# Log loss of the out-of-fold predictions against the true labels.
print(log_loss(y, train_pred))
# NOTE(review): the original note here read "2364" - presumably the score
# obtained by this submission; confirm before relying on it.

# Submission frame: test ids plus the model's prediction for each row.
submission = test_df[['id']].copy()
submission['is_iceberg'] = test_pred
print(submission.head())
submission.to_csv('../results/cnn_5_aug_denoise_preprocess.csv', index=False)

0.279755218796
         id  is_iceberg
0  5941774d    0.364896
1  4023181e    0.208586
2  b20200e4    0.256978
3  e7f018bb    0.972870
4  4371c8c3    0.256644
