In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from keras.utils.np_utils import to_categorical
from keras.models import Sequential,load_model
from keras.layers import Dense, Dropout, Flatten, Lambda, Activation
from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D, GlobalAveragePooling2D
from keras.layers.normalization import BatchNormalization
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint,LearningRateScheduler,EarlyStopping

# Read the train and test JSON files into DataFrames (train first, then test).
train_df, test_df = (
    pd.read_json(json_path)
    for json_path in ('../input/train.json', '../input/test.json')
)

Using TensorFlow backend.


In [2]:
from skimage.exposure import equalize_adapthist

def std_img(x):
    '''Standardise every channel of an (H, W, C) image to zero mean, unit std.

    Each channel ``x[:, :, i]`` has its own mean subtracted and is then
    divided by its own standard deviation plus ``1e-7`` (the epsilon keeps a
    constant channel from dividing by zero).

    Args:
        x: 3-D numpy array indexed as ``[row, col, channel]``. Any number of
            channels is accepted (the original handled exactly three).

    Returns:
        The standardised array. Floating-point input is modified in place and
        the same object is returned; non-float input (e.g. integers) cannot
        hold the result, so a float64 copy is standardised and returned and
        the caller's array is left untouched.
    '''
    if not np.issubdtype(x.dtype, np.floating):
        # In-place float arithmetic on an integer array raises, so promote.
        x = x.astype(np.float64)
    for i in range(x.shape[2]):
        channel = x[:, :, i]  # view: the in-place ops below write into x
        channel -= channel.mean()
        channel /= channel.std() + 1e-7
    return x

def get_image(df):
    '''Create 3-channel 'images'. Return rescale-normalised images.

    For every row of ``df`` the ``band_1`` and ``band_2`` entries are reshaped
    to 75x75, a third band is formed as their element-wise mean, each band is
    min-max scaled on its own, and the stacked 75x75x3 image is passed
    through ``equalize_adapthist``.

    Args:
        df: DataFrame with ``band_1`` and ``band_2`` columns, each cell
            holding 5625 (75*75) numeric values.

    Returns:
        numpy array with one processed image per row of ``df``; an empty
        frame yields an array of shape (0, 75, 75, 3).
    '''
    def _rescale(band):
        '''Min-max scale ``band`` to [0, 1]; a constant band maps to zeros.'''
        lo = band.min()
        span = band.max() - lo
        if span == 0:
            # max == min would divide by zero and fill the channel with NaN.
            return np.zeros_like(band, dtype=float)
        return (band - lo) / span

    images = []
    for _, row in df.iterrows():
        # Formulate the bands as 75x75 arrays
        band_1 = np.array(row['band_1']).reshape(75, 75)
        band_2 = np.array(row['band_2']).reshape(75, 75)
        band_3 = (band_1 + band_2)/2

        # Rescale each band independently, then stack as channels
        img = np.dstack([_rescale(band_1), _rescale(band_2), _rescale(band_3)])
        img = equalize_adapthist(img)
        #img = std_img(img)
        images.append(img)

    if not images:
        # Keep the result 4-D so downstream shape handling still works.
        return np.empty((0, 75, 75, 3))
    return np.array(images)


# Turn both DataFrames into image arrays, then report their shapes.
train_x, test_x = get_image(train_df), get_image(test_df)
print(train_x.shape, test_x.shape)

  .format(dtypeobj_in, dtypeobj_out))


(1604, 75, 75, 3) (8424, 75, 75, 3)


In [3]:
# Labels come from the is_iceberg column; show the first five as a sanity check.
y = train_df['is_iceberg'].values
print(y[0:5])

[0 0 1 0 0]


In [17]:
def create_model():
    '''Build and return the (uncompiled) Keras Sequential model.

    Layout: two convolutional stages (two 3x3 convolutions with ReLU, 2x2 max
    pooling and 25% dropout each, with 16 and then 32 filters), followed by
    Flatten, a 256-unit ReLU dense layer with 50% dropout and a single
    sigmoid output unit. Input shape is (75, 75, 3).
    '''
    stack = [
        # Stage 1: 16 filters; only the first convolution uses 'same' padding
        Conv2D(16, (3, 3), padding='same',input_shape=(75,75,3)),
        Activation('relu'),
        Conv2D(16, (3, 3)),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        # Stage 2: 32 filters
        Conv2D(32, (3, 3)),
        Activation('relu'),
        Conv2D(32, (3, 3)),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        # Classifier head
        Flatten(),
        Dense(256),
        Activation('relu'),
        Dropout(0.5),
        Dense(1),
        Activation('sigmoid'),
    ]

    model = Sequential()
    for layer in stack:
        model.add(layer)
    return model
print('model model')


model model


In [18]:
from sklearn.model_selection import KFold
from keras.preprocessing.image import ImageDataGenerator

def lr_f(epoch):
    '''Step learning-rate schedule keyed on the epoch index.

    Returns 0.001 for epochs below 20, 0.0005 for epochs below 70 and
    0.0001 for every later epoch.
    '''
    # (exclusive upper epoch bound, learning rate), checked in order
    steps = ((20, 0.001), (70, 0.0005))
    for upper_bound, rate in steps:
        if epoch < upper_bound:
            return rate
    return 0.0001
    

def kfold_train(fold_cnt=3,rnd=42):
    '''Train a fresh model on every K-fold split and gather its predictions.

    Relies on names defined elsewhere in the notebook: ``train_x``, ``y``,
    ``test_x``, ``create_model`` and ``lr_f``.

    Args:
        fold_cnt: number of K-fold splits.
        rnd: seed material; ``2*rnd`` is the ``random_state`` of the shuffled
            ``KFold``. It is not used anywhere else in this function.

    Returns:
        (train_pred, test_pred): ``train_pred`` has one row per training
        sample, filled by the model for which that sample was in the
        validation split; ``test_pred`` has one row per test sample and is
        the mean of the per-fold test predictions.
    '''
    # Size the buffers from the data rather than hard-coding 1604 / 8424 rows.
    train_pred = np.zeros((len(train_x), 1))
    test_pred = np.zeros((len(test_x), 1))
    kf = KFold(n_splits=fold_cnt, shuffle=True, random_state=2*rnd)

    # Loop-invariant settings, built once instead of once per fold.
    datagen = ImageDataGenerator(
        rotation_range = 20,
        width_shift_range = 0.1,
        height_shift_range = 0.1,
        zoom_range = 0.2,
        horizontal_flip=True
    )
    bat_size = 16
    model_p = 'best_m.h5'

    for train_index, val_index in kf.split(train_x):
        curr_x,curr_y = train_x[train_index],y[train_index]
        val_x,val_y = train_x[val_index],y[val_index]

        # Floor division: a trailing partial batch is not counted as a step.
        steps_train = len(curr_y)//bat_size

        model = create_model()
        # NOTE(review): lr_f is applied through LearningRateScheduler, so the
        # 0.0005 given to Adam is presumably overridden every epoch - confirm.
        model.compile(loss='binary_crossentropy', optimizer=Adam(0.0005), metrics=['accuracy'])
        model_chk = ModelCheckpoint(filepath=model_p, monitor='val_loss', save_best_only=True, verbose=1)
        lr_s = LearningRateScheduler(lr_f)
        model.fit_generator(datagen.flow(curr_x, curr_y, batch_size=bat_size),
                  validation_data=(val_x,val_y),
                  steps_per_epoch = steps_train,
                  epochs=120,
                  verbose=2,
                  callbacks=[model_chk,lr_s]
                 )

        # Predict with the checkpoint written at the best val_loss rather
        # than with the weights left after the final epoch.
        model = load_model(model_p)
        train_pred[val_index] = model.predict(val_x)
        test_pred = test_pred + model.predict(test_x)/fold_cnt
        print('============================')
    return train_pred,test_pred

train_pred,test_pred = kfold_train(fold_cnt=4)

Epoch 1/120
Epoch 00001: val_loss improved from inf to 0.62190, saving model to best_m.h5
 - 3s - loss: 0.6900 - acc: 0.5500 - val_loss: 0.6219 - val_acc: 0.6958
Epoch 2/120
Epoch 00002: val_loss did not improve
 - 2s - loss: 0.6848 - acc: 0.6063 - val_loss: 0.6540 - val_acc: 0.6534
Epoch 3/120
Epoch 00003: val_loss improved from 0.62190 to 0.61919, saving model to best_m.h5
 - 2s - loss: 0.6555 - acc: 0.6022 - val_loss: 0.6192 - val_acc: 0.6160
Epoch 4/120
Epoch 00004: val_loss did not improve
 - 1s - loss: 0.6190 - acc: 0.6403 - val_loss: 0.6300 - val_acc: 0.5087
Epoch 5/120
Epoch 00005: val_loss improved from 0.61919 to 0.58234, saving model to best_m.h5
 - 2s - loss: 0.6061 - acc: 0.6533 - val_loss: 0.5823 - val_acc: 0.6908
Epoch 6/120
Epoch 00006: val_loss did not improve
 - 1s - loss: 0.6169 - acc: 0.6503 - val_loss: 0.6222 - val_acc: 0.6883
Epoch 7/120
Epoch 00007: val_loss did not improve
 - 1s - loss: 0.6055 - acc: 0.6741 - val_loss: 0.5856 - val_acc: 0.6858
Epoch 8/120
Epoch 

Epoch 59/120
Epoch 00059: val_loss did not improve
 - 1s - loss: 0.4156 - acc: 0.7858 - val_loss: 0.3592 - val_acc: 0.8130
Epoch 60/120
Epoch 00060: val_loss did not improve
 - 1s - loss: 0.3902 - acc: 0.8001 - val_loss: 0.3101 - val_acc: 0.8603
Epoch 61/120
Epoch 00061: val_loss did not improve
 - 1s - loss: 0.3756 - acc: 0.8114 - val_loss: 0.3682 - val_acc: 0.8155
Epoch 62/120
Epoch 00062: val_loss did not improve
 - 1s - loss: 0.3925 - acc: 0.7833 - val_loss: 0.3294 - val_acc: 0.8304
Epoch 63/120
Epoch 00063: val_loss did not improve
 - 1s - loss: 0.3933 - acc: 0.8076 - val_loss: 0.4678 - val_acc: 0.7830
Epoch 64/120
Epoch 00064: val_loss improved from 0.30230 to 0.28761, saving model to best_m.h5
 - 2s - loss: 0.3882 - acc: 0.8133 - val_loss: 0.2876 - val_acc: 0.8678
Epoch 65/120
Epoch 00065: val_loss did not improve
 - 1s - loss: 0.4154 - acc: 0.7875 - val_loss: 0.3122 - val_acc: 0.8504
Epoch 66/120
Epoch 00066: val_loss did not improve
 - 1s - loss: 0.3941 - acc: 0.8150 - val_los

Epoch 4/120
Epoch 00004: val_loss did not improve
 - 1s - loss: 0.6847 - acc: 0.5664 - val_loss: 0.6894 - val_acc: 0.5461
Epoch 5/120
Epoch 00005: val_loss did not improve
 - 1s - loss: 0.6705 - acc: 0.5897 - val_loss: 0.6871 - val_acc: 0.5411
Epoch 6/120
Epoch 00006: val_loss improved from 0.67575 to 0.63351, saving model to best_m.h5
 - 2s - loss: 0.6646 - acc: 0.6020 - val_loss: 0.6335 - val_acc: 0.6484
Epoch 7/120
Epoch 00007: val_loss improved from 0.63351 to 0.62856, saving model to best_m.h5
 - 2s - loss: 0.6206 - acc: 0.6550 - val_loss: 0.6286 - val_acc: 0.6534
Epoch 8/120
Epoch 00008: val_loss improved from 0.62856 to 0.60325, saving model to best_m.h5
 - 2s - loss: 0.6286 - acc: 0.6180 - val_loss: 0.6032 - val_acc: 0.6758
Epoch 9/120
Epoch 00009: val_loss improved from 0.60325 to 0.58909, saving model to best_m.h5
 - 2s - loss: 0.5900 - acc: 0.6789 - val_loss: 0.5891 - val_acc: 0.6783
Epoch 10/120
Epoch 00010: val_loss did not improve
 - 1s - loss: 0.6200 - acc: 0.6308 - val_

Epoch 00062: val_loss did not improve
 - 2s - loss: 0.4125 - acc: 0.7903 - val_loss: 0.3836 - val_acc: 0.8080
Epoch 63/120
Epoch 00063: val_loss did not improve
 - 1s - loss: 0.4033 - acc: 0.8025 - val_loss: 0.3512 - val_acc: 0.8204
Epoch 64/120
Epoch 00064: val_loss improved from 0.34953 to 0.34330, saving model to best_m.h5
 - 2s - loss: 0.3964 - acc: 0.7972 - val_loss: 0.3433 - val_acc: 0.8354
Epoch 65/120
Epoch 00065: val_loss improved from 0.34330 to 0.32846, saving model to best_m.h5
 - 1s - loss: 0.3939 - acc: 0.8041 - val_loss: 0.3285 - val_acc: 0.8379
Epoch 66/120
Epoch 00066: val_loss did not improve
 - 1s - loss: 0.4030 - acc: 0.7991 - val_loss: 0.3455 - val_acc: 0.8204
Epoch 67/120
Epoch 00067: val_loss improved from 0.32846 to 0.31983, saving model to best_m.h5
 - 2s - loss: 0.3693 - acc: 0.8158 - val_loss: 0.3198 - val_acc: 0.8254
Epoch 68/120
Epoch 00068: val_loss did not improve
 - 1s - loss: 0.3962 - acc: 0.8164 - val_loss: 0.3663 - val_acc: 0.8130
Epoch 69/120
Epoch 0

Epoch 2/120
Epoch 00002: val_loss improved from 0.64538 to 0.63246, saving model to best_m.h5
 - 2s - loss: 0.6820 - acc: 0.5453 - val_loss: 0.6325 - val_acc: 0.5786
Epoch 3/120
Epoch 00003: val_loss did not improve
 - 1s - loss: 0.6466 - acc: 0.6133 - val_loss: 0.6357 - val_acc: 0.6633
Epoch 4/120
Epoch 00004: val_loss improved from 0.63246 to 0.60206, saving model to best_m.h5
 - 1s - loss: 0.6533 - acc: 0.6347 - val_loss: 0.6021 - val_acc: 0.6384
Epoch 5/120
Epoch 00005: val_loss improved from 0.60206 to 0.58349, saving model to best_m.h5
 - 1s - loss: 0.6113 - acc: 0.6650 - val_loss: 0.5835 - val_acc: 0.6783
Epoch 6/120
Epoch 00006: val_loss did not improve
 - 1s - loss: 0.6075 - acc: 0.6706 - val_loss: 0.5856 - val_acc: 0.6683
Epoch 7/120
Epoch 00007: val_loss improved from 0.58349 to 0.55711, saving model to best_m.h5
 - 2s - loss: 0.6034 - acc: 0.6925 - val_loss: 0.5571 - val_acc: 0.6958
Epoch 8/120
Epoch 00008: val_loss improved from 0.55711 to 0.53357, saving model to best_m.h

Epoch 00060: val_loss did not improve
 - 1s - loss: 0.3840 - acc: 0.8256 - val_loss: 0.3366 - val_acc: 0.8379
Epoch 61/120
Epoch 00061: val_loss improved from 0.31766 to 0.30926, saving model to best_m.h5
 - 2s - loss: 0.3444 - acc: 0.8342 - val_loss: 0.3093 - val_acc: 0.8653
Epoch 62/120
Epoch 00062: val_loss did not improve
 - 1s - loss: 0.3500 - acc: 0.8416 - val_loss: 0.3105 - val_acc: 0.8653
Epoch 63/120
Epoch 00063: val_loss improved from 0.30926 to 0.30799, saving model to best_m.h5
 - 1s - loss: 0.3622 - acc: 0.8316 - val_loss: 0.3080 - val_acc: 0.8753
Epoch 64/120
Epoch 00064: val_loss improved from 0.30799 to 0.29972, saving model to best_m.h5
 - 2s - loss: 0.3533 - acc: 0.8187 - val_loss: 0.2997 - val_acc: 0.8678
Epoch 65/120
Epoch 00065: val_loss did not improve
 - 1s - loss: 0.3689 - acc: 0.8197 - val_loss: 0.3202 - val_acc: 0.8354
Epoch 66/120
Epoch 00066: val_loss did not improve
 - 1s - loss: 0.3531 - acc: 0.8281 - val_loss: 0.3111 - val_acc: 0.8628
Epoch 67/120
Epoch 0

Epoch 1/120
Epoch 00001: val_loss improved from inf to 0.66328, saving model to best_m.h5
 - 3s - loss: 0.6877 - acc: 0.5330 - val_loss: 0.6633 - val_acc: 0.5062
Epoch 2/120
Epoch 00002: val_loss did not improve
 - 1s - loss: 0.6860 - acc: 0.5203 - val_loss: 0.6928 - val_acc: 0.5062
Epoch 3/120
Epoch 00003: val_loss did not improve
 - 1s - loss: 0.6896 - acc: 0.5708 - val_loss: 0.6938 - val_acc: 0.5062
Epoch 4/120
Epoch 00004: val_loss did not improve
 - 1s - loss: 0.6862 - acc: 0.5536 - val_loss: 0.6827 - val_acc: 0.5062
Epoch 5/120
Epoch 00005: val_loss improved from 0.66328 to 0.65142, saving model to best_m.h5
 - 1s - loss: 0.6373 - acc: 0.5414 - val_loss: 0.6514 - val_acc: 0.5062
Epoch 6/120
Epoch 00006: val_loss did not improve
 - 1s - loss: 0.6650 - acc: 0.5317 - val_loss: 0.6931 - val_acc: 0.5062
Epoch 7/120
Epoch 00007: val_loss improved from 0.65142 to 0.61581, saving model to best_m.h5
 - 2s - loss: 0.6667 - acc: 0.5873 - val_loss: 0.6158 - val_acc: 0.6858
Epoch 8/120
Epoch 

Epoch 60/120
Epoch 00060: val_loss improved from 0.37529 to 0.37264, saving model to best_m.h5
 - 2s - loss: 0.4023 - acc: 0.8055 - val_loss: 0.3726 - val_acc: 0.7955
Epoch 61/120
Epoch 00061: val_loss did not improve
 - 1s - loss: 0.3833 - acc: 0.7967 - val_loss: 0.3896 - val_acc: 0.7905
Epoch 62/120
Epoch 00062: val_loss improved from 0.37264 to 0.35521, saving model to best_m.h5
 - 2s - loss: 0.3972 - acc: 0.7875 - val_loss: 0.3552 - val_acc: 0.8279
Epoch 63/120
Epoch 00063: val_loss did not improve
 - 1s - loss: 0.3817 - acc: 0.7958 - val_loss: 0.3719 - val_acc: 0.8080
Epoch 64/120
Epoch 00064: val_loss did not improve
 - 1s - loss: 0.3999 - acc: 0.8014 - val_loss: 0.3750 - val_acc: 0.7955
Epoch 65/120
Epoch 00065: val_loss did not improve
 - 1s - loss: 0.3772 - acc: 0.8045 - val_loss: 0.3658 - val_acc: 0.8080
Epoch 66/120
Epoch 00066: val_loss did not improve
 - 1s - loss: 0.4038 - acc: 0.8116 - val_loss: 0.3573 - val_acc: 0.8180
Epoch 67/120
Epoch 00067: val_loss improved from 0.

In [19]:
import pickle
from sklearn.metrics import log_loss

# Persist the CNN 1 predictions (train-side and test-side) as a pickled list.
with open('../features/cnn_1_aug_skimage_preprocess_feat.pkl','wb') as fout:
    pickle.dump([train_pred,test_pred],fout)

# Log loss of the train-side predictions against the labels.
print(log_loss(y, train_pred))

# Author's score notes, kept as written (presumably earlier log-loss values):
#pre 3219
# new 2965
# skimage 2725

# Build the submission table: ids from the test frame plus the predictions.
submission = pd.DataFrame({'id': test_df['id']})
submission['is_iceberg'] = test_pred
print(submission.head())
submission.to_csv('../results/cnn_1_aug_skimage_preprocess.csv', index=False)

0.296393091624
         id  is_iceberg
0  5941774d    0.466271
1  4023181e    0.553343
2  b20200e4    0.525201
3  e7f018bb    0.986323
4  4371c8c3    0.658490


In [20]:
def create_model():
    '''Build and return the (uncompiled) Keras Sequential model.

    Layout: a 32-filter 3x3 convolution on the (75, 75, 3) input with max
    pooling, two zero-padded stages of paired 3x3 convolutions (32 then 64
    filters) each ending in max pooling, one more 64-filter convolution with
    max pooling, then Flatten, a 256-unit ReLU dense layer with 50% dropout
    and a single sigmoid output unit.
    '''
    def conv3x3(filters):
        # 3x3 ReLU convolution with unit stride, as used after the input layer
        return Conv2D(filters, (3, 3), activation='relu', strides=1)

    def pool():
        return MaxPooling2D((2, 2), strides=(2, 2))

    model = Sequential()
    model.add(Conv2D(32, (3, 3),input_shape=(75, 75, 3), activation='relu'))
    model.add(pool())

    # Two padded stages, each with a pair of convolutions and a pooling step
    for filters in (32, 64):
        model.add(ZeroPadding2D((1, 1)))
        model.add(conv3x3(filters))
        model.add(conv3x3(filters))
        model.add(pool())

    model.add(conv3x3(64))
    model.add(pool())

    # Classifier head
    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))
    return model
print('model model')

model model


In [22]:
def kfold_train(fold_cnt=3,rnd=428):
    '''Train a fresh model on every K-fold split and gather its predictions.

    Relies on names defined elsewhere in the notebook: ``train_x``, ``y``,
    ``test_x``, ``create_model`` and ``lr_f``.

    Args:
        fold_cnt: number of K-fold splits.
        rnd: seed material; ``2*rnd`` is the ``random_state`` of the shuffled
            ``KFold``. It is not used anywhere else in this function.

    Returns:
        (train_pred, test_pred): ``train_pred`` has one row per training
        sample, filled by the model for which that sample was in the
        validation split; ``test_pred`` has one row per test sample and is
        the mean of the per-fold test predictions.
    '''
    # Size the buffers from the data rather than hard-coding 1604 / 8424 rows.
    train_pred = np.zeros((len(train_x), 1))
    test_pred = np.zeros((len(test_x), 1))
    kf = KFold(n_splits=fold_cnt, shuffle=True, random_state=2*rnd)

    # Loop-invariant settings, built once instead of once per fold.
    datagen = ImageDataGenerator(
        rotation_range = 20,
        width_shift_range = 0.1,
        height_shift_range = 0.1,
        zoom_range = 0.2,
        horizontal_flip=True
    )
    bat_size = 16
    model_p = 'best_m.h5'

    for train_index, val_index in kf.split(train_x):
        curr_x,curr_y = train_x[train_index],y[train_index]
        val_x,val_y = train_x[val_index],y[val_index]

        # Floor division: a trailing partial batch is not counted as a step.
        steps_train = len(curr_y)//bat_size

        model = create_model()
        # NOTE(review): lr_f is applied through LearningRateScheduler, so the
        # 0.0005 given to Adam is presumably overridden every epoch - confirm.
        model.compile(loss='binary_crossentropy', optimizer=Adam(0.0005), metrics=['accuracy'])
        model_chk = ModelCheckpoint(filepath=model_p, monitor='val_loss', save_best_only=True, verbose=1)
        lr_s = LearningRateScheduler(lr_f)
        model.fit_generator(datagen.flow(curr_x, curr_y, batch_size=bat_size),
                  validation_data=(val_x,val_y),
                  steps_per_epoch = steps_train,
                  epochs=120,
                  verbose=2,
                  callbacks=[model_chk,lr_s]
                 )

        # Predict with the checkpoint written at the best val_loss rather
        # than with the weights left after the final epoch.
        model = load_model(model_p)
        train_pred[val_index] = model.predict(val_x)
        test_pred = test_pred + model.predict(test_x)/fold_cnt
        print('============================')
    return train_pred,test_pred

train_pred,test_pred = kfold_train(fold_cnt=4)

Epoch 1/120
Epoch 00001: val_loss improved from inf to 0.69115, saving model to best_m.h5
 - 4s - loss: 0.6938 - acc: 0.5020 - val_loss: 0.6912 - val_acc: 0.5461
Epoch 2/120
Epoch 00002: val_loss improved from 0.69115 to 0.69027, saving model to best_m.h5
 - 1s - loss: 0.6925 - acc: 0.5322 - val_loss: 0.6903 - val_acc: 0.5461
Epoch 3/120
Epoch 00003: val_loss improved from 0.69027 to 0.68929, saving model to best_m.h5
 - 1s - loss: 0.6910 - acc: 0.5414 - val_loss: 0.6893 - val_acc: 0.5461
Epoch 4/120
Epoch 00004: val_loss improved from 0.68929 to 0.68893, saving model to best_m.h5
 - 1s - loss: 0.6929 - acc: 0.5270 - val_loss: 0.6889 - val_acc: 0.5461
Epoch 5/120
Epoch 00005: val_loss did not improve
 - 1s - loss: 0.6912 - acc: 0.5311 - val_loss: 0.6893 - val_acc: 0.5461
Epoch 6/120
Epoch 00006: val_loss did not improve
 - 1s - loss: 0.6924 - acc: 0.5255 - val_loss: 0.6899 - val_acc: 0.5461
Epoch 7/120
Epoch 00007: val_loss improved from 0.68893 to 0.64628, saving model to best_m.h5
 -

Epoch 58/120
Epoch 00058: val_loss did not improve
 - 1s - loss: 0.4026 - acc: 0.8214 - val_loss: 0.4104 - val_acc: 0.8155
Epoch 59/120
Epoch 00059: val_loss did not improve
 - 1s - loss: 0.3930 - acc: 0.8142 - val_loss: 0.4124 - val_acc: 0.8105
Epoch 60/120
Epoch 00060: val_loss did not improve
 - 1s - loss: 0.4072 - acc: 0.8122 - val_loss: 0.4127 - val_acc: 0.8080
Epoch 61/120
Epoch 00061: val_loss improved from 0.38415 to 0.37466, saving model to best_m.h5
 - 1s - loss: 0.4051 - acc: 0.8016 - val_loss: 0.3747 - val_acc: 0.8304
Epoch 62/120
Epoch 00062: val_loss did not improve
 - 1s - loss: 0.3971 - acc: 0.8026 - val_loss: 0.3821 - val_acc: 0.8105
Epoch 63/120
Epoch 00063: val_loss did not improve
 - 1s - loss: 0.4118 - acc: 0.7964 - val_loss: 0.4154 - val_acc: 0.8055
Epoch 64/120
Epoch 00064: val_loss did not improve
 - 1s - loss: 0.3960 - acc: 0.8125 - val_loss: 0.3754 - val_acc: 0.8229
Epoch 65/120
Epoch 00065: val_loss did not improve
 - 1s - loss: 0.3708 - acc: 0.8190 - val_los

Epoch 120/120
Epoch 00120: val_loss did not improve
 - 1s - loss: 0.3533 - acc: 0.8406 - val_loss: 0.3453 - val_acc: 0.8404
Epoch 1/120
Epoch 00001: val_loss improved from inf to 0.69390, saving model to best_m.h5
 - 4s - loss: 0.6923 - acc: 0.5295 - val_loss: 0.6939 - val_acc: 0.4963
Epoch 2/120
Epoch 00002: val_loss did not improve
 - 1s - loss: 0.6898 - acc: 0.5439 - val_loss: 0.6986 - val_acc: 0.4963
Epoch 3/120
Epoch 00003: val_loss did not improve
 - 1s - loss: 0.6900 - acc: 0.5389 - val_loss: 0.7016 - val_acc: 0.4963
Epoch 4/120
Epoch 00004: val_loss improved from 0.69390 to 0.68610, saving model to best_m.h5
 - 1s - loss: 0.6890 - acc: 0.5303 - val_loss: 0.6861 - val_acc: 0.4963
Epoch 5/120
Epoch 00005: val_loss did not improve
 - 1s - loss: 0.6729 - acc: 0.5499 - val_loss: 0.6951 - val_acc: 0.4963
Epoch 6/120
Epoch 00006: val_loss did not improve
 - 1s - loss: 0.6904 - acc: 0.5349 - val_loss: 0.6876 - val_acc: 0.4963
Epoch 7/120
Epoch 00007: val_loss improved from 0.68610 to 0

Epoch 60/120
Epoch 00060: val_loss did not improve
 - 1s - loss: 0.3262 - acc: 0.8397 - val_loss: 0.3615 - val_acc: 0.8329
Epoch 61/120
Epoch 00061: val_loss did not improve
 - 1s - loss: 0.3458 - acc: 0.8314 - val_loss: 0.3683 - val_acc: 0.8304
Epoch 62/120
Epoch 00062: val_loss did not improve
 - 1s - loss: 0.3525 - acc: 0.8245 - val_loss: 0.3594 - val_acc: 0.8229
Epoch 63/120
Epoch 00063: val_loss did not improve
 - 1s - loss: 0.3254 - acc: 0.8520 - val_loss: 0.3689 - val_acc: 0.8354
Epoch 64/120
Epoch 00064: val_loss did not improve
 - 1s - loss: 0.3416 - acc: 0.8308 - val_loss: 0.3674 - val_acc: 0.8279
Epoch 65/120
Epoch 00065: val_loss did not improve
 - 1s - loss: 0.3301 - acc: 0.8441 - val_loss: 0.3426 - val_acc: 0.8229
Epoch 66/120
Epoch 00066: val_loss did not improve
 - 1s - loss: 0.3564 - acc: 0.8306 - val_loss: 0.3573 - val_acc: 0.8304
Epoch 67/120
Epoch 00067: val_loss did not improve
 - 1s - loss: 0.3189 - acc: 0.8516 - val_loss: 0.3717 - val_acc: 0.8454
Epoch 68/120
Epo

Epoch 3/120
Epoch 00003: val_loss did not improve
 - 1s - loss: 0.6955 - acc: 0.5059 - val_loss: 0.6891 - val_acc: 0.5736
Epoch 4/120
Epoch 00004: val_loss improved from 0.68835 to 0.68791, saving model to best_m.h5
 - 1s - loss: 0.6933 - acc: 0.4951 - val_loss: 0.6879 - val_acc: 0.5736
Epoch 5/120
Epoch 00005: val_loss improved from 0.68791 to 0.68686, saving model to best_m.h5
 - 1s - loss: 0.6932 - acc: 0.4955 - val_loss: 0.6869 - val_acc: 0.5736
Epoch 6/120
Epoch 00006: val_loss improved from 0.68686 to 0.68621, saving model to best_m.h5
 - 1s - loss: 0.6929 - acc: 0.5301 - val_loss: 0.6862 - val_acc: 0.5736
Epoch 7/120
Epoch 00007: val_loss did not improve
 - 1s - loss: 0.6926 - acc: 0.5247 - val_loss: 0.6885 - val_acc: 0.5736
Epoch 8/120
Epoch 00008: val_loss did not improve
 - 1s - loss: 0.6933 - acc: 0.5109 - val_loss: 0.6916 - val_acc: 0.5736
Epoch 9/120
Epoch 00009: val_loss did not improve
 - 1s - loss: 0.6927 - acc: 0.5347 - val_loss: 0.6894 - val_acc: 0.5736
Epoch 10/120
E

Epoch 62/120
Epoch 00062: val_loss did not improve
 - 1s - loss: 0.3859 - acc: 0.8108 - val_loss: 0.3191 - val_acc: 0.8554
Epoch 63/120
Epoch 00063: val_loss improved from 0.31769 to 0.30096, saving model to best_m.h5
 - 1s - loss: 0.3556 - acc: 0.8266 - val_loss: 0.3010 - val_acc: 0.8504
Epoch 64/120
Epoch 00064: val_loss did not improve
 - 1s - loss: 0.4058 - acc: 0.8056 - val_loss: 0.3745 - val_acc: 0.8105
Epoch 65/120
Epoch 00065: val_loss did not improve
 - 1s - loss: 0.3560 - acc: 0.8266 - val_loss: 0.3103 - val_acc: 0.8603
Epoch 66/120
Epoch 00066: val_loss improved from 0.30096 to 0.29668, saving model to best_m.h5
 - 1s - loss: 0.3539 - acc: 0.8397 - val_loss: 0.2967 - val_acc: 0.8603
Epoch 67/120
Epoch 00067: val_loss did not improve
 - 1s - loss: 0.3760 - acc: 0.8224 - val_loss: 0.3119 - val_acc: 0.8678
Epoch 68/120
Epoch 00068: val_loss did not improve
 - 1s - loss: 0.3483 - acc: 0.8333 - val_loss: 0.2994 - val_acc: 0.8628
Epoch 69/120
Epoch 00069: val_loss did not improve


Epoch 4/120
Epoch 00004: val_loss improved from 0.69244 to 0.69087, saving model to best_m.h5
 - 1s - loss: 0.6919 - acc: 0.5256 - val_loss: 0.6909 - val_acc: 0.5062
Epoch 5/120
Epoch 00005: val_loss improved from 0.69087 to 0.62288, saving model to best_m.h5
 - 1s - loss: 0.6632 - acc: 0.6062 - val_loss: 0.6229 - val_acc: 0.6833
Epoch 6/120
Epoch 00006: val_loss improved from 0.62288 to 0.60692, saving model to best_m.h5
 - 1s - loss: 0.6584 - acc: 0.5926 - val_loss: 0.6069 - val_acc: 0.5960
Epoch 7/120
Epoch 00007: val_loss improved from 0.60692 to 0.59364, saving model to best_m.h5
 - 1s - loss: 0.6319 - acc: 0.6122 - val_loss: 0.5936 - val_acc: 0.6783
Epoch 8/120
Epoch 00008: val_loss improved from 0.59364 to 0.56257, saving model to best_m.h5
 - 1s - loss: 0.6100 - acc: 0.6708 - val_loss: 0.5626 - val_acc: 0.7007
Epoch 9/120
Epoch 00009: val_loss did not improve
 - 1s - loss: 0.5938 - acc: 0.6837 - val_loss: 0.5702 - val_acc: 0.6733
Epoch 10/120
Epoch 00010: val_loss did not impro

Epoch 63/120
Epoch 00063: val_loss improved from 0.32339 to 0.31223, saving model to best_m.h5
 - 1s - loss: 0.3534 - acc: 0.8397 - val_loss: 0.3122 - val_acc: 0.8454
Epoch 64/120
Epoch 00064: val_loss improved from 0.31223 to 0.30920, saving model to best_m.h5
 - 1s - loss: 0.3608 - acc: 0.8358 - val_loss: 0.3092 - val_acc: 0.8454
Epoch 65/120
Epoch 00065: val_loss did not improve
 - 1s - loss: 0.3452 - acc: 0.8356 - val_loss: 0.3476 - val_acc: 0.8454
Epoch 66/120
Epoch 00066: val_loss did not improve
 - 1s - loss: 0.3455 - acc: 0.8308 - val_loss: 0.3229 - val_acc: 0.8429
Epoch 67/120
Epoch 00067: val_loss did not improve
 - 1s - loss: 0.3361 - acc: 0.8366 - val_loss: 0.3170 - val_acc: 0.8504
Epoch 68/120
Epoch 00068: val_loss did not improve
 - 1s - loss: 0.3644 - acc: 0.8212 - val_loss: 0.3189 - val_acc: 0.8429
Epoch 69/120
Epoch 00069: val_loss did not improve
 - 1s - loss: 0.3689 - acc: 0.8297 - val_loss: 0.3512 - val_acc: 0.8354
Epoch 70/120
Epoch 00070: val_loss improved from 0.

In [23]:
# Persist the CNN 2 predictions (train-side and test-side) as a pickled list.
with open('../features/cnn_2_aug_skimage_preprocess_feat.pkl','wb') as fout:
    pickle.dump([train_pred,test_pred],fout)

# Log loss of the train-side predictions against the labels.
print(log_loss(y, train_pred))

# Author's score notes, kept as written (presumably earlier log-loss values):
# this 2728
# skimage 2710

# Build the submission table: ids from the test frame plus the predictions.
submission = pd.DataFrame({'id': test_df['id']})
submission['is_iceberg'] = test_pred
print(submission.head())
submission.to_csv('../results/cnn_2_aug_skimage_preprocess.csv', index=False)

0.307305449119
         id  is_iceberg
0  5941774d    0.638850
1  4023181e    0.498615
2  b20200e4    0.065239
3  e7f018bb    0.965096
4  4371c8c3    0.293843


In [24]:
def create_model():
    '''Build and return the (uncompiled) Keras Sequential model.

    Layout: four convolutional stages (3x3 ReLU convolution, max pooling with
    stride 2, 20% dropout) with 64, 128, 128 and 64 filters, followed by
    Flatten, two ReLU dense layers (512 and 256 units, each with 20% dropout)
    and a single sigmoid output unit. Input shape is (75, 75, 3).
    '''
    model = Sequential()

    # (filters, pool_size) per stage; only the first stage pools over 3x3
    conv_stages = ((64, (3, 3)), (128, (2, 2)), (128, (2, 2)), (64, (2, 2)))
    for stage, (filters, pool) in enumerate(conv_stages):
        # The input shape is declared on the very first layer only
        extra = {'input_shape': (75, 75, 3)} if stage == 0 else {}
        model.add(Conv2D(filters, kernel_size=(3, 3), activation='relu', **extra))
        model.add(MaxPooling2D(pool_size=pool, strides=(2, 2)))
        model.add(Dropout(0.2))

    # The dense layers need flat input
    model.add(Flatten())

    for units in (512, 256):
        model.add(Dense(units, activation='relu'))
        model.add(Dropout(0.2))

    # Output
    model.add(Dense(1, activation="sigmoid"))
    return model
print('model model')

model model


In [25]:
def kfold_train(fold_cnt=3,rnd=42):
    '''Train a fresh model on every K-fold split and gather its predictions.

    Relies on names defined elsewhere in the notebook: ``train_x``, ``y``,
    ``test_x``, ``create_model`` and ``lr_f``.

    Args:
        fold_cnt: number of K-fold splits.
        rnd: seed material; ``2*rnd`` is the ``random_state`` of the shuffled
            ``KFold``. It is not used anywhere else in this function.

    Returns:
        (train_pred, test_pred): ``train_pred`` has one row per training
        sample, filled by the model for which that sample was in the
        validation split; ``test_pred`` has one row per test sample and is
        the mean of the per-fold test predictions.
    '''
    # Size the buffers from the data rather than hard-coding 1604 / 8424 rows.
    train_pred = np.zeros((len(train_x), 1))
    test_pred = np.zeros((len(test_x), 1))
    kf = KFold(n_splits=fold_cnt, shuffle=True, random_state=2*rnd)

    # Loop-invariant settings, built once instead of once per fold.
    datagen = ImageDataGenerator(
        rotation_range = 20,
        width_shift_range = 0.1,
        height_shift_range = 0.1,
        zoom_range = 0.2,
        horizontal_flip=True
    )
    bat_size = 16
    model_p = 'best_m.h5'

    for train_index, val_index in kf.split(train_x):
        curr_x,curr_y = train_x[train_index],y[train_index]
        val_x,val_y = train_x[val_index],y[val_index]

        # Floor division: a trailing partial batch is not counted as a step.
        steps_train = len(curr_y)//bat_size

        model = create_model()
        # NOTE(review): lr_f is applied through LearningRateScheduler, so the
        # 0.0005 given to Adam is presumably overridden every epoch - confirm.
        model.compile(loss='binary_crossentropy', optimizer=Adam(0.0005), metrics=['accuracy'])
        model_chk = ModelCheckpoint(filepath=model_p, monitor='val_loss', save_best_only=True, verbose=1)
        lr_s = LearningRateScheduler(lr_f)
        model.fit_generator(datagen.flow(curr_x, curr_y, batch_size=bat_size),
                  validation_data=(val_x,val_y),
                  steps_per_epoch = steps_train,
                  epochs=120,
                  verbose=2,
                  callbacks=[model_chk,lr_s]
                 )

        # Predict with the checkpoint written at the best val_loss rather
        # than with the weights left after the final epoch.
        model = load_model(model_p)
        train_pred[val_index] = model.predict(val_x)
        test_pred = test_pred + model.predict(test_x)/fold_cnt
        print('============================')
    return train_pred,test_pred

train_pred,test_pred = kfold_train(fold_cnt=4)

Epoch 1/120
Epoch 00001: val_loss improved from inf to 0.69345, saving model to best_m.h5
 - 5s - loss: 0.6978 - acc: 0.5205 - val_loss: 0.6935 - val_acc: 0.5087
Epoch 2/120
Epoch 00002: val_loss improved from 0.69345 to 0.69145, saving model to best_m.h5
 - 2s - loss: 0.6885 - acc: 0.5347 - val_loss: 0.6914 - val_acc: 0.5087
Epoch 3/120
Epoch 00003: val_loss improved from 0.69145 to 0.68975, saving model to best_m.h5
 - 2s - loss: 0.6766 - acc: 0.5714 - val_loss: 0.6898 - val_acc: 0.5087
Epoch 4/120
Epoch 00004: val_loss improved from 0.68975 to 0.67581, saving model to best_m.h5
 - 2s - loss: 0.6719 - acc: 0.5847 - val_loss: 0.6758 - val_acc: 0.5087
Epoch 5/120
Epoch 00005: val_loss improved from 0.67581 to 0.60508, saving model to best_m.h5
 - 2s - loss: 0.6295 - acc: 0.6197 - val_loss: 0.6051 - val_acc: 0.6983
Epoch 6/120
Epoch 00006: val_loss improved from 0.60508 to 0.57414, saving model to best_m.h5
 - 2s - loss: 0.6258 - acc: 0.6322 - val_loss: 0.5741 - val_acc: 0.7207
Epoch 7/

Epoch 59/120
Epoch 00059: val_loss improved from 0.34878 to 0.34851, saving model to best_m.h5
 - 2s - loss: 0.4260 - acc: 0.7967 - val_loss: 0.3485 - val_acc: 0.8404
Epoch 60/120
Epoch 00060: val_loss did not improve
 - 2s - loss: 0.4239 - acc: 0.7858 - val_loss: 0.3697 - val_acc: 0.8254
Epoch 61/120
Epoch 00061: val_loss improved from 0.34851 to 0.32928, saving model to best_m.h5
 - 2s - loss: 0.4117 - acc: 0.7922 - val_loss: 0.3293 - val_acc: 0.8429
Epoch 62/120
Epoch 00062: val_loss did not improve
 - 2s - loss: 0.4283 - acc: 0.7858 - val_loss: 0.3451 - val_acc: 0.8479
Epoch 63/120
Epoch 00063: val_loss did not improve
 - 2s - loss: 0.4130 - acc: 0.8100 - val_loss: 0.3437 - val_acc: 0.8354
Epoch 64/120
Epoch 00064: val_loss did not improve
 - 2s - loss: 0.4327 - acc: 0.7925 - val_loss: 0.3361 - val_acc: 0.8354
Epoch 65/120
Epoch 00065: val_loss did not improve
 - 2s - loss: 0.4110 - acc: 0.8097 - val_loss: 0.3529 - val_acc: 0.8379
Epoch 66/120
Epoch 00066: val_loss did not improve


 - 5s - loss: 0.6970 - acc: 0.5078 - val_loss: 0.6902 - val_acc: 0.5461
Epoch 2/120
Epoch 00002: val_loss did not improve
 - 2s - loss: 0.6928 - acc: 0.5220 - val_loss: 0.6907 - val_acc: 0.5461
Epoch 3/120
Epoch 00003: val_loss improved from 0.69022 to 0.68953, saving model to best_m.h5
 - 2s - loss: 0.6909 - acc: 0.5447 - val_loss: 0.6895 - val_acc: 0.5461
Epoch 4/120
Epoch 00004: val_loss improved from 0.68953 to 0.68952, saving model to best_m.h5
 - 2s - loss: 0.6923 - acc: 0.5183 - val_loss: 0.6895 - val_acc: 0.5461
Epoch 5/120
Epoch 00005: val_loss improved from 0.68952 to 0.68798, saving model to best_m.h5
 - 2s - loss: 0.6957 - acc: 0.5125 - val_loss: 0.6880 - val_acc: 0.5461
Epoch 6/120
Epoch 00006: val_loss did not improve
 - 2s - loss: 0.6868 - acc: 0.5264 - val_loss: 0.6913 - val_acc: 0.5461
Epoch 7/120
Epoch 00007: val_loss did not improve
 - 2s - loss: 0.6937 - acc: 0.5155 - val_loss: 0.6890 - val_acc: 0.5461
Epoch 8/120
Epoch 00008: val_loss did not improve
 - 2s - loss: 

Epoch 60/120
Epoch 00060: val_loss did not improve
 - 2s - loss: 0.4724 - acc: 0.7733 - val_loss: 0.4417 - val_acc: 0.7756
Epoch 61/120
Epoch 00061: val_loss did not improve
 - 2s - loss: 0.4981 - acc: 0.7464 - val_loss: 0.4392 - val_acc: 0.7980
Epoch 62/120
Epoch 00062: val_loss did not improve
 - 2s - loss: 0.4738 - acc: 0.7675 - val_loss: 0.4470 - val_acc: 0.7930
Epoch 63/120
Epoch 00063: val_loss did not improve
 - 2s - loss: 0.4676 - acc: 0.7706 - val_loss: 0.4828 - val_acc: 0.7506
Epoch 64/120
Epoch 00064: val_loss did not improve
 - 2s - loss: 0.5008 - acc: 0.7472 - val_loss: 0.4589 - val_acc: 0.7706
Epoch 65/120
Epoch 00065: val_loss did not improve
 - 2s - loss: 0.4687 - acc: 0.7697 - val_loss: 0.4377 - val_acc: 0.7805
Epoch 66/120
Epoch 00066: val_loss improved from 0.42931 to 0.42602, saving model to best_m.h5
 - 2s - loss: 0.4928 - acc: 0.7591 - val_loss: 0.4260 - val_acc: 0.8155
Epoch 67/120
Epoch 00067: val_loss improved from 0.42602 to 0.42240, saving model to best_m.h5


 - 5s - loss: 0.6966 - acc: 0.4951 - val_loss: 0.6924 - val_acc: 0.6559
Epoch 2/120
Epoch 00002: val_loss improved from 0.69242 to 0.69130, saving model to best_m.h5
 - 2s - loss: 0.6896 - acc: 0.5422 - val_loss: 0.6913 - val_acc: 0.5611
Epoch 3/120
Epoch 00003: val_loss improved from 0.69130 to 0.68928, saving model to best_m.h5
 - 2s - loss: 0.6945 - acc: 0.5034 - val_loss: 0.6893 - val_acc: 0.5611
Epoch 4/120
Epoch 00004: val_loss improved from 0.68928 to 0.66531, saving model to best_m.h5
 - 2s - loss: 0.6886 - acc: 0.5422 - val_loss: 0.6653 - val_acc: 0.6434
Epoch 5/120
Epoch 00005: val_loss improved from 0.66531 to 0.61947, saving model to best_m.h5
 - 2s - loss: 0.6550 - acc: 0.5997 - val_loss: 0.6195 - val_acc: 0.6534
Epoch 6/120
Epoch 00006: val_loss improved from 0.61947 to 0.60715, saving model to best_m.h5
 - 2s - loss: 0.6525 - acc: 0.6233 - val_loss: 0.6071 - val_acc: 0.6858
Epoch 7/120
Epoch 00007: val_loss improved from 0.60715 to 0.58749, saving model to best_m.h5
 - 2

Epoch 58/120
Epoch 00058: val_loss did not improve
 - 2s - loss: 0.3986 - acc: 0.8039 - val_loss: 0.4420 - val_acc: 0.7830
Epoch 59/120
Epoch 00059: val_loss did not improve
 - 2s - loss: 0.4017 - acc: 0.7997 - val_loss: 0.3424 - val_acc: 0.8254
Epoch 60/120
Epoch 00060: val_loss did not improve
 - 2s - loss: 0.3726 - acc: 0.8283 - val_loss: 0.3439 - val_acc: 0.8279
Epoch 61/120
Epoch 00061: val_loss did not improve
 - 2s - loss: 0.3818 - acc: 0.8174 - val_loss: 0.3619 - val_acc: 0.8279
Epoch 62/120
Epoch 00062: val_loss did not improve
 - 2s - loss: 0.3784 - acc: 0.8175 - val_loss: 0.3296 - val_acc: 0.8429
Epoch 63/120
Epoch 00063: val_loss did not improve
 - 2s - loss: 0.3928 - acc: 0.8106 - val_loss: 0.3854 - val_acc: 0.8329
Epoch 64/120
Epoch 00064: val_loss did not improve
 - 2s - loss: 0.3976 - acc: 0.8197 - val_loss: 0.3930 - val_acc: 0.8254
Epoch 65/120
Epoch 00065: val_loss did not improve
 - 2s - loss: 0.3815 - acc: 0.8070 - val_loss: 0.3440 - val_acc: 0.8155
Epoch 66/120
Epo

Epoch 1/120
Epoch 00001: val_loss improved from inf to 0.69030, saving model to best_m.h5
 - 5s - loss: 0.6938 - acc: 0.5433 - val_loss: 0.6903 - val_acc: 0.5062
Epoch 2/120
Epoch 00002: val_loss did not improve
 - 2s - loss: 0.6771 - acc: 0.5322 - val_loss: 0.6909 - val_acc: 0.5062
Epoch 3/120
Epoch 00003: val_loss improved from 0.69030 to 0.63921, saving model to best_m.h5
 - 2s - loss: 0.6435 - acc: 0.6187 - val_loss: 0.6392 - val_acc: 0.6484
Epoch 4/120
Epoch 00004: val_loss did not improve
 - 2s - loss: 0.6333 - acc: 0.6370 - val_loss: 0.6505 - val_acc: 0.6808
Epoch 5/120
Epoch 00005: val_loss did not improve
 - 2s - loss: 0.6370 - acc: 0.5997 - val_loss: 0.6904 - val_acc: 0.5062
Epoch 6/120
Epoch 00006: val_loss improved from 0.63921 to 0.59358, saving model to best_m.h5
 - 2s - loss: 0.6343 - acc: 0.5983 - val_loss: 0.5936 - val_acc: 0.6858
Epoch 7/120
Epoch 00007: val_loss improved from 0.59358 to 0.59138, saving model to best_m.h5
 - 2s - loss: 0.5995 - acc: 0.6800 - val_loss:

Epoch 61/120
Epoch 00061: val_loss improved from 0.36024 to 0.35405, saving model to best_m.h5
 - 2s - loss: 0.3495 - acc: 0.8239 - val_loss: 0.3540 - val_acc: 0.8404
Epoch 62/120
Epoch 00062: val_loss did not improve
 - 2s - loss: 0.3404 - acc: 0.8383 - val_loss: 0.3595 - val_acc: 0.8354
Epoch 63/120
Epoch 00063: val_loss improved from 0.35405 to 0.34730, saving model to best_m.h5
 - 2s - loss: 0.3592 - acc: 0.8316 - val_loss: 0.3473 - val_acc: 0.8329
Epoch 64/120
Epoch 00064: val_loss did not improve
 - 2s - loss: 0.3710 - acc: 0.8289 - val_loss: 0.3869 - val_acc: 0.7880
Epoch 65/120
Epoch 00065: val_loss improved from 0.34730 to 0.34210, saving model to best_m.h5
 - 2s - loss: 0.3711 - acc: 0.8133 - val_loss: 0.3421 - val_acc: 0.8479
Epoch 66/120
Epoch 00066: val_loss did not improve
 - 2s - loss: 0.3604 - acc: 0.8358 - val_loss: 0.3458 - val_acc: 0.8254
Epoch 67/120
Epoch 00067: val_loss did not improve
 - 2s - loss: 0.3396 - acc: 0.8425 - val_loss: 0.3555 - val_acc: 0.8329
Epoch 6

In [26]:
# Persist the CNN 3 predictions (train-side and test-side) as a pickled list.
with open('../features/cnn_3_aug_skimage_preprocess_feat.pkl','wb') as fout:
    pickle.dump([train_pred,test_pred],fout)

# Log loss of the train-side predictions against the labels.
print(log_loss(y, train_pred))

# Author's score notes, kept as written (presumably earlier log-loss values):
# this 2737
# new  2509

# Build the submission table: ids from the test frame plus the predictions.
submission = pd.DataFrame({'id': test_df['id']})
submission['is_iceberg'] = test_pred
print(submission.head())
submission.to_csv('../results/cnn_3_aug_skimage_preprocess.csv', index=False)


0.319404888954
         id  is_iceberg
0  5941774d    0.597344
1  4023181e    0.505602
2  b20200e4    0.201883
3  e7f018bb    0.984903
4  4371c8c3    0.580867


In [27]:
from keras.callbacks import ModelCheckpoint,LearningRateScheduler
def ConvBlock(model, layers, filters):
    '''Append one convolutional stage to `model` (modified in place).

    The stage is `layers` repetitions of: zero padding of 1 pixel per side,
    a 3x3 convolution with `filters` output channels and ReLU activation, and
    batch normalization over axis 3. One 2x2 max pooling with stride 2 is
    added after the last repetition. Nothing is returned.
    '''
    for _ in range(layers):
        # Padding by 1 before an unpadded 3x3 convolution keeps the spatial size.
        stage_layers = (
            ZeroPadding2D((1, 1)),
            Conv2D(filters, (3, 3), activation='relu'),
            BatchNormalization(axis=3),
        )
        for layer in stage_layers:
            model.add(layer)
    # Pooling happens once per block, not once per repetition.
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))

def create_model():
    '''Build the 4-stage CNN and return it as an uncompiled keras model.

    Input is a 75x75x3 image. Four single-convolution ConvBlocks (32, 64, 128
    and 128 filters) are followed by a flatten and one sigmoid unit. Because
    of that dense head the network is not strictly fully convolutional, even
    though earlier notes call it an FCN.
    '''
    model = Sequential()

    # Identity Lambda layer; its only job is to declare the 75x75x3 input shape.
    model.add(Lambda(lambda x: x, input_shape=(75, 75, 3)))

    # Each block ends with a 2x2 pooling, so the spatial size shrinks
    # 75 -> 37 -> 18 -> 9 -> 4 while the channel count follows the tuple below.
    for n_filters in (32, 64, 128, 128):
        ConvBlock(model, 1, n_filters)

    # 4x4x128 feature map -> flat vector -> single probability.
    model.add(Flatten())
    model.add(Dense(1, activation='sigmoid'))

    return model
print('model model')


model model


In [28]:
def kfold_train(fold_cnt=3,rnd=42,epochs=120,bat_size=16):
    '''Train one model per cross-validation fold and collect its predictions.

    Reads the notebook globals `train_x`, `y`, `test_x`, `create_model` and
    `lr_f`. For every fold a fresh model from `create_model()` is trained on
    augmented batches, the checkpoint with the lowest validation loss is
    reloaded from 'best_m.h5', and that checkpoint predicts both the held-out
    fold and the whole test set.

    Parameters
    ----------
    fold_cnt : int
        Number of KFold splits.
    rnd : int
        Seed base; the shuffled KFold uses `random_state=2*rnd`.
    epochs : int
        Training epochs per fold (default 120, the previously hard-coded value).
    bat_size : int
        Batch size of the augmentation generator (default 16, as before).

    Returns
    -------
    (train_pred, test_pred)
        `train_pred` has shape (len(train_x), 1) and holds, for each training
        row, the prediction made while that row was in the held-out fold.
        `test_pred` has shape (len(test_x), 1) and is the mean of the
        per-fold test predictions.

    NOTE(review): the checkpoint is selected on the same fold it then
    predicts, so the out-of-fold predictions are likely somewhat optimistic.
    '''
    # Size the outputs from the data instead of hard-coding 1604 / 8424 rows,
    # so the function also works on subsets or other datasets.
    train_pred = np.zeros((len(train_x), 1))
    test_pred = np.zeros((len(test_x), 1))
    kf = KFold(n_splits=fold_cnt, shuffle=True, random_state=2*rnd)

    # The augmentation settings and checkpoint path are the same for every
    # fold, so they are created once outside the loop.
    datagen = ImageDataGenerator(
        rotation_range = 20,
        width_shift_range = 0.1,
        height_shift_range = 0.1,
        zoom_range = 0.2,
        horizontal_flip=True
    )
    model_p = 'best_m.h5'

    for train_index, test_index in kf.split(train_x):
        curr_x,curr_y = train_x[train_index],y[train_index]
        val_x,val_y = train_x[test_index],y[test_index]

        # At least one step per epoch, even if a fold is smaller than a batch.
        steps_train = max(1, len(curr_y)//bat_size)

        model = create_model()
        model.compile(loss='binary_crossentropy', optimizer=Adam(0.0005), metrics=['accuracy'])
        # A new checkpoint callback per fold, so "best" restarts for each fold
        # even though every fold writes to the same file.
        model_chk = ModelCheckpoint(filepath=model_p, monitor='val_loss', save_best_only=True, verbose=1)
        # lr_f is defined elsewhere in the notebook; presumably it maps the
        # epoch index to a learning rate - verify against its definition.
        lr_s = LearningRateScheduler(lr_f)
        model.fit_generator(datagen.flow(curr_x, curr_y, batch_size=bat_size),
                  validation_data=(val_x,val_y),
                  steps_per_epoch = steps_train,
                  epochs=epochs,
                  verbose=2,
                  callbacks=[model_chk,lr_s]
                 )

        # Predict with the best checkpoint, not with the last-epoch weights.
        model = load_model(model_p)
        train_pred[test_index] = model.predict(val_x)
        test_pred = test_pred + model.predict(test_x)/fold_cnt
        print('============================')
    return train_pred,test_pred

# Run 4-fold training with whichever create_model is currently defined in the
# kernel; the log below is this call's per-epoch output.
train_pred,test_pred = kfold_train(fold_cnt=4)

Epoch 1/120
Epoch 00001: val_loss improved from inf to 0.82608, saving model to best_m.h5
 - 6s - loss: 0.7381 - acc: 0.6539 - val_loss: 0.8261 - val_acc: 0.5087
Epoch 2/120
Epoch 00002: val_loss did not improve
 - 2s - loss: 0.6385 - acc: 0.6970 - val_loss: 1.1154 - val_acc: 0.5087
Epoch 3/120
Epoch 00003: val_loss did not improve
 - 2s - loss: 0.5527 - acc: 0.7522 - val_loss: 0.9179 - val_acc: 0.5087
Epoch 4/120
Epoch 00004: val_loss did not improve
 - 2s - loss: 0.4885 - acc: 0.7608 - val_loss: 1.5764 - val_acc: 0.5087
Epoch 5/120
Epoch 00005: val_loss did not improve
 - 2s - loss: 0.4782 - acc: 0.7733 - val_loss: 1.4799 - val_acc: 0.5112
Epoch 6/120
Epoch 00006: val_loss improved from 0.82608 to 0.81601, saving model to best_m.h5
 - 2s - loss: 0.5424 - acc: 0.7317 - val_loss: 0.8160 - val_acc: 0.5885
Epoch 7/120
Epoch 00007: val_loss improved from 0.81601 to 0.81540, saving model to best_m.h5
 - 2s - loss: 0.4869 - acc: 0.7526 - val_loss: 0.8154 - val_acc: 0.6384
Epoch 8/120
Epoch 

Epoch 64/120
Epoch 00064: val_loss did not improve
 - 2s - loss: 0.2150 - acc: 0.9117 - val_loss: 0.5975 - val_acc: 0.8155
Epoch 65/120
Epoch 00065: val_loss did not improve
 - 2s - loss: 0.2390 - acc: 0.9025 - val_loss: 0.5260 - val_acc: 0.8005
Epoch 66/120
Epoch 00066: val_loss did not improve
 - 2s - loss: 0.2383 - acc: 0.8989 - val_loss: 0.3033 - val_acc: 0.8778
Epoch 67/120
Epoch 00067: val_loss did not improve
 - 2s - loss: 0.2524 - acc: 0.8992 - val_loss: 0.4448 - val_acc: 0.8180
Epoch 68/120
Epoch 00068: val_loss did not improve
 - 2s - loss: 0.2332 - acc: 0.9025 - val_loss: 0.2681 - val_acc: 0.8703
Epoch 69/120
Epoch 00069: val_loss did not improve
 - 2s - loss: 0.2343 - acc: 0.8925 - val_loss: 0.4769 - val_acc: 0.7930
Epoch 70/120
Epoch 00070: val_loss did not improve
 - 2s - loss: 0.2157 - acc: 0.9033 - val_loss: 0.4707 - val_acc: 0.7656
Epoch 71/120
Epoch 00071: val_loss did not improve
 - 2s - loss: 0.1914 - acc: 0.9273 - val_loss: 0.3855 - val_acc: 0.8479
Epoch 72/120
Epo

Epoch 10/120
Epoch 00010: val_loss did not improve
 - 2s - loss: 0.4516 - acc: 0.7750 - val_loss: 0.4673 - val_acc: 0.7656
Epoch 11/120
Epoch 00011: val_loss improved from 0.44988 to 0.39803, saving model to best_m.h5
 - 2s - loss: 0.3974 - acc: 0.8200 - val_loss: 0.3980 - val_acc: 0.8180
Epoch 12/120
Epoch 00012: val_loss did not improve
 - 2s - loss: 0.3939 - acc: 0.8131 - val_loss: 0.4968 - val_acc: 0.7830
Epoch 13/120
Epoch 00013: val_loss did not improve
 - 2s - loss: 0.4283 - acc: 0.8025 - val_loss: 0.4781 - val_acc: 0.7805
Epoch 14/120
Epoch 00014: val_loss improved from 0.39803 to 0.37604, saving model to best_m.h5
 - 2s - loss: 0.4278 - acc: 0.8025 - val_loss: 0.3760 - val_acc: 0.8279
Epoch 15/120
Epoch 00015: val_loss did not improve
 - 2s - loss: 0.4087 - acc: 0.8206 - val_loss: 1.3595 - val_acc: 0.6758
Epoch 16/120
Epoch 00016: val_loss did not improve
 - 2s - loss: 0.4369 - acc: 0.7991 - val_loss: 0.6247 - val_acc: 0.7232
Epoch 17/120
Epoch 00017: val_loss improved from 0.

Epoch 74/120
Epoch 00074: val_loss did not improve
 - 2s - loss: 0.1859 - acc: 0.9214 - val_loss: 0.4701 - val_acc: 0.8254
Epoch 75/120
Epoch 00075: val_loss did not improve
 - 2s - loss: 0.1847 - acc: 0.9208 - val_loss: 0.3075 - val_acc: 0.8653
Epoch 76/120
Epoch 00076: val_loss did not improve
 - 2s - loss: 0.1874 - acc: 0.9192 - val_loss: 0.3100 - val_acc: 0.8678
Epoch 77/120
Epoch 00077: val_loss did not improve
 - 2s - loss: 0.1922 - acc: 0.9189 - val_loss: 0.3129 - val_acc: 0.8603
Epoch 78/120
Epoch 00078: val_loss did not improve
 - 2s - loss: 0.1767 - acc: 0.9256 - val_loss: 0.3465 - val_acc: 0.8554
Epoch 79/120
Epoch 00079: val_loss did not improve
 - 2s - loss: 0.1744 - acc: 0.9275 - val_loss: 0.2999 - val_acc: 0.8828
Epoch 80/120
Epoch 00080: val_loss did not improve
 - 2s - loss: 0.1629 - acc: 0.9325 - val_loss: 0.3058 - val_acc: 0.8728
Epoch 81/120
Epoch 00081: val_loss did not improve
 - 2s - loss: 0.1569 - acc: 0.9433 - val_loss: 0.2975 - val_acc: 0.8878
Epoch 82/120
Epo

Epoch 19/120
Epoch 00019: val_loss did not improve
 - 2s - loss: 0.3736 - acc: 0.8333 - val_loss: 0.5297 - val_acc: 0.7681
Epoch 20/120
Epoch 00020: val_loss did not improve
 - 2s - loss: 0.3594 - acc: 0.8331 - val_loss: 0.3884 - val_acc: 0.8254
Epoch 21/120
Epoch 00021: val_loss improved from 0.37503 to 0.37337, saving model to best_m.h5
 - 2s - loss: 0.3454 - acc: 0.8400 - val_loss: 0.3734 - val_acc: 0.8080
Epoch 22/120
Epoch 00022: val_loss improved from 0.37337 to 0.35807, saving model to best_m.h5
 - 2s - loss: 0.3231 - acc: 0.8566 - val_loss: 0.3581 - val_acc: 0.8454
Epoch 23/120
Epoch 00023: val_loss did not improve
 - 2s - loss: 0.3223 - acc: 0.8516 - val_loss: 0.9077 - val_acc: 0.7382
Epoch 24/120
Epoch 00024: val_loss did not improve
 - 2s - loss: 0.3624 - acc: 0.8347 - val_loss: 0.5514 - val_acc: 0.7731
Epoch 25/120
Epoch 00025: val_loss improved from 0.35807 to 0.31184, saving model to best_m.h5
 - 2s - loss: 0.3185 - acc: 0.8539 - val_loss: 0.3118 - val_acc: 0.8728
Epoch 2

Epoch 82/120
Epoch 00082: val_loss did not improve
 - 2s - loss: 0.1803 - acc: 0.9242 - val_loss: 0.2583 - val_acc: 0.9027
Epoch 83/120
Epoch 00083: val_loss did not improve
 - 3s - loss: 0.1958 - acc: 0.9156 - val_loss: 0.2877 - val_acc: 0.8828
Epoch 84/120
Epoch 00084: val_loss did not improve
 - 2s - loss: 0.1782 - acc: 0.9267 - val_loss: 0.2711 - val_acc: 0.8803
Epoch 85/120
Epoch 00085: val_loss did not improve
 - 2s - loss: 0.1736 - acc: 0.9298 - val_loss: 0.2672 - val_acc: 0.8803
Epoch 86/120
Epoch 00086: val_loss did not improve
 - 2s - loss: 0.1778 - acc: 0.9298 - val_loss: 0.2820 - val_acc: 0.8753
Epoch 87/120
Epoch 00087: val_loss did not improve
 - 2s - loss: 0.1783 - acc: 0.9242 - val_loss: 0.2836 - val_acc: 0.8853
Epoch 88/120
Epoch 00088: val_loss did not improve
 - 2s - loss: 0.1687 - acc: 0.9200 - val_loss: 0.2791 - val_acc: 0.8903
Epoch 89/120
Epoch 00089: val_loss improved from 0.25692 to 0.25051, saving model to best_m.h5
 - 2s - loss: 0.1641 - acc: 0.9367 - val_los

Epoch 25/120
Epoch 00025: val_loss did not improve
 - 2s - loss: 0.2920 - acc: 0.8623 - val_loss: 0.4282 - val_acc: 0.8279
Epoch 26/120
Epoch 00026: val_loss did not improve
 - 2s - loss: 0.2914 - acc: 0.8775 - val_loss: 0.3665 - val_acc: 0.8180
Epoch 27/120
Epoch 00027: val_loss did not improve
 - 2s - loss: 0.2786 - acc: 0.8683 - val_loss: 0.7698 - val_acc: 0.6958
Epoch 28/120
Epoch 00028: val_loss did not improve
 - 2s - loss: 0.2927 - acc: 0.8637 - val_loss: 0.4671 - val_acc: 0.7930
Epoch 29/120
Epoch 00029: val_loss did not improve
 - 2s - loss: 0.2929 - acc: 0.8692 - val_loss: 0.5753 - val_acc: 0.7631
Epoch 30/120
Epoch 00030: val_loss did not improve
 - 2s - loss: 0.3028 - acc: 0.8623 - val_loss: 0.4161 - val_acc: 0.8254
Epoch 31/120
Epoch 00031: val_loss did not improve
 - 2s - loss: 0.2863 - acc: 0.8725 - val_loss: 0.3863 - val_acc: 0.8429
Epoch 32/120
Epoch 00032: val_loss did not improve
 - 2s - loss: 0.3111 - acc: 0.8512 - val_loss: 0.3908 - val_acc: 0.8130
Epoch 33/120
Epo

Epoch 92/120
Epoch 00092: val_loss did not improve
 - 4s - loss: 0.1344 - acc: 0.9550 - val_loss: 0.4127 - val_acc: 0.8653
Epoch 93/120
Epoch 00093: val_loss did not improve
 - 5s - loss: 0.1302 - acc: 0.9406 - val_loss: 0.4254 - val_acc: 0.8653
Epoch 94/120
Epoch 00094: val_loss did not improve
 - 3s - loss: 0.1060 - acc: 0.9525 - val_loss: 0.4108 - val_acc: 0.8678
Epoch 95/120
Epoch 00095: val_loss did not improve
 - 2s - loss: 0.1316 - acc: 0.9483 - val_loss: 0.4130 - val_acc: 0.8628
Epoch 96/120
Epoch 00096: val_loss did not improve
 - 2s - loss: 0.1409 - acc: 0.9342 - val_loss: 0.3963 - val_acc: 0.8728
Epoch 97/120
Epoch 00097: val_loss did not improve
 - 2s - loss: 0.1226 - acc: 0.9464 - val_loss: 0.4022 - val_acc: 0.8728
Epoch 98/120
Epoch 00098: val_loss did not improve
 - 2s - loss: 0.1335 - acc: 0.9448 - val_loss: 0.4483 - val_acc: 0.8529
Epoch 99/120
Epoch 00099: val_loss did not improve
 - 2s - loss: 0.1173 - acc: 0.9517 - val_loss: 0.4491 - val_acc: 0.8628
Epoch 100/120
Ep

In [29]:
# Persist the out-of-fold (train) and fold-averaged (test) predictions so they
# can be reused later as features.
with open('../features/cnn_4_aug_skimage_preprocess_feat.pkl', 'wb') as fout:
    pickle.dump([train_pred, test_pred], fout)

# Log loss of the out-of-fold predictions against the training labels.
print(log_loss(y, train_pred))

# Submission frame: the test ids plus one predicted iceberg probability each.
submission = test_df[['id']].assign(is_iceberg=test_pred)
print(submission.head())
submission.to_csv('../results/cnn_4_aug_skimage_preprocess.csv', index=False)

0.275677288674
         id  is_iceberg
0  5941774d    0.153425
1  4023181e    0.503719
2  b20200e4    0.210857
3  e7f018bb    0.907126
4  4371c8c3    0.691860


In [30]:
def create_model():
    '''Build the deeper CNN and return it as an uncompiled keras model.

    Four convolutional stages (64, 128, 128 and 64 filters) each apply a 3x3
    convolution, ReLU, batch normalization, a 3x3 max pooling with stride 2
    and dropout. The flattened result goes through two ReLU dense layers
    (512 and 256 units, each followed by dropout) into one sigmoid unit. The
    dense head means the network is not strictly fully convolutional, even
    though earlier notes call it an FCN.
    '''
    model = Sequential()

    # (filters, dropout rate) of the four convolutional stages, in order.
    conv_stages = [(64, 0.2), (128, 0.2), (128, 0.3), (64, 0.3)]
    for stage_no, (n_filters, drop_rate) in enumerate(conv_stages):
        # Only the very first layer declares the 75x75x3 input shape.
        if stage_no == 0:
            conv = Conv2D(n_filters, kernel_size=(3, 3), input_shape=(75, 75, 3))
        else:
            conv = Conv2D(n_filters, kernel_size=(3, 3))
        model.add(conv)
        model.add(Activation('relu'))
        model.add(BatchNormalization())
        model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
        model.add(Dropout(drop_rate))

    # The dense layers need a flat vector.
    model.add(Flatten())

    # Two hidden dense layers, each followed by the same dropout rate.
    for n_units in (512, 256):
        model.add(Dense(n_units, activation='relu'))
        model.add(Dropout(0.2))

    # Output: a single probability.
    model.add(Dense(1, activation="sigmoid"))
    return model
print('model model')


model model


In [31]:
def kfold_train(fold_cnt=3,rnd=42,epochs=120,bat_size=16):
    '''Train one model per cross-validation fold and collect its predictions.

    Reads the notebook globals `train_x`, `y`, `test_x`, `create_model` and
    `lr_f`. For every fold a fresh model from `create_model()` is trained on
    augmented batches, the checkpoint with the lowest validation loss is
    reloaded from 'best_m.h5', and that checkpoint predicts both the held-out
    fold and the whole test set.

    Parameters
    ----------
    fold_cnt : int
        Number of KFold splits.
    rnd : int
        Seed base; the shuffled KFold uses `random_state=2*rnd`.
    epochs : int
        Training epochs per fold (default 120, the previously hard-coded value).
    bat_size : int
        Batch size of the augmentation generator (default 16, as before).

    Returns
    -------
    (train_pred, test_pred)
        `train_pred` has shape (len(train_x), 1) and holds, for each training
        row, the prediction made while that row was in the held-out fold.
        `test_pred` has shape (len(test_x), 1) and is the mean of the
        per-fold test predictions.

    NOTE(review): the checkpoint is selected on the same fold it then
    predicts, so the out-of-fold predictions are likely somewhat optimistic.
    '''
    # Size the outputs from the data instead of hard-coding 1604 / 8424 rows,
    # so the function also works on subsets or other datasets.
    train_pred = np.zeros((len(train_x), 1))
    test_pred = np.zeros((len(test_x), 1))
    kf = KFold(n_splits=fold_cnt, shuffle=True, random_state=2*rnd)

    # The augmentation settings and checkpoint path are the same for every
    # fold, so they are created once outside the loop.
    datagen = ImageDataGenerator(
        rotation_range = 20,
        width_shift_range = 0.1,
        height_shift_range = 0.1,
        zoom_range = 0.2,
        horizontal_flip=True
    )
    model_p = 'best_m.h5'

    for train_index, test_index in kf.split(train_x):
        curr_x,curr_y = train_x[train_index],y[train_index]
        val_x,val_y = train_x[test_index],y[test_index]

        # At least one step per epoch, even if a fold is smaller than a batch.
        steps_train = max(1, len(curr_y)//bat_size)

        model = create_model()
        model.compile(loss='binary_crossentropy', optimizer=Adam(0.0005), metrics=['accuracy'])
        # A new checkpoint callback per fold, so "best" restarts for each fold
        # even though every fold writes to the same file.
        model_chk = ModelCheckpoint(filepath=model_p, monitor='val_loss', save_best_only=True, verbose=1)
        # lr_f is defined elsewhere in the notebook; presumably it maps the
        # epoch index to a learning rate - verify against its definition.
        lr_s = LearningRateScheduler(lr_f)
        model.fit_generator(datagen.flow(curr_x, curr_y, batch_size=bat_size),
                  validation_data=(val_x,val_y),
                  steps_per_epoch = steps_train,
                  epochs=epochs,
                  verbose=2,
                  callbacks=[model_chk,lr_s]
                 )

        # Predict with the best checkpoint, not with the last-epoch weights.
        model = load_model(model_p)
        train_pred[test_index] = model.predict(val_x)
        test_pred = test_pred + model.predict(test_x)/fold_cnt
        print('============================')
    return train_pred,test_pred

# Run 4-fold training with whichever create_model is currently defined in the
# kernel; the log below is this call's per-epoch output.
train_pred,test_pred = kfold_train(fold_cnt=4)

Epoch 1/120
Epoch 00001: val_loss improved from inf to 0.87364, saving model to best_m.h5
 - 10s - loss: 0.6705 - acc: 0.6205 - val_loss: 0.8736 - val_acc: 0.5087
Epoch 2/120
Epoch 00002: val_loss did not improve
 - 3s - loss: 0.5852 - acc: 0.6700 - val_loss: 2.7320 - val_acc: 0.5087
Epoch 3/120
Epoch 00003: val_loss did not improve
 - 4s - loss: 0.5685 - acc: 0.7041 - val_loss: 1.4383 - val_acc: 0.5087
Epoch 4/120
Epoch 00004: val_loss did not improve
 - 5s - loss: 0.5363 - acc: 0.7212 - val_loss: 2.9665 - val_acc: 0.5087
Epoch 5/120
Epoch 00005: val_loss did not improve
 - 4s - loss: 0.5295 - acc: 0.7406 - val_loss: 1.6610 - val_acc: 0.5087
Epoch 6/120
Epoch 00006: val_loss did not improve
 - 3s - loss: 0.4848 - acc: 0.7456 - val_loss: 1.7407 - val_acc: 0.5087
Epoch 7/120
Epoch 00007: val_loss improved from 0.87364 to 0.85097, saving model to best_m.h5
 - 3s - loss: 0.4987 - acc: 0.7666 - val_loss: 0.8510 - val_acc: 0.5087
Epoch 8/120
Epoch 00008: val_loss did not improve
 - 3s - los

Epoch 65/120
Epoch 00065: val_loss did not improve
 - 4s - loss: 0.3200 - acc: 0.8470 - val_loss: 0.4260 - val_acc: 0.8304
Epoch 66/120
Epoch 00066: val_loss did not improve
 - 5s - loss: 0.3514 - acc: 0.8422 - val_loss: 0.2968 - val_acc: 0.8603
Epoch 67/120
Epoch 00067: val_loss did not improve
 - 4s - loss: 0.3104 - acc: 0.8750 - val_loss: 0.3034 - val_acc: 0.8454
Epoch 68/120
Epoch 00068: val_loss did not improve
 - 3s - loss: 0.3172 - acc: 0.8633 - val_loss: 0.2741 - val_acc: 0.8953
Epoch 69/120
Epoch 00069: val_loss did not improve
 - 3s - loss: 0.3127 - acc: 0.8575 - val_loss: 0.4033 - val_acc: 0.8055
Epoch 70/120
Epoch 00070: val_loss did not improve
 - 3s - loss: 0.3436 - acc: 0.8514 - val_loss: 0.4186 - val_acc: 0.7930
Epoch 71/120
Epoch 00071: val_loss did not improve
 - 3s - loss: 0.3181 - acc: 0.8641 - val_loss: 0.2686 - val_acc: 0.8653
Epoch 72/120
Epoch 00072: val_loss did not improve
 - 3s - loss: 0.2834 - acc: 0.8775 - val_loss: 0.3437 - val_acc: 0.8429
Epoch 73/120
Epo

Epoch 9/120
Epoch 00009: val_loss improved from 0.51432 to 0.43880, saving model to best_m.h5
 - 4s - loss: 0.4865 - acc: 0.7575 - val_loss: 0.4388 - val_acc: 0.7955
Epoch 10/120
Epoch 00010: val_loss improved from 0.43880 to 0.38927, saving model to best_m.h5
 - 4s - loss: 0.4647 - acc: 0.7764 - val_loss: 0.3893 - val_acc: 0.8105
Epoch 11/120
Epoch 00011: val_loss did not improve
 - 3s - loss: 0.4429 - acc: 0.7883 - val_loss: 0.3903 - val_acc: 0.8204
Epoch 12/120
Epoch 00012: val_loss did not improve
 - 3s - loss: 0.4629 - acc: 0.7747 - val_loss: 0.4256 - val_acc: 0.8030
Epoch 13/120
Epoch 00013: val_loss did not improve
 - 3s - loss: 0.4455 - acc: 0.7708 - val_loss: 0.4726 - val_acc: 0.7456
Epoch 14/120
Epoch 00014: val_loss improved from 0.38927 to 0.38919, saving model to best_m.h5
 - 4s - loss: 0.4105 - acc: 0.7922 - val_loss: 0.3892 - val_acc: 0.8005
Epoch 15/120
Epoch 00015: val_loss improved from 0.38919 to 0.37960, saving model to best_m.h5
 - 4s - loss: 0.4162 - acc: 0.7981 -

Epoch 72/120
Epoch 00072: val_loss did not improve
 - 3s - loss: 0.3064 - acc: 0.8614 - val_loss: 0.2841 - val_acc: 0.8803
Epoch 73/120
Epoch 00073: val_loss did not improve
 - 4s - loss: 0.2880 - acc: 0.8739 - val_loss: 0.2832 - val_acc: 0.8628
Epoch 74/120
Epoch 00074: val_loss did not improve
 - 3s - loss: 0.2876 - acc: 0.8739 - val_loss: 0.2860 - val_acc: 0.8579
Epoch 75/120
Epoch 00075: val_loss did not improve
 - 4s - loss: 0.3008 - acc: 0.8670 - val_loss: 0.2849 - val_acc: 0.8603
Epoch 76/120
Epoch 00076: val_loss did not improve
 - 4s - loss: 0.3137 - acc: 0.8691 - val_loss: 0.2905 - val_acc: 0.8529
Epoch 77/120
Epoch 00077: val_loss did not improve
 - 3s - loss: 0.3022 - acc: 0.8598 - val_loss: 0.3083 - val_acc: 0.8479
Epoch 78/120
Epoch 00078: val_loss did not improve
 - 3s - loss: 0.2944 - acc: 0.8689 - val_loss: 0.2869 - val_acc: 0.8529
Epoch 79/120
Epoch 00079: val_loss improved from 0.28178 to 0.27729, saving model to best_m.h5
 - 4s - loss: 0.3021 - acc: 0.8706 - val_los

Epoch 16/120
Epoch 00016: val_loss did not improve
 - 3s - loss: 0.4054 - acc: 0.8025 - val_loss: 0.7218 - val_acc: 0.6683
Epoch 17/120
Epoch 00017: val_loss did not improve
 - 3s - loss: 0.4196 - acc: 0.8033 - val_loss: 0.4186 - val_acc: 0.7930
Epoch 18/120
Epoch 00018: val_loss did not improve
 - 3s - loss: 0.3790 - acc: 0.8108 - val_loss: 1.1936 - val_acc: 0.5835
Epoch 19/120
Epoch 00019: val_loss improved from 0.34564 to 0.33031, saving model to best_m.h5
 - 3s - loss: 0.4373 - acc: 0.7884 - val_loss: 0.3303 - val_acc: 0.8304
Epoch 20/120
Epoch 00020: val_loss did not improve
 - 3s - loss: 0.4354 - acc: 0.7808 - val_loss: 1.0436 - val_acc: 0.6010
Epoch 21/120
Epoch 00021: val_loss did not improve
 - 3s - loss: 0.3958 - acc: 0.8253 - val_loss: 0.4341 - val_acc: 0.7805
Epoch 22/120
Epoch 00022: val_loss did not improve
 - 3s - loss: 0.3715 - acc: 0.8100 - val_loss: 0.3457 - val_acc: 0.8329
Epoch 23/120
Epoch 00023: val_loss did not improve
 - 3s - loss: 0.3855 - acc: 0.8207 - val_los

Epoch 00079: val_loss did not improve
 - 3s - loss: 0.2961 - acc: 0.8700 - val_loss: 0.2429 - val_acc: 0.9027
Epoch 80/120
Epoch 00080: val_loss did not improve
 - 4s - loss: 0.2621 - acc: 0.8900 - val_loss: 0.2476 - val_acc: 0.8903
Epoch 81/120
Epoch 00081: val_loss did not improve
 - 4s - loss: 0.2586 - acc: 0.8817 - val_loss: 0.2543 - val_acc: 0.8728
Epoch 82/120
Epoch 00082: val_loss did not improve
 - 4s - loss: 0.2535 - acc: 0.8866 - val_loss: 0.2458 - val_acc: 0.8853
Epoch 83/120
Epoch 00083: val_loss did not improve
 - 4s - loss: 0.2358 - acc: 0.8983 - val_loss: 0.2687 - val_acc: 0.8853
Epoch 84/120
Epoch 00084: val_loss did not improve
 - 3s - loss: 0.2614 - acc: 0.8825 - val_loss: 0.2751 - val_acc: 0.8803
Epoch 85/120
Epoch 00085: val_loss did not improve
 - 4s - loss: 0.2512 - acc: 0.8850 - val_loss: 0.2528 - val_acc: 0.8878
Epoch 86/120
Epoch 00086: val_loss did not improve
 - 4s - loss: 0.3049 - acc: 0.8595 - val_loss: 0.2529 - val_acc: 0.8928
Epoch 87/120
Epoch 00087: val

Epoch 23/120
Epoch 00023: val_loss did not improve
 - 6s - loss: 0.3420 - acc: 0.8492 - val_loss: 0.4571 - val_acc: 0.8005
Epoch 24/120
Epoch 00024: val_loss did not improve
 - 5s - loss: 0.3913 - acc: 0.8137 - val_loss: 0.4287 - val_acc: 0.8005
Epoch 25/120
Epoch 00025: val_loss did not improve
 - 6s - loss: 0.3596 - acc: 0.8358 - val_loss: 0.5613 - val_acc: 0.7207
Epoch 26/120
Epoch 00026: val_loss did not improve
 - 5s - loss: 0.3214 - acc: 0.8525 - val_loss: 0.3459 - val_acc: 0.8404
Epoch 27/120
Epoch 00027: val_loss did not improve
 - 3s - loss: 0.3674 - acc: 0.8281 - val_loss: 0.5123 - val_acc: 0.7980
Epoch 28/120
Epoch 00028: val_loss did not improve
 - 3s - loss: 0.3507 - acc: 0.8383 - val_loss: 0.4433 - val_acc: 0.7880
Epoch 29/120
Epoch 00029: val_loss did not improve
 - 3s - loss: 0.3459 - acc: 0.8308 - val_loss: 0.3839 - val_acc: 0.8005
Epoch 30/120
Epoch 00030: val_loss did not improve
 - 3s - loss: 0.3367 - acc: 0.8350 - val_loss: 0.4390 - val_acc: 0.8180
Epoch 31/120
Epo

Epoch 89/120
Epoch 00089: val_loss did not improve
 - 3s - loss: 0.2709 - acc: 0.8833 - val_loss: 0.3657 - val_acc: 0.8429
Epoch 90/120
Epoch 00090: val_loss did not improve
 - 4s - loss: 0.2309 - acc: 0.8981 - val_loss: 0.3619 - val_acc: 0.8603
Epoch 91/120
Epoch 00091: val_loss did not improve
 - 3s - loss: 0.2381 - acc: 0.8992 - val_loss: 0.3748 - val_acc: 0.8304
Epoch 92/120
Epoch 00092: val_loss did not improve
 - 3s - loss: 0.2700 - acc: 0.8750 - val_loss: 0.3620 - val_acc: 0.8529
Epoch 93/120
Epoch 00093: val_loss did not improve
 - 4s - loss: 0.2653 - acc: 0.8831 - val_loss: 0.3621 - val_acc: 0.8529
Epoch 94/120
Epoch 00094: val_loss did not improve
 - 4s - loss: 0.2332 - acc: 0.8889 - val_loss: 0.3557 - val_acc: 0.8504
Epoch 95/120
Epoch 00095: val_loss did not improve
 - 4s - loss: 0.2408 - acc: 0.8958 - val_loss: 0.4032 - val_acc: 0.8329
Epoch 96/120
Epoch 00096: val_loss did not improve
 - 4s - loss: 0.2638 - acc: 0.8825 - val_loss: 0.3677 - val_acc: 0.8504
Epoch 97/120
Epo

In [32]:
# Persist the out-of-fold (train) and fold-averaged (test) predictions so they
# can be reused later as features.
with open('../features/cnn_5_aug_skimage_preprocess_feat.pkl', 'wb') as fout:
    pickle.dump([train_pred, test_pred], fout)

# Log loss of the out-of-fold predictions against the training labels.
print(log_loss(y, train_pred))
# Author's score note; what it measures is not shown in this notebook
# (NOTE(review): presumably a log loss x 1e4 - confirm):
# 2364

# Submission frame: the test ids plus one predicted iceberg probability each.
submission = test_df[['id']].assign(is_iceberg=test_pred)
print(submission.head())
submission.to_csv('../results/cnn_5_aug_skimage_preprocess.csv', index=False)

0.265676284575
         id  is_iceberg
0  5941774d    0.149564
1  4023181e    0.352891
2  b20200e4    0.134070
3  e7f018bb    0.981369
4  4371c8c3    0.471172
