In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from keras.utils.np_utils import to_categorical
from keras.models import Sequential,load_model
from keras.layers import Dense, Dropout, Flatten, Lambda, Activation
from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D, GlobalAveragePooling2D
from keras.layers.normalization import BatchNormalization
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint,LearningRateScheduler,EarlyStopping

# Load the train and test sets from JSON. Later cells read the 'band_1',
# 'band_2' and 'is_iceberg' columns of train_df and the 'id' column of test_df.
train_df = pd.read_json('../input/train.json')
test_df = pd.read_json('../input/test.json')

Using TensorFlow backend.


In [2]:
from skimage.exposure import equalize_adapthist

def std_img(x):
    '''Standardise every channel of an image in place.

    Each channel ``x[:, :, c]`` has its own mean subtracted and is then
    divided by its own standard deviation plus 1e-7 (the small constant keeps
    a constant channel from dividing by zero).

    Args:
        x: float array indexed as (rows, cols, channels). It is modified in
            place. An integer array cannot take the float statistics in
            place, so convert it to float before calling.

    Returns:
        The same array object ``x``, standardised channel by channel.
    '''
    # Iterate over however many channels the array actually has instead of
    # assuming exactly 3, so 1-/2-channel inputs work and extra channels are
    # not silently skipped.
    for c in range(x.shape[2]):
        channel = x[:, :, c]  # basic slicing returns a view, so x is updated
        channel -= channel.mean()
        channel /= channel.std() + 1e-7
    return x

def get_image(df):
    '''Turn every row of `df` into a 75x75x3 pseudo-colour image.

    The three channels are band_1, band_2 and their average. Each channel is
    min-max rescaled on its own, the channels are stacked, and the stacked
    image is passed through `equalize_adapthist`.

    Note: a band whose values are all equal has max == min, so its rescale
    divides by zero.

    Returns one array holding all images, stacked along a new first axis.
    '''
    def _minmax(band):
        # Linear map sending the band's minimum to 0 and its maximum to 1.
        lo, hi = band.min(), band.max()
        return (band - lo) / (hi - lo)

    images = []
    for raw_1, raw_2 in zip(df['band_1'], df['band_2']):
        # Each band is stored flat; reshape it into a 75x75 grid.
        band_1 = np.array(raw_1).reshape(75, 75)
        band_2 = np.array(raw_2).reshape(75, 75)
        band_3 = (band_1 + band_2)/2

        stacked = np.dstack([_minmax(band_1), _minmax(band_2), _minmax(band_3)])
        # std_img is not applied here (its call is disabled in this notebook).
        images.append(equalize_adapthist(stacked))
    return np.array(images)


# Build the image arrays for both sets once; the training cells below read
# train_x and test_x as globals.
train_x = get_image(train_df)
test_x = get_image(test_df)

# Sanity check of the resulting array shapes.
print(train_x.shape,test_x.shape)

  .format(dtypeobj_in, dtypeobj_out))


(1604, 75, 75, 3) (8424, 75, 75, 3)


In [3]:
# Target vector taken from the 'is_iceberg' column of the training frame;
# print the first few labels as a quick check.
y = train_df.is_iceberg.values
print(y[:5])

[0 0 1 0 0]


In [4]:
def create_model():
    '''Build CNN variant 1: two convolutional stages and a dense head.

    Not fully convolutional: the feature maps are flattened and fed to
    Dense layers. Returns an uncompiled keras Sequential model that takes
    75x75x3 inputs and ends in a single sigmoid unit.
    '''
    stack = [
        # Stage 1: two 16-filter 3x3 convolutions, pool, dropout.
        Conv2D(16, (3, 3), padding='same', input_shape=(75, 75, 3)),
        Activation('relu'),
        Conv2D(16, (3, 3)),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        # Stage 2: two 32-filter 3x3 convolutions, pool, dropout.
        Conv2D(32, (3, 3)),
        Activation('relu'),
        Conv2D(32, (3, 3)),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        # Classifier head.
        Flatten(),
        Dense(256),
        Activation('relu'),
        Dropout(0.5),
        Dense(1),
        Activation('sigmoid'),
    ]

    model = Sequential()
    for layer in stack:
        model.add(layer)
    return model
# Marker printed after create_model has been defined by this cell.
print('model model')


model model


In [5]:
from sklearn.model_selection import KFold
from keras.preprocessing.image import ImageDataGenerator

def lr_f(epoch):
    '''Step learning-rate schedule keyed on the epoch index.

    Returns 0.001 for epochs below 20, 0.0005 for epochs below 70,
    and 0.0001 for every later epoch.
    '''
    # (exclusive upper epoch bound, learning rate), checked in order.
    steps = ((20, 0.001), (70, 0.0005))
    for upper, rate in steps:
        if epoch < upper:
            return rate
    return 0.0001
    

def kfold_train(fold_cnt=3,rnd=42):
    '''Train one model per K-fold split and collect its predictions.

    Reads the notebook globals `train_x`, `y`, `test_x`, `create_model` and
    `lr_f`. For every fold a fresh model from `create_model()` is trained for
    120 epochs on augmented batches, the checkpoint with the lowest
    validation loss is reloaded, and that model predicts both the held-out
    fold and the full test set.

    Args:
        fold_cnt: number of folds.
        rnd: seed base; the KFold shuffle uses random_state=2*rnd.

    Returns:
        (train_pred, test_pred): held-out-fold predictions with shape
        (len(train_x), 1), and test predictions averaged over the folds with
        shape (len(test_x), 1).
    '''
    # Size the prediction buffers from the data instead of hard-coding the
    # 1604 / 8424 row counts, so other input sizes work too.
    train_pred = np.zeros((len(train_x), 1))
    test_pred = np.zeros((len(test_x), 1))

    # Loop-invariant settings, built once rather than once per fold.
    bat_size = 16
    model_p = 'best_m.h5'  # rewritten by every fold, reloaded before the next one starts
    datagen = ImageDataGenerator(
        width_shift_range=0.05,
        height_shift_range=0.05,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        vertical_flip=True
    )

    kf = KFold(n_splits=fold_cnt, shuffle=True, random_state=2*rnd)
    for train_index, val_index in kf.split(train_x):
        curr_x, curr_y = train_x[train_index], y[train_index]
        val_x, val_y = train_x[val_index], y[val_index]
        steps_train = len(curr_y)//bat_size  # whole batches only

        model = create_model()
        model.compile(loss='binary_crossentropy', optimizer=Adam(0.0005), metrics=['accuracy'])
        # New callback per fold, so "best val_loss so far" restarts each time.
        model_chk = ModelCheckpoint(filepath=model_p, monitor='val_loss', save_best_only=True, verbose=1)
        # NOTE(review): the scheduler assigns lr_f(epoch) every epoch, which
        # appears to supersede the 0.0005 passed to Adam - confirm.
        lr_s = LearningRateScheduler(lr_f)
        model.fit_generator(datagen.flow(curr_x, curr_y, batch_size=bat_size),
                  validation_data=(val_x,val_y),
                  steps_per_epoch = steps_train,
                  epochs=120,
                  verbose=2,
                  callbacks=[model_chk,lr_s]
                 )

        # Predict with the best checkpoint rather than the last-epoch weights.
        model = load_model(model_p)
        train_pred[val_index] = model.predict(val_x)
        test_pred = test_pred + model.predict(test_x)/fold_cnt
        print('============================')
    return train_pred,test_pred

# Run 3-fold training; kfold_train calls whichever create_model is defined
# at the time of this call.
train_pred,test_pred = kfold_train(fold_cnt=3)

Epoch 1/120
Epoch 00001: val_loss improved from inf to 0.66793, saving model to best_m.h5
 - 2s - loss: 0.6932 - acc: 0.5487 - val_loss: 0.6679 - val_acc: 0.5140
Epoch 2/120
Epoch 00002: val_loss improved from 0.66793 to 0.63061, saving model to best_m.h5
 - 2s - loss: 0.6447 - acc: 0.6194 - val_loss: 0.6306 - val_acc: 0.5178
Epoch 3/120
Epoch 00003: val_loss improved from 0.63061 to 0.62696, saving model to best_m.h5
 - 1s - loss: 0.6414 - acc: 0.6068 - val_loss: 0.6270 - val_acc: 0.6879
Epoch 4/120
Epoch 00004: val_loss improved from 0.62696 to 0.60108, saving model to best_m.h5
 - 1s - loss: 0.6227 - acc: 0.6584 - val_loss: 0.6011 - val_acc: 0.6841
Epoch 5/120
Epoch 00005: val_loss did not improve
 - 1s - loss: 0.6162 - acc: 0.6608 - val_loss: 0.6591 - val_acc: 0.5925
Epoch 6/120
Epoch 00006: val_loss did not improve
 - 1s - loss: 0.6147 - acc: 0.6462 - val_loss: 0.6255 - val_acc: 0.6748
Epoch 7/120
Epoch 00007: val_loss improved from 0.60108 to 0.59628, saving model to best_m.h5
 -

Epoch 59/120
Epoch 00059: val_loss did not improve
 - 1s - loss: 0.3565 - acc: 0.8217 - val_loss: 0.3124 - val_acc: 0.8505
Epoch 60/120
Epoch 00060: val_loss did not improve
 - 1s - loss: 0.3495 - acc: 0.8150 - val_loss: 0.3075 - val_acc: 0.8505
Epoch 61/120
Epoch 00061: val_loss did not improve
 - 1s - loss: 0.3150 - acc: 0.8473 - val_loss: 0.3034 - val_acc: 0.8673
Epoch 62/120
Epoch 00062: val_loss did not improve
 - 1s - loss: 0.3273 - acc: 0.8360 - val_loss: 0.3042 - val_acc: 0.8579
Epoch 63/120
Epoch 00063: val_loss did not improve
 - 1s - loss: 0.3420 - acc: 0.8263 - val_loss: 0.3048 - val_acc: 0.8710
Epoch 64/120
Epoch 00064: val_loss did not improve
 - 1s - loss: 0.3324 - acc: 0.8216 - val_loss: 0.3124 - val_acc: 0.8449
Epoch 65/120
Epoch 00065: val_loss did not improve
 - 1s - loss: 0.3206 - acc: 0.8464 - val_loss: 0.3160 - val_acc: 0.8579
Epoch 66/120
Epoch 00066: val_loss improved from 0.30055 to 0.29360, saving model to best_m.h5
 - 1s - loss: 0.3418 - acc: 0.8343 - val_los

Epoch 120/120
Epoch 00120: val_loss did not improve
 - 1s - loss: 0.2779 - acc: 0.8711 - val_loss: 0.2576 - val_acc: 0.8935
Epoch 1/120
Epoch 00001: val_loss improved from inf to 0.68872, saving model to best_m.h5
 - 1s - loss: 0.7042 - acc: 0.4961 - val_loss: 0.6887 - val_acc: 0.6299
Epoch 2/120
Epoch 00002: val_loss improved from 0.68872 to 0.68632, saving model to best_m.h5
 - 1s - loss: 0.6751 - acc: 0.5837 - val_loss: 0.6863 - val_acc: 0.6374
Epoch 3/120
Epoch 00003: val_loss improved from 0.68632 to 0.61028, saving model to best_m.h5
 - 1s - loss: 0.6622 - acc: 0.6116 - val_loss: 0.6103 - val_acc: 0.6411
Epoch 4/120
Epoch 00004: val_loss did not improve
 - 1s - loss: 0.6299 - acc: 0.6530 - val_loss: 0.6116 - val_acc: 0.6187
Epoch 5/120
Epoch 00005: val_loss improved from 0.61028 to 0.57841, saving model to best_m.h5
 - 1s - loss: 0.6319 - acc: 0.6540 - val_loss: 0.5784 - val_acc: 0.6953
Epoch 6/120
Epoch 00006: val_loss did not improve
 - 1s - loss: 0.6078 - acc: 0.6637 - val_los

Epoch 59/120
Epoch 00059: val_loss did not improve
 - 1s - loss: 0.3286 - acc: 0.8469 - val_loss: 0.2732 - val_acc: 0.8729
Epoch 60/120
Epoch 00060: val_loss did not improve
 - 1s - loss: 0.3460 - acc: 0.8336 - val_loss: 0.3100 - val_acc: 0.8710
Epoch 61/120
Epoch 00061: val_loss did not improve
 - 1s - loss: 0.3229 - acc: 0.8447 - val_loss: 0.2685 - val_acc: 0.8785
Epoch 62/120
Epoch 00062: val_loss improved from 0.26828 to 0.26682, saving model to best_m.h5
 - 1s - loss: 0.3287 - acc: 0.8388 - val_loss: 0.2668 - val_acc: 0.8710
Epoch 63/120
Epoch 00063: val_loss did not improve
 - 1s - loss: 0.3322 - acc: 0.8177 - val_loss: 0.2847 - val_acc: 0.8617
Epoch 64/120
Epoch 00064: val_loss did not improve
 - 1s - loss: 0.3303 - acc: 0.8469 - val_loss: 0.2886 - val_acc: 0.8617
Epoch 65/120
Epoch 00065: val_loss improved from 0.26682 to 0.26219, saving model to best_m.h5
 - 1s - loss: 0.3276 - acc: 0.8445 - val_loss: 0.2622 - val_acc: 0.8729
Epoch 66/120
Epoch 00066: val_loss did not improve


Epoch 2/120
Epoch 00002: val_loss improved from 0.67063 to 0.64291, saving model to best_m.h5
 - 1s - loss: 0.6426 - acc: 0.6161 - val_loss: 0.6429 - val_acc: 0.6255
Epoch 3/120
Epoch 00003: val_loss did not improve
 - 1s - loss: 0.6575 - acc: 0.5875 - val_loss: 0.7097 - val_acc: 0.5094
Epoch 4/120
Epoch 00004: val_loss improved from 0.64291 to 0.61386, saving model to best_m.h5
 - 1s - loss: 0.6378 - acc: 0.6040 - val_loss: 0.6139 - val_acc: 0.6723
Epoch 5/120
Epoch 00005: val_loss did not improve
 - 1s - loss: 0.6350 - acc: 0.6383 - val_loss: 0.6331 - val_acc: 0.6498
Epoch 6/120
Epoch 00006: val_loss did not improve
 - 1s - loss: 0.5992 - acc: 0.6636 - val_loss: 0.6491 - val_acc: 0.6030
Epoch 7/120
Epoch 00007: val_loss did not improve
 - 1s - loss: 0.6072 - acc: 0.6660 - val_loss: 0.6153 - val_acc: 0.6779
Epoch 8/120
Epoch 00008: val_loss improved from 0.61386 to 0.60083, saving model to best_m.h5
 - 1s - loss: 0.6039 - acc: 0.6671 - val_loss: 0.6008 - val_acc: 0.6704
Epoch 9/120
Ep

Epoch 00060: val_loss did not improve
 - 1s - loss: 0.2976 - acc: 0.8569 - val_loss: 0.3351 - val_acc: 0.8277
Epoch 61/120
Epoch 00061: val_loss did not improve
 - 1s - loss: 0.3597 - acc: 0.8339 - val_loss: 0.3238 - val_acc: 0.8296
Epoch 62/120
Epoch 00062: val_loss did not improve
 - 1s - loss: 0.2786 - acc: 0.8785 - val_loss: 0.5121 - val_acc: 0.7528
Epoch 63/120
Epoch 00063: val_loss did not improve
 - 1s - loss: 0.3483 - acc: 0.8369 - val_loss: 0.3685 - val_acc: 0.8165
Epoch 64/120
Epoch 00064: val_loss improved from 0.31298 to 0.31241, saving model to best_m.h5
 - 1s - loss: 0.3024 - acc: 0.8519 - val_loss: 0.3124 - val_acc: 0.8352
Epoch 65/120
Epoch 00065: val_loss did not improve
 - 1s - loss: 0.3136 - acc: 0.8504 - val_loss: 0.3125 - val_acc: 0.8371
Epoch 66/120
Epoch 00066: val_loss did not improve
 - 1s - loss: 0.2938 - acc: 0.8471 - val_loss: 0.3223 - val_acc: 0.8333
Epoch 67/120
Epoch 00067: val_loss did not improve
 - 1s - loss: 0.3108 - acc: 0.8474 - val_loss: 0.3194 - v

In [6]:
import pickle
# Persist the predictions returned by kfold_train as a [train_pred, test_pred] pair.
with open('../features/cnn_1_aug_skimage_preprocess_feat.pkl','wb') as fout:
    pickle.dump([train_pred,test_pred],fout)

# train feat loss
# Log loss of the held-out-fold predictions against the training labels.
from sklearn.metrics import log_loss
print(log_loss(y,train_pred))

# Author's score notes; units and provenance are not stated here (TODO confirm).
#pre 3219
# new 2965

# skimage 2725

# Build the submission table (test ids plus fold-averaged predictions),
# preview it, and write it out as CSV without the index column.
submission = pd.DataFrame()
submission['id']=test_df['id']
submission['is_iceberg']=test_pred
print(submission.head())
submission.to_csv('../results/cnn_1_aug_skimage_preprocess.csv', index=False)

0.262588492501
         id  is_iceberg
0  5941774d    0.290037
1  4023181e    0.495171
2  b20200e4    0.756289
3  e7f018bb    0.993506
4  4371c8c3    0.858856


In [7]:
def create_model():
    '''Build CNN variant 2: a stem conv, two padded double-conv stages, one
    more conv, then a dense head.

    Not fully convolutional: the feature maps are flattened and fed to
    Dense layers. Returns an uncompiled keras Sequential model that takes
    75x75x3 inputs and ends in a single sigmoid unit.
    '''
    def _pool():
        # Every stage ends with the same 2x2, stride-2 max pooling.
        return MaxPooling2D((2, 2), strides=(2, 2))

    model = Sequential()

    # Stem: a single 32-filter convolution applied to the input image.
    model.add(Conv2D(32, (3, 3),input_shape=(75, 75, 3), activation='relu'))
    model.add(_pool())

    # Two stages of: zero-pad once, two 3x3 convolutions, pool.
    for filters in (32, 64):
        model.add(ZeroPadding2D((1, 1)))
        for _ in range(2):
            model.add(Conv2D(filters, (3, 3), activation='relu', strides=1))
        model.add(_pool())

    # Final unpadded convolution stage.
    model.add(Conv2D(64, (3, 3), activation='relu', strides=1))
    model.add(_pool())

    # Classifier head.
    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))
    return model
# Marker printed after create_model has been redefined by this cell.
print('model model')

model model


In [8]:
def kfold_train(fold_cnt=3,rnd=42):
    '''Train one model per K-fold split and collect its predictions.

    Reads the notebook globals `train_x`, `y`, `test_x`, `create_model` and
    `lr_f`. For every fold a fresh model from `create_model()` is trained for
    120 epochs on augmented batches, the checkpoint with the lowest
    validation loss is reloaded, and that model predicts both the held-out
    fold and the full test set.

    Args:
        fold_cnt: number of folds.
        rnd: seed base; the KFold shuffle uses random_state=2*rnd.

    Returns:
        (train_pred, test_pred): held-out-fold predictions with shape
        (len(train_x), 1), and test predictions averaged over the folds with
        shape (len(test_x), 1).
    '''
    # Size the prediction buffers from the data instead of hard-coding the
    # 1604 / 8424 row counts, so other input sizes work too.
    train_pred = np.zeros((len(train_x), 1))
    test_pred = np.zeros((len(test_x), 1))

    # Loop-invariant settings, built once rather than once per fold.
    bat_size = 16
    model_p = 'best_m.h5'  # rewritten by every fold, reloaded before the next one starts
    datagen = ImageDataGenerator(
        width_shift_range=0.05,
        height_shift_range=0.05,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        vertical_flip=True
    )

    kf = KFold(n_splits=fold_cnt, shuffle=True, random_state=2*rnd)
    for train_index, val_index in kf.split(train_x):
        curr_x, curr_y = train_x[train_index], y[train_index]
        val_x, val_y = train_x[val_index], y[val_index]
        steps_train = len(curr_y)//bat_size  # whole batches only

        model = create_model()
        model.compile(loss='binary_crossentropy', optimizer=Adam(0.0005), metrics=['accuracy'])
        # New callback per fold, so "best val_loss so far" restarts each time.
        model_chk = ModelCheckpoint(filepath=model_p, monitor='val_loss', save_best_only=True, verbose=1)
        # NOTE(review): the scheduler assigns lr_f(epoch) every epoch, which
        # appears to supersede the 0.0005 passed to Adam - confirm.
        lr_s = LearningRateScheduler(lr_f)
        model.fit_generator(datagen.flow(curr_x, curr_y, batch_size=bat_size),
                  validation_data=(val_x,val_y),
                  steps_per_epoch = steps_train,
                  epochs=120,
                  verbose=2,
                  callbacks=[model_chk,lr_s]
                 )

        # Predict with the best checkpoint rather than the last-epoch weights.
        model = load_model(model_p)
        train_pred[val_index] = model.predict(val_x)
        test_pred = test_pred + model.predict(test_x)/fold_cnt
        print('============================')
    return train_pred,test_pred

# Run 3-fold training; kfold_train calls whichever create_model is defined
# at the time of this call.
train_pred,test_pred = kfold_train(fold_cnt=3)


# Persist the predictions as a [train_pred, test_pred] pair.
# `pickle` and `log_loss` are used here without being imported in this cell,
# so an earlier cell must have imported them.
with open('../features/cnn_2_aug_skimage_preprocess_feat.pkl','wb') as fout:
    pickle.dump([train_pred,test_pred],fout)

# train feat loss
# Log loss of the held-out-fold predictions against the training labels.
print(log_loss(y,train_pred))

# Author's score notes; units and provenance are not stated here (TODO confirm).
# this 2728
# skimage 2710

# Build the submission table (test ids plus fold-averaged predictions),
# preview it, and write it out as CSV without the index column.
submission = pd.DataFrame()
submission['id']=test_df['id']
submission['is_iceberg']=test_pred
print(submission.head())
submission.to_csv('../results/cnn_2_aug_skimage_preprocess.csv', index=False)

Epoch 1/120
Epoch 00001: val_loss improved from inf to 0.69261, saving model to best_m.h5
 - 2s - loss: 0.6919 - acc: 0.5358 - val_loss: 0.6926 - val_acc: 0.5140
Epoch 2/120
Epoch 00002: val_loss improved from 0.69261 to 0.69196, saving model to best_m.h5
 - 1s - loss: 0.6922 - acc: 0.5280 - val_loss: 0.6920 - val_acc: 0.6542
Epoch 3/120
Epoch 00003: val_loss did not improve
 - 1s - loss: 0.6929 - acc: 0.5199 - val_loss: 0.6928 - val_acc: 0.5140
Epoch 4/120
Epoch 00004: val_loss improved from 0.69196 to 0.69137, saving model to best_m.h5
 - 1s - loss: 0.6903 - acc: 0.5372 - val_loss: 0.6914 - val_acc: 0.5140
Epoch 5/120
Epoch 00005: val_loss did not improve
 - 1s - loss: 0.6856 - acc: 0.5628 - val_loss: 0.6964 - val_acc: 0.5140
Epoch 6/120
Epoch 00006: val_loss improved from 0.69137 to 0.69121, saving model to best_m.h5
 - 1s - loss: 0.6888 - acc: 0.5337 - val_loss: 0.6912 - val_acc: 0.5140
Epoch 7/120
Epoch 00007: val_loss did not improve
 - 1s - loss: 0.6803 - acc: 0.5500 - val_loss:

Epoch 00058: val_loss did not improve
 - 1s - loss: 0.3388 - acc: 0.8419 - val_loss: 0.3156 - val_acc: 0.8523
Epoch 59/120
Epoch 00059: val_loss did not improve
 - 1s - loss: 0.3460 - acc: 0.8282 - val_loss: 0.3635 - val_acc: 0.8093
Epoch 60/120
Epoch 00060: val_loss did not improve
 - 1s - loss: 0.3260 - acc: 0.8521 - val_loss: 0.3167 - val_acc: 0.8430
Epoch 61/120
Epoch 00061: val_loss did not improve
 - 1s - loss: 0.3436 - acc: 0.8247 - val_loss: 0.3708 - val_acc: 0.8206
Epoch 62/120
Epoch 00062: val_loss did not improve
 - 1s - loss: 0.3442 - acc: 0.8282 - val_loss: 0.3248 - val_acc: 0.8411
Epoch 63/120
Epoch 00063: val_loss did not improve
 - 1s - loss: 0.3372 - acc: 0.8365 - val_loss: 0.3110 - val_acc: 0.8467
Epoch 64/120
Epoch 00064: val_loss did not improve
 - 1s - loss: 0.3177 - acc: 0.8438 - val_loss: 0.3065 - val_acc: 0.8579
Epoch 65/120
Epoch 00065: val_loss did not improve
 - 1s - loss: 0.3265 - acc: 0.8475 - val_loss: 0.3384 - val_acc: 0.8336
Epoch 66/120
Epoch 00066: val

 - 2s - loss: 0.6973 - acc: 0.5080 - val_loss: 0.6899 - val_acc: 0.5645
Epoch 2/120
Epoch 00002: val_loss did not improve
 - 1s - loss: 0.6937 - acc: 0.4991 - val_loss: 0.6909 - val_acc: 0.5645
Epoch 3/120
Epoch 00003: val_loss improved from 0.68992 to 0.68975, saving model to best_m.h5
 - 1s - loss: 0.6927 - acc: 0.5234 - val_loss: 0.6898 - val_acc: 0.5645
Epoch 4/120
Epoch 00004: val_loss improved from 0.68975 to 0.68939, saving model to best_m.h5
 - 1s - loss: 0.6924 - acc: 0.5205 - val_loss: 0.6894 - val_acc: 0.5645
Epoch 5/120
Epoch 00005: val_loss did not improve
 - 1s - loss: 0.6932 - acc: 0.5129 - val_loss: 0.6899 - val_acc: 0.5645
Epoch 6/120
Epoch 00006: val_loss did not improve
 - 1s - loss: 0.6933 - acc: 0.5138 - val_loss: 0.6901 - val_acc: 0.5645
Epoch 7/120
Epoch 00007: val_loss did not improve
 - 1s - loss: 0.6928 - acc: 0.5093 - val_loss: 0.6906 - val_acc: 0.5645
Epoch 8/120
Epoch 00008: val_loss did not improve
 - 1s - loss: 0.6931 - acc: 0.5181 - val_loss: 0.6899 - va

Epoch 68/120
Epoch 00068: val_loss did not improve
 - 1s - loss: 0.6929 - acc: 0.5098 - val_loss: 0.6899 - val_acc: 0.5645
Epoch 69/120
Epoch 00069: val_loss did not improve
 - 1s - loss: 0.6922 - acc: 0.5203 - val_loss: 0.6899 - val_acc: 0.5645
Epoch 70/120
Epoch 00070: val_loss did not improve
 - 1s - loss: 0.6929 - acc: 0.5138 - val_loss: 0.6898 - val_acc: 0.5645
Epoch 71/120
Epoch 00071: val_loss did not improve
 - 1s - loss: 0.6928 - acc: 0.5165 - val_loss: 0.6897 - val_acc: 0.5645
Epoch 72/120
Epoch 00072: val_loss did not improve
 - 1s - loss: 0.6927 - acc: 0.5101 - val_loss: 0.6898 - val_acc: 0.5645
Epoch 73/120
Epoch 00073: val_loss did not improve
 - 1s - loss: 0.6935 - acc: 0.5057 - val_loss: 0.6899 - val_acc: 0.5645
Epoch 74/120
Epoch 00074: val_loss did not improve
 - 1s - loss: 0.6915 - acc: 0.5250 - val_loss: 0.6898 - val_acc: 0.5645
Epoch 75/120
Epoch 00075: val_loss did not improve
 - 1s - loss: 0.6933 - acc: 0.5076 - val_loss: 0.6898 - val_acc: 0.5645
Epoch 76/120
Epo

Epoch 12/120
Epoch 00012: val_loss did not improve
 - 1s - loss: 0.5831 - acc: 0.6893 - val_loss: 0.5902 - val_acc: 0.6573
Epoch 13/120
Epoch 00013: val_loss improved from 0.57190 to 0.55126, saving model to best_m.h5
 - 1s - loss: 0.5755 - acc: 0.6893 - val_loss: 0.5513 - val_acc: 0.7135
Epoch 14/120
Epoch 00014: val_loss did not improve
 - 1s - loss: 0.5844 - acc: 0.6798 - val_loss: 0.6078 - val_acc: 0.6442
Epoch 15/120
Epoch 00015: val_loss improved from 0.55126 to 0.54087, saving model to best_m.h5
 - 1s - loss: 0.5891 - acc: 0.6713 - val_loss: 0.5409 - val_acc: 0.7172
Epoch 16/120
Epoch 00016: val_loss did not improve
 - 1s - loss: 0.5879 - acc: 0.6750 - val_loss: 0.5547 - val_acc: 0.7303
Epoch 17/120
Epoch 00017: val_loss improved from 0.54087 to 0.52210, saving model to best_m.h5
 - 1s - loss: 0.5536 - acc: 0.6933 - val_loss: 0.5221 - val_acc: 0.7135
Epoch 18/120
Epoch 00018: val_loss did not improve
 - 1s - loss: 0.5137 - acc: 0.7419 - val_loss: 0.7027 - val_acc: 0.6105
Epoch 1

Epoch 71/120
Epoch 00071: val_loss did not improve
 - 1s - loss: 0.3172 - acc: 0.8456 - val_loss: 0.3406 - val_acc: 0.8352
Epoch 72/120
Epoch 00072: val_loss did not improve
 - 1s - loss: 0.3138 - acc: 0.8475 - val_loss: 0.3529 - val_acc: 0.8277
Epoch 73/120
Epoch 00073: val_loss did not improve
 - 1s - loss: 0.3389 - acc: 0.8406 - val_loss: 0.3407 - val_acc: 0.8315
Epoch 74/120
Epoch 00074: val_loss did not improve
 - 1s - loss: 0.3193 - acc: 0.8475 - val_loss: 0.3420 - val_acc: 0.8277
Epoch 75/120
Epoch 00075: val_loss did not improve
 - 1s - loss: 0.3096 - acc: 0.8471 - val_loss: 0.3450 - val_acc: 0.8333
Epoch 76/120
Epoch 00076: val_loss did not improve
 - 1s - loss: 0.3241 - acc: 0.8481 - val_loss: 0.3497 - val_acc: 0.8277
Epoch 77/120
Epoch 00077: val_loss did not improve
 - 1s - loss: 0.3149 - acc: 0.8416 - val_loss: 0.3400 - val_acc: 0.8333
Epoch 78/120
Epoch 00078: val_loss did not improve
 - 1s - loss: 0.3359 - acc: 0.8456 - val_loss: 0.3378 - val_acc: 0.8315
Epoch 79/120
Epo

In [9]:
def create_model():
    '''Build CNN variant 3: four conv/pool/dropout stages and two dense layers.

    Not fully convolutional: the feature maps are flattened and fed to
    Dense layers. Returns an uncompiled keras Sequential model that takes
    75x75x3 inputs and ends in a single sigmoid unit.
    '''
    model = Sequential()

    # Stage 1 declares the input shape and is the only stage that pools
    # with a 3x3 window.
    model.add(Conv2D(64, kernel_size=(3, 3),activation='relu', input_shape=(75, 75, 3)))
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    model.add(Dropout(0.2))

    # Stages 2-4 share one layout and differ only in filter count.
    for filters in (128, 128, 64):
        model.add(Conv2D(filters, kernel_size=(3, 3), activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
        model.add(Dropout(0.2))

    # Dense layers need a flat feature vector.
    model.add(Flatten())

    # Two hidden dense layers, each followed by dropout.
    for units in (512, 256):
        model.add(Dense(units, activation='relu'))
        model.add(Dropout(0.2))

    # Single-unit sigmoid output.
    model.add(Dense(1, activation="sigmoid"))
    return model
# Marker printed after create_model has been redefined by this cell.
print('model model')

model model


In [10]:
def kfold_train(fold_cnt=3,rnd=42):
    '''Train one model per K-fold split and collect its predictions.

    Reads the notebook globals `train_x`, `y`, `test_x`, `create_model` and
    `lr_f`. For every fold a fresh model from `create_model()` is trained for
    120 epochs on augmented batches, the checkpoint with the lowest
    validation loss is reloaded, and that model predicts both the held-out
    fold and the full test set.

    Args:
        fold_cnt: number of folds.
        rnd: seed base; the KFold shuffle uses random_state=2*rnd.

    Returns:
        (train_pred, test_pred): held-out-fold predictions with shape
        (len(train_x), 1), and test predictions averaged over the folds with
        shape (len(test_x), 1).
    '''
    # Size the prediction buffers from the data instead of hard-coding the
    # 1604 / 8424 row counts, so other input sizes work too.
    train_pred = np.zeros((len(train_x), 1))
    test_pred = np.zeros((len(test_x), 1))

    # Loop-invariant settings, built once rather than once per fold.
    bat_size = 16
    model_p = 'best_m.h5'  # rewritten by every fold, reloaded before the next one starts
    datagen = ImageDataGenerator(
        width_shift_range=0.05,
        height_shift_range=0.05,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        vertical_flip=True
    )

    kf = KFold(n_splits=fold_cnt, shuffle=True, random_state=2*rnd)
    for train_index, val_index in kf.split(train_x):
        curr_x, curr_y = train_x[train_index], y[train_index]
        val_x, val_y = train_x[val_index], y[val_index]
        steps_train = len(curr_y)//bat_size  # whole batches only

        model = create_model()
        model.compile(loss='binary_crossentropy', optimizer=Adam(0.0005), metrics=['accuracy'])
        # New callback per fold, so "best val_loss so far" restarts each time.
        model_chk = ModelCheckpoint(filepath=model_p, monitor='val_loss', save_best_only=True, verbose=1)
        # NOTE(review): the scheduler assigns lr_f(epoch) every epoch, which
        # appears to supersede the 0.0005 passed to Adam - confirm.
        lr_s = LearningRateScheduler(lr_f)
        model.fit_generator(datagen.flow(curr_x, curr_y, batch_size=bat_size),
                  validation_data=(val_x,val_y),
                  steps_per_epoch = steps_train,
                  epochs=120,
                  verbose=2,
                  callbacks=[model_chk,lr_s]
                 )

        # Predict with the best checkpoint rather than the last-epoch weights.
        model = load_model(model_p)
        train_pred[val_index] = model.predict(val_x)
        test_pred = test_pred + model.predict(test_x)/fold_cnt
        print('============================')
    return train_pred,test_pred

# Run 3-fold training; kfold_train calls whichever create_model is defined
# at the time of this call.
train_pred,test_pred = kfold_train(fold_cnt=3)

Epoch 1/120
Epoch 00001: val_loss improved from inf to 0.69275, saving model to best_m.h5
 - 3s - loss: 0.6976 - acc: 0.5031 - val_loss: 0.6928 - val_acc: 0.5140
Epoch 2/120
Epoch 00002: val_loss improved from 0.69275 to 0.69243, saving model to best_m.h5
 - 2s - loss: 0.6908 - acc: 0.5413 - val_loss: 0.6924 - val_acc: 0.5140
Epoch 3/120
Epoch 00003: val_loss improved from 0.69243 to 0.69182, saving model to best_m.h5
 - 2s - loss: 0.6896 - acc: 0.5492 - val_loss: 0.6918 - val_acc: 0.5140
Epoch 4/120
Epoch 00004: val_loss did not improve
 - 2s - loss: 0.6903 - acc: 0.5330 - val_loss: 0.6961 - val_acc: 0.5140
Epoch 5/120
Epoch 00005: val_loss did not improve
 - 2s - loss: 0.6907 - acc: 0.5457 - val_loss: 0.6966 - val_acc: 0.5140
Epoch 6/120
Epoch 00006: val_loss improved from 0.69182 to 0.64811, saving model to best_m.h5
 - 2s - loss: 0.6754 - acc: 0.5691 - val_loss: 0.6481 - val_acc: 0.6673
Epoch 7/120
Epoch 00007: val_loss improved from 0.64811 to 0.64140, saving model to best_m.h5
 -

Epoch 59/120
Epoch 00059: val_loss did not improve
 - 2s - loss: 0.3981 - acc: 0.8094 - val_loss: 0.3950 - val_acc: 0.8299
Epoch 60/120
Epoch 00060: val_loss did not improve
 - 2s - loss: 0.3957 - acc: 0.7960 - val_loss: 0.3692 - val_acc: 0.7963
Epoch 61/120
Epoch 00061: val_loss did not improve
 - 2s - loss: 0.3910 - acc: 0.8121 - val_loss: 0.4162 - val_acc: 0.8150
Epoch 62/120
Epoch 00062: val_loss improved from 0.35126 to 0.34858, saving model to best_m.h5
 - 2s - loss: 0.4052 - acc: 0.8059 - val_loss: 0.3486 - val_acc: 0.8430
Epoch 63/120
Epoch 00063: val_loss did not improve
 - 2s - loss: 0.3689 - acc: 0.8282 - val_loss: 0.3893 - val_acc: 0.8280
Epoch 64/120
Epoch 00064: val_loss did not improve
 - 2s - loss: 0.3850 - acc: 0.8163 - val_loss: 0.3664 - val_acc: 0.8336
Epoch 65/120
Epoch 00065: val_loss did not improve
 - 2s - loss: 0.3851 - acc: 0.8081 - val_loss: 0.3540 - val_acc: 0.8486
Epoch 66/120
Epoch 00066: val_loss did not improve
 - 2s - loss: 0.3788 - acc: 0.8097 - val_los

Epoch 1/120
Epoch 00001: val_loss improved from inf to 0.68325, saving model to best_m.h5
 - 3s - loss: 0.6955 - acc: 0.5311 - val_loss: 0.6833 - val_acc: 0.5757
Epoch 2/120
Epoch 00002: val_loss improved from 0.68325 to 0.65453, saving model to best_m.h5
 - 2s - loss: 0.6858 - acc: 0.5617 - val_loss: 0.6545 - val_acc: 0.6598
Epoch 3/120
Epoch 00003: val_loss improved from 0.65453 to 0.61883, saving model to best_m.h5
 - 2s - loss: 0.6616 - acc: 0.6274 - val_loss: 0.6188 - val_acc: 0.6523
Epoch 4/120
Epoch 00004: val_loss did not improve
 - 2s - loss: 0.6895 - acc: 0.5335 - val_loss: 0.6900 - val_acc: 0.5925
Epoch 5/120
Epoch 00005: val_loss improved from 0.61883 to 0.60358, saving model to best_m.h5
 - 2s - loss: 0.6709 - acc: 0.6173 - val_loss: 0.6036 - val_acc: 0.6505
Epoch 6/120
Epoch 00006: val_loss did not improve
 - 2s - loss: 0.6565 - acc: 0.6045 - val_loss: 0.6718 - val_acc: 0.6449
Epoch 7/120
Epoch 00007: val_loss did not improve
 - 2s - loss: 0.6373 - acc: 0.6774 - val_loss:

Epoch 59/120
Epoch 00059: val_loss improved from 0.32931 to 0.32834, saving model to best_m.h5
 - 2s - loss: 0.3841 - acc: 0.8078 - val_loss: 0.3283 - val_acc: 0.8430
Epoch 60/120
Epoch 00060: val_loss did not improve
 - 2s - loss: 0.3810 - acc: 0.8153 - val_loss: 0.3467 - val_acc: 0.8430
Epoch 61/120
Epoch 00061: val_loss did not improve
 - 2s - loss: 0.3722 - acc: 0.8227 - val_loss: 0.3291 - val_acc: 0.8486
Epoch 62/120
Epoch 00062: val_loss improved from 0.32834 to 0.30654, saving model to best_m.h5
 - 2s - loss: 0.3772 - acc: 0.8158 - val_loss: 0.3065 - val_acc: 0.8654
Epoch 63/120
Epoch 00063: val_loss did not improve
 - 2s - loss: 0.3765 - acc: 0.8279 - val_loss: 0.3320 - val_acc: 0.8654
Epoch 64/120
Epoch 00064: val_loss did not improve
 - 2s - loss: 0.3632 - acc: 0.8338 - val_loss: 0.3148 - val_acc: 0.8486
Epoch 65/120
Epoch 00065: val_loss did not improve
 - 2s - loss: 0.3633 - acc: 0.8267 - val_loss: 0.3410 - val_acc: 0.8486
Epoch 66/120
Epoch 00066: val_loss did not improve


Epoch 1/120
Epoch 00001: val_loss improved from inf to 0.69255, saving model to best_m.h5
 - 3s - loss: 0.6958 - acc: 0.5226 - val_loss: 0.6925 - val_acc: 0.5131
Epoch 2/120
Epoch 00002: val_loss improved from 0.69255 to 0.68996, saving model to best_m.h5
 - 2s - loss: 0.6889 - acc: 0.5306 - val_loss: 0.6900 - val_acc: 0.4869
Epoch 3/120
Epoch 00003: val_loss improved from 0.68996 to 0.67390, saving model to best_m.h5
 - 2s - loss: 0.6889 - acc: 0.5471 - val_loss: 0.6739 - val_acc: 0.6273
Epoch 4/120
Epoch 00004: val_loss did not improve
 - 2s - loss: 0.6631 - acc: 0.5933 - val_loss: 0.6865 - val_acc: 0.5131
Epoch 5/120
Epoch 00005: val_loss improved from 0.67390 to 0.64743, saving model to best_m.h5
 - 2s - loss: 0.6728 - acc: 0.5748 - val_loss: 0.6474 - val_acc: 0.6479
Epoch 6/120
Epoch 00006: val_loss improved from 0.64743 to 0.60843, saving model to best_m.h5
 - 2s - loss: 0.6348 - acc: 0.6427 - val_loss: 0.6084 - val_acc: 0.6835
Epoch 7/120
Epoch 00007: val_loss did not improve
 -

Epoch 58/120
Epoch 00058: val_loss did not improve
 - 2s - loss: 0.3639 - acc: 0.8129 - val_loss: 0.3957 - val_acc: 0.8146
Epoch 59/120
Epoch 00059: val_loss did not improve
 - 2s - loss: 0.4393 - acc: 0.7883 - val_loss: 0.3781 - val_acc: 0.8221
Epoch 60/120
Epoch 00060: val_loss did not improve
 - 2s - loss: 0.3987 - acc: 0.7988 - val_loss: 0.3790 - val_acc: 0.8277
Epoch 61/120
Epoch 00061: val_loss did not improve
 - 2s - loss: 0.3672 - acc: 0.8231 - val_loss: 0.4511 - val_acc: 0.7884
Epoch 62/120
Epoch 00062: val_loss did not improve
 - 2s - loss: 0.3771 - acc: 0.8144 - val_loss: 0.3787 - val_acc: 0.8315
Epoch 63/120
Epoch 00063: val_loss improved from 0.36880 to 0.35569, saving model to best_m.h5
 - 2s - loss: 0.3701 - acc: 0.8140 - val_loss: 0.3557 - val_acc: 0.8408
Epoch 64/120
Epoch 00064: val_loss did not improve
 - 2s - loss: 0.3642 - acc: 0.8141 - val_loss: 0.4056 - val_acc: 0.8127
Epoch 65/120
Epoch 00065: val_loss did not improve
 - 2s - loss: 0.3925 - acc: 0.8064 - val_los



In [11]:
# Persist the predictions as a [train_pred, test_pred] pair.
# `pickle` and `log_loss` are used here without being imported in this cell,
# so an earlier cell must have imported them.
with open('../features/cnn_3_aug_skimage_preprocess_feat.pkl','wb') as fout:
    pickle.dump([train_pred,test_pred],fout)

# train feat loss
# Log loss of the held-out-fold predictions against the training labels.
print(log_loss(y,train_pred))

# Author's score notes; units and provenance are not stated here (TODO confirm).
# this 2737
# new  2509

# Build the submission table (test ids plus fold-averaged predictions),
# preview it, and write it out as CSV without the index column.
submission = pd.DataFrame()
submission['id']=test_df['id']
submission['is_iceberg']=test_pred
print(submission.head())
submission.to_csv('../results/cnn_3_aug_skimage_preprocess.csv', index=False)


0.301768397506
         id  is_iceberg
0  5941774d    0.383672
1  4023181e    0.652315
2  b20200e4    0.759469
3  e7f018bb    0.985744
4  4371c8c3    0.643436


In [12]:
from keras.callbacks import ModelCheckpoint,LearningRateScheduler
def ConvBlock(model, layers, filters):
    '''Append a convolutional block to `model` in place.

    The block is `layers` repetitions of (1-pixel zero padding, 3x3 ReLU
    convolution with `filters` filters, batch normalisation over axis 3),
    followed by one 2x2, stride-2 max pooling. Nothing is returned.
    '''
    for _ in range(layers):
        unit = (
            ZeroPadding2D((1, 1)),
            Conv2D(filters, (3, 3), activation='relu'),
            BatchNormalization(axis=3),
        )
        for layer in unit:
            model.add(layer)
    # Pool once, after the last repetition.
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))

def create_model():
    '''Build CNN variant 4: four single-conv ConvBlocks and a sigmoid output.

    The only Dense layer is the final one-unit classifier applied to the
    flattened feature maps. Returns an uncompiled keras Sequential model
    that takes 75x75x3 inputs.
    '''
    model = Sequential()

    # Identity layer whose only job is to declare the 75x75x3 input shape.
    model.add(Lambda(lambda x: x, input_shape=(75, 75, 3)))

    # Each block keeps the spatial size through its padded convolution and
    # halves it in the pooling step: 75 -> 37 -> 18 -> 9 -> 4.
    for filters in (32, 64, 128, 128):
        ConvBlock(model, 1, filters)

    # 4x4x128 feature maps -> flat vector -> single probability.
    model.add(Flatten())
    model.add(Dense(1,activation='sigmoid'))

    return model
# Marker printed after ConvBlock and create_model have been defined by this cell.
print('model model')


model model


In [13]:
def kfold_train(fold_cnt=3,rnd=42):
    '''Train one model per cross-validation fold and collect its predictions.

    Uses notebook-level names: `train_x`, `y`, `test_x`, `create_model` and
    `lr_f` (the learning-rate schedule function, defined in another cell).

    Args:
        fold_cnt: number of KFold splits; one model is trained per split.
        rnd: seed value; the shuffled KFold uses `random_state=2*rnd`.

    Returns:
        (train_pred, test_pred): `train_pred` has one row per `train_x` sample,
        filled with the prediction made by the fold model that held that sample
        out; `test_pred` has one row per `test_x` sample and is the mean of the
        fold models' predictions.
    '''
    # Size the buffers from the data rather than hard-coding the row counts,
    # so the function keeps working if the train/test sets change size.
    train_pred = np.zeros((len(train_x), 1))
    test_pred = np.zeros((len(test_x), 1))
    kf = KFold(n_splits=fold_cnt, shuffle=True, random_state=2*rnd)

    # Augmentation settings, batch size and checkpoint path are identical for
    # every fold, so they are set up once outside the loop.
    datagen = ImageDataGenerator(
        width_shift_range=0.05,
        height_shift_range=0.05,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        vertical_flip=True
    )
    bat_size = 16
    model_p = 'best_m.h5'

    for train_index, test_index in kf.split(train_x):
        curr_x,curr_y = train_x[train_index],y[train_index]
        val_x,val_y = train_x[test_index],y[test_index]

        steps_train = len(curr_y)//bat_size

        # Fresh model per fold; only the epoch with the lowest val_loss is kept on disk.
        model = create_model()
        model.compile(loss='binary_crossentropy', optimizer=Adam(0.0005), metrics=['accuracy'])
        model_chk = ModelCheckpoint(filepath=model_p, monitor='val_loss', save_best_only=True, verbose=1)
        lr_s = LearningRateScheduler(lr_f)
        model.fit_generator(datagen.flow(curr_x, curr_y, batch_size=bat_size),
                  validation_data=(val_x,val_y),
                  steps_per_epoch = steps_train,
                  epochs=120, 
                  verbose=2,
                  callbacks=[model_chk,lr_s]
                 )

        # Predict with the best checkpoint of this fold, not the last-epoch weights.
        model = load_model(model_p)
        train_pred[test_index] = model.predict(val_x)
        test_pred = test_pred + model.predict(test_x)/fold_cnt
        print('============================')
    return train_pred,test_pred

train_pred,test_pred = kfold_train(fold_cnt=3)

Epoch 1/120
Epoch 00001: val_loss improved from inf to 0.69167, saving model to best_m.h5
 - 3s - loss: 0.7697 - acc: 0.6538 - val_loss: 0.6917 - val_acc: 0.5159
Epoch 2/120
Epoch 00002: val_loss improved from 0.69167 to 0.67046, saving model to best_m.h5
 - 2s - loss: 0.5010 - acc: 0.7496 - val_loss: 0.6705 - val_acc: 0.6000
Epoch 3/120
Epoch 00003: val_loss improved from 0.67046 to 0.65653, saving model to best_m.h5
 - 2s - loss: 0.5288 - acc: 0.7453 - val_loss: 0.6565 - val_acc: 0.5626
Epoch 4/120
Epoch 00004: val_loss improved from 0.65653 to 0.64094, saving model to best_m.h5
 - 2s - loss: 0.4758 - acc: 0.7716 - val_loss: 0.6409 - val_acc: 0.5925
Epoch 5/120
Epoch 00005: val_loss improved from 0.64094 to 0.60327, saving model to best_m.h5
 - 2s - loss: 0.4626 - acc: 0.7818 - val_loss: 0.6033 - val_acc: 0.7103
Epoch 6/120
Epoch 00006: val_loss did not improve
 - 2s - loss: 0.4407 - acc: 0.7941 - val_loss: 0.8625 - val_acc: 0.5252
Epoch 7/120
Epoch 00007: val_loss improved from 0.60

Epoch 63/120
Epoch 00063: val_loss did not improve
 - 2s - loss: 0.2246 - acc: 0.8973 - val_loss: 0.7038 - val_acc: 0.7252
Epoch 64/120
Epoch 00064: val_loss did not improve
 - 2s - loss: 0.1987 - acc: 0.9164 - val_loss: 0.5503 - val_acc: 0.7607
Epoch 65/120
Epoch 00065: val_loss did not improve
 - 2s - loss: 0.2181 - acc: 0.9041 - val_loss: 0.3794 - val_acc: 0.8206
Epoch 66/120
Epoch 00066: val_loss did not improve
 - 2s - loss: 0.1745 - acc: 0.9231 - val_loss: 0.3878 - val_acc: 0.8280
Epoch 67/120
Epoch 00067: val_loss did not improve
 - 2s - loss: 0.1869 - acc: 0.9261 - val_loss: 0.2642 - val_acc: 0.9047
Epoch 68/120
Epoch 00068: val_loss did not improve
 - 2s - loss: 0.2089 - acc: 0.9148 - val_loss: 0.5409 - val_acc: 0.7720
Epoch 69/120
Epoch 00069: val_loss did not improve
 - 2s - loss: 0.2058 - acc: 0.9136 - val_loss: 1.0316 - val_acc: 0.6991
Epoch 70/120
Epoch 00070: val_loss did not improve
 - 2s - loss: 0.1947 - acc: 0.9174 - val_loss: 0.4175 - val_acc: 0.8187
Epoch 71/120
Epo

Epoch 6/120
Epoch 00006: val_loss did not improve
 - 2s - loss: 0.4383 - acc: 0.7954 - val_loss: 0.5371 - val_acc: 0.6841
Epoch 7/120
Epoch 00007: val_loss improved from 0.50740 to 0.43343, saving model to best_m.h5
 - 2s - loss: 0.4380 - acc: 0.7842 - val_loss: 0.4334 - val_acc: 0.7907
Epoch 8/120
Epoch 00008: val_loss did not improve
 - 2s - loss: 0.4311 - acc: 0.7879 - val_loss: 0.4348 - val_acc: 0.7907
Epoch 9/120
Epoch 00009: val_loss improved from 0.43343 to 0.35887, saving model to best_m.h5
 - 2s - loss: 0.4263 - acc: 0.7842 - val_loss: 0.3589 - val_acc: 0.8206
Epoch 10/120
Epoch 00010: val_loss did not improve
 - 2s - loss: 0.4245 - acc: 0.7886 - val_loss: 0.3735 - val_acc: 0.8206
Epoch 11/120
Epoch 00011: val_loss improved from 0.35887 to 0.34763, saving model to best_m.h5
 - 2s - loss: 0.5005 - acc: 0.7642 - val_loss: 0.3476 - val_acc: 0.8374
Epoch 12/120
Epoch 00012: val_loss did not improve
 - 2s - loss: 0.3963 - acc: 0.8154 - val_loss: 1.0282 - val_acc: 0.6449
Epoch 13/12

Epoch 70/120
Epoch 00070: val_loss did not improve
 - 2s - loss: 0.2117 - acc: 0.9162 - val_loss: 0.3904 - val_acc: 0.8449
Epoch 71/120
Epoch 00071: val_loss did not improve
 - 2s - loss: 0.1734 - acc: 0.9240 - val_loss: 0.2679 - val_acc: 0.8916
Epoch 72/120
Epoch 00072: val_loss did not improve
 - 2s - loss: 0.1597 - acc: 0.9307 - val_loss: 0.2551 - val_acc: 0.9009
Epoch 73/120
Epoch 00073: val_loss improved from 0.25063 to 0.25023, saving model to best_m.h5
 - 2s - loss: 0.1481 - acc: 0.9392 - val_loss: 0.2502 - val_acc: 0.9047
Epoch 74/120
Epoch 00074: val_loss did not improve
 - 2s - loss: 0.1416 - acc: 0.9508 - val_loss: 0.2502 - val_acc: 0.9047
Epoch 75/120
Epoch 00075: val_loss did not improve
 - 2s - loss: 0.1493 - acc: 0.9333 - val_loss: 0.2648 - val_acc: 0.8972
Epoch 76/120
Epoch 00076: val_loss did not improve
 - 2s - loss: 0.1440 - acc: 0.9441 - val_loss: 0.2607 - val_acc: 0.9084
Epoch 77/120
Epoch 00077: val_loss did not improve
 - 2s - loss: 0.1199 - acc: 0.9460 - val_los

Epoch 14/120
Epoch 00014: val_loss did not improve
 - 2s - loss: 0.3864 - acc: 0.8186 - val_loss: 0.6506 - val_acc: 0.6929
Epoch 15/120
Epoch 00015: val_loss did not improve
 - 2s - loss: 0.4076 - acc: 0.8178 - val_loss: 0.6719 - val_acc: 0.7566
Epoch 16/120
Epoch 00016: val_loss did not improve
 - 2s - loss: 0.3448 - acc: 0.8398 - val_loss: 0.3827 - val_acc: 0.8315
Epoch 17/120
Epoch 00017: val_loss did not improve
 - 2s - loss: 0.3441 - acc: 0.8456 - val_loss: 0.3762 - val_acc: 0.8052
Epoch 18/120
Epoch 00018: val_loss did not improve
 - 2s - loss: 0.3272 - acc: 0.8494 - val_loss: 0.4996 - val_acc: 0.7921
Epoch 19/120
Epoch 00019: val_loss did not improve
 - 2s - loss: 0.3392 - acc: 0.8456 - val_loss: 0.3813 - val_acc: 0.8408
Epoch 20/120
Epoch 00020: val_loss did not improve
 - 2s - loss: 0.3093 - acc: 0.8662 - val_loss: 0.4935 - val_acc: 0.7659
Epoch 21/120
Epoch 00021: val_loss did not improve
 - 2s - loss: 0.2820 - acc: 0.8726 - val_loss: 0.3650 - val_acc: 0.8221
Epoch 22/120
Epo

Epoch 80/120
Epoch 00080: val_loss did not improve
 - 2s - loss: 0.1222 - acc: 0.9479 - val_loss: 0.3537 - val_acc: 0.8614
Epoch 81/120
Epoch 00081: val_loss did not improve
 - 2s - loss: 0.1325 - acc: 0.9422 - val_loss: 0.3633 - val_acc: 0.8652
Epoch 82/120
Epoch 00082: val_loss did not improve
 - 2s - loss: 0.1138 - acc: 0.9582 - val_loss: 0.3475 - val_acc: 0.8670
Epoch 83/120
Epoch 00083: val_loss did not improve
 - 2s - loss: 0.1168 - acc: 0.9487 - val_loss: 0.3648 - val_acc: 0.8689
Epoch 84/120
Epoch 00084: val_loss did not improve
 - 2s - loss: 0.1144 - acc: 0.9524 - val_loss: 0.3279 - val_acc: 0.8745
Epoch 85/120
Epoch 00085: val_loss did not improve
 - 2s - loss: 0.1279 - acc: 0.9479 - val_loss: 0.3898 - val_acc: 0.8521
Epoch 86/120
Epoch 00086: val_loss did not improve
 - 2s - loss: 0.1306 - acc: 0.9451 - val_loss: 0.3469 - val_acc: 0.8708
Epoch 87/120
Epoch 00087: val_loss did not improve
 - 2s - loss: 0.1268 - acc: 0.9498 - val_loss: 0.3651 - val_acc: 0.8614
Epoch 88/120
Epo

In [14]:
# Store the out-of-fold and averaged test predictions returned by kfold_train
# so they can be reused as features later.
feat_path = '../features/cnn_4_aug_skimage_preprocess_feat.pkl'
with open(feat_path, 'wb') as feat_file:
    pickle.dump([train_pred, test_pred], feat_file)

# Log loss of the out-of-fold predictions against the training labels.
train_loss = log_loss(y, train_pred)
print(train_loss)


# Submission table: one row per test id with its predicted probability.
# np.ravel flattens the (n, 1) prediction array into a plain column.
submission = pd.DataFrame(
    {'id': test_df['id'], 'is_iceberg': np.ravel(test_pred)},
    columns=['id', 'is_iceberg'],
)
print(submission.head())
submission.to_csv('../results/cnn_4_aug_skimage_preprocess.csv', index=False)

0.254597697374
         id  is_iceberg
0  5941774d    0.136666
1  4023181e    0.820309
2  b20200e4    0.349985
3  e7f018bb    0.956698
4  4371c8c3    0.553144


In [15]:
def create_model():
    '''Build and return the keras Sequential model used by this experiment.

    Four conv stages (3x3 convolution -> ReLU -> batch normalization ->
    3x3 max pooling with stride 2 -> dropout), then Flatten, two ReLU dense
    layers with dropout, and a single sigmoid output unit.
    The model is returned uncompiled.
    '''
    model = Sequential()

    # (number of filters, dropout rate) for conv stages CNN 1 .. CNN 4.
    conv_stages = [(64, 0.2), (128, 0.2), (128, 0.3), (64, 0.3)]
    for stage_idx, (n_filters, drop_rate) in enumerate(conv_stages):
        # Only the very first layer declares the 75x75x3 input shape.
        first_layer_kwargs = {'input_shape': (75, 75, 3)} if stage_idx == 0 else {}
        model.add(Conv2D(n_filters, kernel_size=(3, 3), **first_layer_kwargs))
        model.add(Activation('relu'))
        model.add(BatchNormalization())
        model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
        model.add(Dropout(drop_rate))

    # The dense layers need a flat feature vector.
    model.add(Flatten())

    # Dense 1 and Dense 2, each followed by dropout.
    for n_units in (512, 256):
        model.add(Dense(n_units, activation='relu'))
        model.add(Dropout(0.2))

    # Output
    model.add(Dense(1, activation="sigmoid"))
    return model
print('model model')


model model


In [16]:
def kfold_train(fold_cnt=3,rnd=42):
    '''Train one model per cross-validation fold and collect its predictions.

    Uses notebook-level names: `train_x`, `y`, `test_x`, `create_model` and
    `lr_f` (the learning-rate schedule function, defined in another cell).

    Args:
        fold_cnt: number of KFold splits; one model is trained per split.
        rnd: seed value; the shuffled KFold uses `random_state=2*rnd`.

    Returns:
        (train_pred, test_pred): `train_pred` has one row per `train_x` sample,
        filled with the prediction made by the fold model that held that sample
        out; `test_pred` has one row per `test_x` sample and is the mean of the
        fold models' predictions.
    '''
    # Size the buffers from the data rather than hard-coding the row counts,
    # so the function keeps working if the train/test sets change size.
    train_pred = np.zeros((len(train_x), 1))
    test_pred = np.zeros((len(test_x), 1))
    kf = KFold(n_splits=fold_cnt, shuffle=True, random_state=2*rnd)

    # Augmentation settings, batch size and checkpoint path are identical for
    # every fold, so they are set up once outside the loop.
    datagen = ImageDataGenerator(
        width_shift_range=0.05,
        height_shift_range=0.05,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        vertical_flip=True
    )
    bat_size = 16
    model_p = 'best_m.h5'

    for train_index, test_index in kf.split(train_x):
        curr_x,curr_y = train_x[train_index],y[train_index]
        val_x,val_y = train_x[test_index],y[test_index]

        steps_train = len(curr_y)//bat_size

        # Fresh model per fold; only the epoch with the lowest val_loss is kept on disk.
        model = create_model()
        model.compile(loss='binary_crossentropy', optimizer=Adam(0.0005), metrics=['accuracy'])
        model_chk = ModelCheckpoint(filepath=model_p, monitor='val_loss', save_best_only=True, verbose=1)
        lr_s = LearningRateScheduler(lr_f)
        model.fit_generator(datagen.flow(curr_x, curr_y, batch_size=bat_size),
                  validation_data=(val_x,val_y),
                  steps_per_epoch = steps_train,
                  epochs=120, 
                  verbose=2,
                  callbacks=[model_chk,lr_s]
                 )

        # Predict with the best checkpoint of this fold, not the last-epoch weights.
        model = load_model(model_p)
        train_pred[test_index] = model.predict(val_x)
        test_pred = test_pred + model.predict(test_x)/fold_cnt
        print('============================')
    return train_pred,test_pred

train_pred,test_pred = kfold_train(fold_cnt=3)

Epoch 1/120
Epoch 00001: val_loss improved from inf to 0.96397, saving model to best_m.h5
 - 5s - loss: 0.6594 - acc: 0.6315 - val_loss: 0.9640 - val_acc: 0.5140
Epoch 2/120
Epoch 00002: val_loss improved from 0.96397 to 0.88611, saving model to best_m.h5
 - 3s - loss: 0.5970 - acc: 0.6781 - val_loss: 0.8861 - val_acc: 0.5140
Epoch 3/120
Epoch 00003: val_loss did not improve
 - 3s - loss: 0.5747 - acc: 0.7029 - val_loss: 1.3727 - val_acc: 0.5140
Epoch 4/120
Epoch 00004: val_loss did not improve
 - 3s - loss: 0.4929 - acc: 0.7600 - val_loss: 2.3585 - val_acc: 0.5140
Epoch 5/120
Epoch 00005: val_loss did not improve
 - 3s - loss: 0.4852 - acc: 0.7569 - val_loss: 1.8409 - val_acc: 0.5140
Epoch 6/120
Epoch 00006: val_loss did not improve
 - 3s - loss: 0.4847 - acc: 0.7585 - val_loss: 1.4536 - val_acc: 0.5140
Epoch 7/120
Epoch 00007: val_loss did not improve
 - 3s - loss: 0.4526 - acc: 0.7738 - val_loss: 2.9838 - val_acc: 0.5140
Epoch 8/120
Epoch 00008: val_loss did not improve
 - 3s - loss

Epoch 00063: val_loss did not improve
 - 3s - loss: 0.3011 - acc: 0.8627 - val_loss: 1.0451 - val_acc: 0.6841
Epoch 64/120
Epoch 00064: val_loss did not improve
 - 3s - loss: 0.3018 - acc: 0.8684 - val_loss: 0.5397 - val_acc: 0.7234
Epoch 65/120
Epoch 00065: val_loss did not improve
 - 3s - loss: 0.2906 - acc: 0.8665 - val_loss: 0.5395 - val_acc: 0.7925
Epoch 66/120
Epoch 00066: val_loss improved from 0.26842 to 0.26004, saving model to best_m.h5
 - 3s - loss: 0.3150 - acc: 0.8523 - val_loss: 0.2600 - val_acc: 0.8822
Epoch 67/120
Epoch 00067: val_loss did not improve
 - 3s - loss: 0.3228 - acc: 0.8502 - val_loss: 0.3605 - val_acc: 0.8262
Epoch 68/120
Epoch 00068: val_loss did not improve
 - 3s - loss: 0.2817 - acc: 0.8719 - val_loss: 0.2604 - val_acc: 0.8879
Epoch 69/120
Epoch 00069: val_loss did not improve
 - 3s - loss: 0.3161 - acc: 0.8524 - val_loss: 0.3408 - val_acc: 0.8430
Epoch 70/120
Epoch 00070: val_loss did not improve
 - 3s - loss: 0.3043 - acc: 0.8577 - val_loss: 0.2983 - v

Epoch 6/120
Epoch 00006: val_loss did not improve
 - 3s - loss: 0.4791 - acc: 0.7803 - val_loss: 1.6981 - val_acc: 0.5645
Epoch 7/120
Epoch 00007: val_loss did not improve
 - 3s - loss: 0.4860 - acc: 0.7724 - val_loss: 0.6986 - val_acc: 0.6187
Epoch 8/120
Epoch 00008: val_loss improved from 0.69662 to 0.59480, saving model to best_m.h5
 - 3s - loss: 0.4641 - acc: 0.7550 - val_loss: 0.5948 - val_acc: 0.7234
Epoch 9/120
Epoch 00009: val_loss improved from 0.59480 to 0.47690, saving model to best_m.h5
 - 3s - loss: 0.4266 - acc: 0.7901 - val_loss: 0.4769 - val_acc: 0.7421
Epoch 10/120
Epoch 00010: val_loss did not improve
 - 3s - loss: 0.4552 - acc: 0.7654 - val_loss: 0.7083 - val_acc: 0.6467
Epoch 11/120
Epoch 00011: val_loss did not improve
 - 3s - loss: 0.4528 - acc: 0.7681 - val_loss: 1.3945 - val_acc: 0.5626
Epoch 12/120
Epoch 00012: val_loss did not improve
 - 3s - loss: 0.4249 - acc: 0.8085 - val_loss: 1.0756 - val_acc: 0.5664
Epoch 13/120
Epoch 00013: val_loss did not improve
 - 3

Epoch 00069: val_loss did not improve
 - 3s - loss: 0.3232 - acc: 0.8549 - val_loss: 0.2794 - val_acc: 0.8766
Epoch 70/120
Epoch 00070: val_loss did not improve
 - 3s - loss: 0.3118 - acc: 0.8594 - val_loss: 0.5336 - val_acc: 0.7963
Epoch 71/120
Epoch 00071: val_loss did not improve
 - 3s - loss: 0.3222 - acc: 0.8594 - val_loss: 0.2648 - val_acc: 0.8804
Epoch 72/120
Epoch 00072: val_loss did not improve
 - 3s - loss: 0.2892 - acc: 0.8776 - val_loss: 0.2610 - val_acc: 0.8748
Epoch 73/120
Epoch 00073: val_loss improved from 0.26009 to 0.24592, saving model to best_m.h5
 - 3s - loss: 0.2769 - acc: 0.8691 - val_loss: 0.2459 - val_acc: 0.8935
Epoch 74/120
Epoch 00074: val_loss improved from 0.24592 to 0.23614, saving model to best_m.h5
 - 3s - loss: 0.2976 - acc: 0.8615 - val_loss: 0.2361 - val_acc: 0.8841
Epoch 75/120
Epoch 00075: val_loss did not improve
 - 3s - loss: 0.2876 - acc: 0.8658 - val_loss: 0.2499 - val_acc: 0.8804
Epoch 76/120
Epoch 00076: val_loss did not improve
 - 3s - loss:

Epoch 12/120
Epoch 00012: val_loss improved from 0.57015 to 0.55189, saving model to best_m.h5
 - 3s - loss: 0.4159 - acc: 0.8006 - val_loss: 0.5519 - val_acc: 0.7528
Epoch 13/120
Epoch 00013: val_loss did not improve
 - 3s - loss: 0.4180 - acc: 0.8057 - val_loss: 1.2765 - val_acc: 0.5337
Epoch 14/120
Epoch 00014: val_loss improved from 0.55189 to 0.37239, saving model to best_m.h5
 - 3s - loss: 0.3790 - acc: 0.8186 - val_loss: 0.3724 - val_acc: 0.8109
Epoch 15/120
Epoch 00015: val_loss did not improve
 - 3s - loss: 0.4201 - acc: 0.8028 - val_loss: 1.2901 - val_acc: 0.5749
Epoch 16/120
Epoch 00016: val_loss did not improve
 - 3s - loss: 0.4018 - acc: 0.8075 - val_loss: 0.3973 - val_acc: 0.8240
Epoch 17/120
Epoch 00017: val_loss did not improve
 - 3s - loss: 0.3805 - acc: 0.8201 - val_loss: 0.6454 - val_acc: 0.7022
Epoch 18/120
Epoch 00018: val_loss improved from 0.37239 to 0.36126, saving model to best_m.h5
 - 3s - loss: 0.3825 - acc: 0.8171 - val_loss: 0.3613 - val_acc: 0.8240
Epoch 1

Epoch 76/120
Epoch 00076: val_loss did not improve
 - 3s - loss: 0.2496 - acc: 0.8768 - val_loss: 0.3186 - val_acc: 0.8464
Epoch 77/120
Epoch 00077: val_loss did not improve
 - 3s - loss: 0.2294 - acc: 0.8901 - val_loss: 0.3162 - val_acc: 0.8539
Epoch 78/120
Epoch 00078: val_loss did not improve
 - 3s - loss: 0.2427 - acc: 0.8973 - val_loss: 0.3262 - val_acc: 0.8408
Epoch 79/120
Epoch 00079: val_loss did not improve
 - 3s - loss: 0.2458 - acc: 0.8843 - val_loss: 0.3613 - val_acc: 0.8483
Epoch 80/120
Epoch 00080: val_loss did not improve
 - 3s - loss: 0.2186 - acc: 0.9006 - val_loss: 0.3126 - val_acc: 0.8521
Epoch 81/120
Epoch 00081: val_loss did not improve
 - 3s - loss: 0.2419 - acc: 0.8991 - val_loss: 0.3119 - val_acc: 0.8502
Epoch 82/120
Epoch 00082: val_loss did not improve
 - 3s - loss: 0.2351 - acc: 0.8935 - val_loss: 0.3360 - val_acc: 0.8502
Epoch 83/120
Epoch 00083: val_loss did not improve
 - 3s - loss: 0.2227 - acc: 0.9004 - val_loss: 0.3462 - val_acc: 0.8614
Epoch 84/120
Epo

In [17]:
# Store the out-of-fold and averaged test predictions returned by kfold_train
# so they can be reused as features later.
feat_path = '../features/cnn_5_aug_skimage_preprocess_feat.pkl'
with open(feat_path, 'wb') as feat_file:
    pickle.dump([train_pred, test_pred], feat_file)

# Log loss of the out-of-fold predictions against the training labels.
train_loss = log_loss(y, train_pred)
print(train_loss)
# Author's note (presumably the score of an earlier run -- unconfirmed): 2364

# Submission table: one row per test id with its predicted probability.
# np.ravel flattens the (n, 1) prediction array into a plain column.
submission = pd.DataFrame(
    {'id': test_df['id'], 'is_iceberg': np.ravel(test_pred)},
    columns=['id', 'is_iceberg'],
)
print(submission.head())
submission.to_csv('../results/cnn_5_aug_skimage_preprocess.csv', index=False)

0.239071600235
         id  is_iceberg
0  5941774d    0.121252
1  4023181e    0.536492
2  b20200e4    0.561281
3  e7f018bb    0.999569
4  4371c8c3    0.774747
