In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from keras.utils.np_utils import to_categorical
from keras.models import Sequential,load_model
from keras.layers import Dense, Dropout, Flatten, Lambda, Activation
from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D, GlobalAveragePooling2D
from keras.layers.normalization import BatchNormalization
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint,LearningRateScheduler,EarlyStopping

# Load the train and test sets from JSON. Later cells read the `band_1` and
# `band_2` columns of both frames (via get_image) and `is_iceberg` from train_df.
train_df = pd.read_json('../input/train.json')
test_df = pd.read_json('../input/test.json')

Using TensorFlow backend.


In [2]:
def get_image(df):
    '''Create 3-channel 'images'. Return rescale-normalised images.

    For every row of `df`, `band_1` and `band_2` are reshaped to 75x75 and a
    third channel is formed as their element-wise mean. Each channel is then
    min-max rescaled on its own (using that channel's min and max) to [0, 1].

    Parameters
    ----------
    df : object exposing `iterrows()` (e.g. a pandas DataFrame) whose rows
        have `band_1` and `band_2` entries holding 75*75 values each.

    Returns
    -------
    numpy.ndarray
        The per-row (75, 75, 3) images stacked along a new first axis.
    '''
    def _rescale(band):
        # Min-max rescale one channel. A constant channel has zero range;
        # dividing by it would yield 0/0 = NaN for every pixel, so map such a
        # channel to all zeros instead.
        lowest = band.min()
        span = band.max() - lowest
        if span == 0:
            return np.zeros(band.shape, dtype=float)
        return (band - lowest) / span

    images = []
    for _, row in df.iterrows():
        # Formulate the bands as 75x75 arrays
        band_1 = np.array(row['band_1']).reshape(75, 75)
        band_2 = np.array(row['band_2']).reshape(75, 75)
        band_3 = (band_1 + band_2)/2

        # Rescale each channel independently and stack along the last axis
        rgb = np.dstack((_rescale(band_1), _rescale(band_2), _rescale(band_3)))
        images.append(rgb)
    return np.array(images)


# Convert both frames into image tensors once; the training cells below read
# `train_x` and `test_x` as module-level globals.
train_x = get_image(df=train_df)
test_x = get_image(df=test_df)

# Sanity check: show the shapes of the two tensors.
print(train_x.shape, test_x.shape)

(1604, 75, 75, 3) (8424, 75, 75, 3)


In [3]:
# Training labels as a plain array, taken from the `is_iceberg` column.
y = train_df['is_iceberg'].values
# Peek at the first five labels.
print(y[0:5])

[0 0 1 0 0]


In [4]:
def create_model():
    '''Build and return the (uncompiled) baseline CNN classifier.

    Two stages of Conv-ReLU-Conv-ReLU-MaxPool-Dropout (16 then 32 filters) on a
    75x75x3 input, followed by Flatten, a 256-unit ReLU dense layer with
    dropout, and a single sigmoid output unit.
    '''
    layer_stack = [
        # Stage 1: 16 filters
        Conv2D(16, (3, 3), padding='same', input_shape=(75, 75, 3)),
        Activation('relu'),
        Conv2D(16, (3, 3)),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),

        # Stage 2: 32 filters
        Conv2D(32, (3, 3)),
        Activation('relu'),
        Conv2D(32, (3, 3)),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),

        # Classifier head
        Flatten(),
        Dense(256),
        Activation('relu'),
        Dropout(0.5),
        Dense(1),
        Activation('sigmoid'),
    ]

    model = Sequential()
    for layer in layer_stack:
        model.add(layer)
    return model
print('model model')


model model


In [10]:
from sklearn.model_selection import KFold
from keras.preprocessing.image import ImageDataGenerator

def lr_f(epoch):
    '''Step learning-rate schedule: return the rate to use for `epoch`.

    Epochs below 10 use 0.001, below 30 use 0.0005, below 50 use 0.0001, and
    every later epoch uses 0.00005.
    '''
    # (exclusive upper epoch bound, learning rate), checked in order.
    steps = ((10, 0.001), (30, 0.0005), (50, 0.0001))
    for upper_bound, rate in steps:
        if epoch < upper_bound:
            return rate
    return 0.00005

def kfold_train(fold_cnt=3,rnd=42):
    '''Train one model per CV fold; return out-of-fold and averaged test predictions.

    Reads the module-level `train_x`, `y`, `test_x`, `create_model` and `lr_f`.

    Parameters
    ----------
    fold_cnt : number of KFold splits; one fresh model is trained per split.
    rnd : seed base; the shuffled KFold uses `random_state=2*rnd`.

    Returns
    -------
    (train_pred, test_pred)
        train_pred : array of shape (len(train_x), 1); each row is filled by the
            model of the fold in which that row was held out.
        test_pred : array of shape (len(test_x), 1); mean over the fold models
            of their predictions on `test_x`.

    Side effects: writes (and repeatedly overwrites) 'best_m.h5' in the
    working directory.
    '''
    # Size the prediction buffers from the data rather than hard-coding row counts.
    train_pred = np.zeros((train_x.shape[0], 1))
    test_pred = np.zeros((test_x.shape[0], 1))
    kf = KFold(n_splits=fold_cnt, shuffle=True, random_state=2*rnd)

    # Fold-independent configuration, built once.
    datagen = ImageDataGenerator(
        rotation_range = 20,
        width_shift_range = 0.1,
        height_shift_range = 0.1,
        zoom_range = 0.2,
        horizontal_flip=True
    )
    bat_size = 16
    model_p = 'best_m.h5'

    for train_index, test_index in kf.split(train_x):
        curr_x,curr_y = train_x[train_index],y[train_index]
        val_x,val_y = train_x[test_index],y[test_index]
        # Whole batches only; a trailing partial batch is not counted.
        steps_train = len(curr_y)//bat_size

        model = create_model()
        # LearningRateScheduler sets the rate at the start of every epoch, so
        # lr_f takes precedence over the 0.0005 passed to Adam here.
        model.compile(loss='binary_crossentropy', optimizer=Adam(0.0005), metrics=['accuracy'])
        # A new checkpoint callback per fold: its "best so far" starts fresh,
        # so the previous fold's file is overwritten on the first epoch.
        model_chk = ModelCheckpoint(filepath=model_p, monitor='val_loss', save_best_only=True, verbose=1)
        lr_s = LearningRateScheduler(lr_f)
        model.fit_generator(datagen.flow(curr_x, curr_y, batch_size=bat_size),
                  validation_data=(val_x,val_y),
                  steps_per_epoch = steps_train,
                  epochs=100,
                  verbose=2,
                  callbacks=[model_chk,lr_s]
                 )

        # Predict with the best-val_loss checkpoint, not the last-epoch weights.
        # NOTE(review): the checkpoint is selected on this same held-out fold,
        # so the out-of-fold predictions are somewhat optimistic.
        model = load_model(model_p)
        train_pred[test_index] = model.predict(val_x)
        test_pred = test_pred + model.predict(test_x)/fold_cnt
        print('============================')
    return train_pred,test_pred

# Run 5-fold training with the create_model defined above. train_pred holds
# the out-of-fold predictions, test_pred the fold-averaged test predictions.
train_pred,test_pred = kfold_train(fold_cnt=5)

import pickle
# Persist both arrays together as one pickled list: [train_pred, test_pred].
with open('../features/cnn_1_aug_rescale_preprocess_feat.pkl','wb') as fout:
    pickle.dump([train_pred,test_pred],fout)

# Log loss of the out-of-fold predictions against the training labels.
from sklearn.metrics import log_loss
print(log_loss(y,train_pred))

# pre 15161
# retrain fold3 1, 15269
# 2, 15223
# 3, 15187
# fold2 1, 15192
# fold4 1, 15348
# fold5 1, 15167

Epoch 1/100
Epoch 00001: val_loss improved from inf to 0.68048, saving model to best_m.h5
 - 3s - loss: 0.7012 - acc: 0.5370 - val_loss: 0.6805 - val_acc: 0.6511
Epoch 2/100
Epoch 00002: val_loss did not improve
 - 1s - loss: 0.6925 - acc: 0.5169 - val_loss: 0.6880 - val_acc: 0.4984
Epoch 3/100
Epoch 00003: val_loss improved from 0.68048 to 0.67036, saving model to best_m.h5
 - 2s - loss: 0.6899 - acc: 0.5370 - val_loss: 0.6704 - val_acc: 0.6636
Epoch 4/100
Epoch 00004: val_loss improved from 0.67036 to 0.62504, saving model to best_m.h5
 - 2s - loss: 0.6504 - acc: 0.6122 - val_loss: 0.6250 - val_acc: 0.6542
Epoch 5/100
Epoch 00005: val_loss improved from 0.62504 to 0.58690, saving model to best_m.h5
 - 2s - loss: 0.6055 - acc: 0.6591 - val_loss: 0.5869 - val_acc: 0.6885
Epoch 6/100
Epoch 00006: val_loss did not improve
 - 2s - loss: 0.5737 - acc: 0.6718 - val_loss: 0.5986 - val_acc: 0.6667
Epoch 7/100
Epoch 00007: val_loss improved from 0.58690 to 0.56860, saving model to best_m.h5
 -

Epoch 61/100
Epoch 00061: val_loss did not improve
 - 2s - loss: 0.2997 - acc: 0.8656 - val_loss: 0.2960 - val_acc: 0.8598
Epoch 62/100
Epoch 00062: val_loss did not improve
 - 1s - loss: 0.3097 - acc: 0.8586 - val_loss: 0.3155 - val_acc: 0.8536
Epoch 63/100
Epoch 00063: val_loss did not improve
 - 2s - loss: 0.3165 - acc: 0.8461 - val_loss: 0.3096 - val_acc: 0.8505
Epoch 64/100
Epoch 00064: val_loss did not improve
 - 2s - loss: 0.2932 - acc: 0.8523 - val_loss: 0.3217 - val_acc: 0.8474
Epoch 65/100
Epoch 00065: val_loss did not improve
 - 1s - loss: 0.3090 - acc: 0.8539 - val_loss: 0.3009 - val_acc: 0.8505
Epoch 66/100
Epoch 00066: val_loss did not improve
 - 1s - loss: 0.3130 - acc: 0.8492 - val_loss: 0.3095 - val_acc: 0.8474
Epoch 67/100
Epoch 00067: val_loss did not improve
 - 1s - loss: 0.3061 - acc: 0.8427 - val_loss: 0.3096 - val_acc: 0.8536
Epoch 68/100
Epoch 00068: val_loss did not improve
 - 1s - loss: 0.3006 - acc: 0.8605 - val_loss: 0.3005 - val_acc: 0.8536
Epoch 69/100
Epo

Epoch 22/100
Epoch 00022: val_loss did not improve
 - 1s - loss: 0.4609 - acc: 0.7568 - val_loss: 0.4111 - val_acc: 0.8162
Epoch 23/100
Epoch 00023: val_loss improved from 0.40274 to 0.39536, saving model to best_m.h5
 - 2s - loss: 0.4523 - acc: 0.7816 - val_loss: 0.3954 - val_acc: 0.8069
Epoch 24/100
Epoch 00024: val_loss did not improve
 - 1s - loss: 0.4330 - acc: 0.7828 - val_loss: 0.3974 - val_acc: 0.8193
Epoch 25/100
Epoch 00025: val_loss did not improve
 - 1s - loss: 0.4124 - acc: 0.8031 - val_loss: 0.4617 - val_acc: 0.7882
Epoch 26/100
Epoch 00026: val_loss did not improve
 - 1s - loss: 0.4161 - acc: 0.7923 - val_loss: 0.4113 - val_acc: 0.7757
Epoch 27/100
Epoch 00027: val_loss improved from 0.39536 to 0.34378, saving model to best_m.h5
 - 2s - loss: 0.4214 - acc: 0.7937 - val_loss: 0.3438 - val_acc: 0.8411
Epoch 28/100
Epoch 00028: val_loss did not improve
 - 1s - loss: 0.4187 - acc: 0.7906 - val_loss: 0.3462 - val_acc: 0.8505
Epoch 29/100
Epoch 00029: val_loss did not improve


Epoch 84/100
Epoch 00084: val_loss did not improve
 - 2s - loss: 0.3477 - acc: 0.8373 - val_loss: 0.3120 - val_acc: 0.8411
Epoch 85/100
Epoch 00085: val_loss did not improve
 - 1s - loss: 0.3333 - acc: 0.8326 - val_loss: 0.3141 - val_acc: 0.8411
Epoch 86/100
Epoch 00086: val_loss did not improve
 - 1s - loss: 0.3369 - acc: 0.8396 - val_loss: 0.3185 - val_acc: 0.8442
Epoch 87/100
Epoch 00087: val_loss did not improve
 - 2s - loss: 0.3395 - acc: 0.8437 - val_loss: 0.3147 - val_acc: 0.8411
Epoch 88/100
Epoch 00088: val_loss did not improve
 - 1s - loss: 0.3490 - acc: 0.8302 - val_loss: 0.3175 - val_acc: 0.8318
Epoch 89/100
Epoch 00089: val_loss did not improve
 - 1s - loss: 0.3258 - acc: 0.8402 - val_loss: 0.3100 - val_acc: 0.8505
Epoch 90/100
Epoch 00090: val_loss improved from 0.30909 to 0.30809, saving model to best_m.h5
 - 2s - loss: 0.3436 - acc: 0.8406 - val_loss: 0.3081 - val_acc: 0.8411
Epoch 91/100
Epoch 00091: val_loss did not improve
 - 1s - loss: 0.3232 - acc: 0.8531 - val_los

Epoch 43/100
Epoch 00043: val_loss did not improve
 - 1s - loss: 0.4034 - acc: 0.7949 - val_loss: 0.3724 - val_acc: 0.8349
Epoch 44/100
Epoch 00044: val_loss did not improve
 - 1s - loss: 0.3955 - acc: 0.8076 - val_loss: 0.3737 - val_acc: 0.8442
Epoch 45/100
Epoch 00045: val_loss improved from 0.36924 to 0.36248, saving model to best_m.h5
 - 2s - loss: 0.4086 - acc: 0.8005 - val_loss: 0.3625 - val_acc: 0.8536
Epoch 46/100
Epoch 00046: val_loss did not improve
 - 1s - loss: 0.3868 - acc: 0.8250 - val_loss: 0.3721 - val_acc: 0.8380
Epoch 47/100
Epoch 00047: val_loss improved from 0.36248 to 0.35115, saving model to best_m.h5
 - 2s - loss: 0.3953 - acc: 0.8063 - val_loss: 0.3512 - val_acc: 0.8474
Epoch 48/100
Epoch 00048: val_loss did not improve
 - 1s - loss: 0.4007 - acc: 0.8062 - val_loss: 0.3604 - val_acc: 0.8442
Epoch 49/100
Epoch 00049: val_loss did not improve
 - 1s - loss: 0.3707 - acc: 0.8257 - val_loss: 0.3548 - val_acc: 0.8567
Epoch 50/100
Epoch 00050: val_loss did not improve


Epoch 3/100
Epoch 00003: val_loss improved from 0.69044 to 0.68997, saving model to best_m.h5
 - 1s - loss: 0.6920 - acc: 0.5271 - val_loss: 0.6900 - val_acc: 0.5421
Epoch 4/100
Epoch 00004: val_loss improved from 0.68997 to 0.68996, saving model to best_m.h5
 - 1s - loss: 0.6914 - acc: 0.5302 - val_loss: 0.6900 - val_acc: 0.5421
Epoch 5/100
Epoch 00005: val_loss improved from 0.68996 to 0.68618, saving model to best_m.h5
 - 2s - loss: 0.6912 - acc: 0.5368 - val_loss: 0.6862 - val_acc: 0.6168
Epoch 6/100
Epoch 00006: val_loss improved from 0.68618 to 0.65033, saving model to best_m.h5
 - 2s - loss: 0.6674 - acc: 0.6183 - val_loss: 0.6503 - val_acc: 0.6293
Epoch 7/100
Epoch 00007: val_loss improved from 0.65033 to 0.59802, saving model to best_m.h5
 - 2s - loss: 0.6350 - acc: 0.6669 - val_loss: 0.5980 - val_acc: 0.6417
Epoch 8/100
Epoch 00008: val_loss did not improve
 - 1s - loss: 0.6037 - acc: 0.6904 - val_loss: 0.6163 - val_acc: 0.6480
Epoch 9/100
Epoch 00009: val_loss improved from 

Epoch 64/100
Epoch 00064: val_loss did not improve
 - 1s - loss: 0.3735 - acc: 0.8242 - val_loss: 0.3769 - val_acc: 0.7788
Epoch 65/100
Epoch 00065: val_loss did not improve
 - 1s - loss: 0.4006 - acc: 0.8148 - val_loss: 0.3788 - val_acc: 0.7882
Epoch 66/100
Epoch 00066: val_loss did not improve
 - 1s - loss: 0.3677 - acc: 0.8286 - val_loss: 0.3663 - val_acc: 0.7975
Epoch 67/100
Epoch 00067: val_loss did not improve
 - 1s - loss: 0.3851 - acc: 0.8193 - val_loss: 0.3626 - val_acc: 0.8069
Epoch 68/100
Epoch 00068: val_loss improved from 0.36148 to 0.35650, saving model to best_m.h5
 - 2s - loss: 0.3931 - acc: 0.8146 - val_loss: 0.3565 - val_acc: 0.8069
Epoch 69/100
Epoch 00069: val_loss did not improve
 - 1s - loss: 0.3889 - acc: 0.8109 - val_loss: 0.3574 - val_acc: 0.8100
Epoch 70/100
Epoch 00070: val_loss did not improve
 - 1s - loss: 0.3751 - acc: 0.8162 - val_loss: 0.3667 - val_acc: 0.8006
Epoch 71/100
Epoch 00071: val_loss did not improve
 - 1s - loss: 0.3840 - acc: 0.8226 - val_los

Epoch 24/100
Epoch 00024: val_loss did not improve
 - 1s - loss: 0.4346 - acc: 0.7945 - val_loss: 0.3998 - val_acc: 0.7906
Epoch 25/100
Epoch 00025: val_loss did not improve
 - 2s - loss: 0.4014 - acc: 0.8031 - val_loss: 0.4014 - val_acc: 0.7969
Epoch 26/100
Epoch 00026: val_loss improved from 0.39864 to 0.39269, saving model to best_m.h5
 - 2s - loss: 0.3928 - acc: 0.8266 - val_loss: 0.3927 - val_acc: 0.7875
Epoch 27/100
Epoch 00027: val_loss improved from 0.39269 to 0.36530, saving model to best_m.h5
 - 2s - loss: 0.3737 - acc: 0.8344 - val_loss: 0.3653 - val_acc: 0.8219
Epoch 28/100
Epoch 00028: val_loss improved from 0.36530 to 0.35987, saving model to best_m.h5
 - 2s - loss: 0.4006 - acc: 0.7985 - val_loss: 0.3599 - val_acc: 0.8250
Epoch 29/100
Epoch 00029: val_loss did not improve
 - 2s - loss: 0.3755 - acc: 0.8297 - val_loss: 0.3978 - val_acc: 0.8063
Epoch 30/100
Epoch 00030: val_loss did not improve
 - 1s - loss: 0.4084 - acc: 0.7945 - val_loss: 0.3732 - val_acc: 0.8187
Epoch 3

Epoch 00087: val_loss did not improve
 - 1s - loss: 0.2945 - acc: 0.8726 - val_loss: 0.3423 - val_acc: 0.8375
Epoch 88/100
Epoch 00088: val_loss did not improve
 - 2s - loss: 0.2922 - acc: 0.8625 - val_loss: 0.3363 - val_acc: 0.8313
Epoch 89/100
Epoch 00089: val_loss improved from 0.33352 to 0.33271, saving model to best_m.h5
 - 2s - loss: 0.3166 - acc: 0.8461 - val_loss: 0.3327 - val_acc: 0.8406
Epoch 90/100
Epoch 00090: val_loss did not improve
 - 1s - loss: 0.2960 - acc: 0.8625 - val_loss: 0.3424 - val_acc: 0.8281
Epoch 91/100
Epoch 00091: val_loss did not improve
 - 2s - loss: 0.2954 - acc: 0.8633 - val_loss: 0.3443 - val_acc: 0.8375
Epoch 92/100
Epoch 00092: val_loss did not improve
 - 1s - loss: 0.2971 - acc: 0.8703 - val_loss: 0.3508 - val_acc: 0.8344
Epoch 93/100
Epoch 00093: val_loss did not improve
 - 1s - loss: 0.2896 - acc: 0.8672 - val_loss: 0.3487 - val_acc: 0.8406
Epoch 94/100
Epoch 00094: val_loss did not improve
 - 1s - loss: 0.3065 - acc: 0.8602 - val_loss: 0.3566 - v

In [12]:
def create_model():
    '''Build and return the (uncompiled) second CNN variant.

    A 32-filter conv + pool stem on a 75x75x3 input, two zero-padded double-conv
    stages (32 then 64 filters) each ending in a pool, one more 64-filter
    conv + pool, then Flatten, a 256-unit ReLU dense layer with dropout and a
    single sigmoid output unit.
    '''
    def _conv(filters, **extra):
        # 3x3 ReLU convolution; `extra` carries per-layer keyword arguments.
        return Conv2D(filters, (3, 3), activation='relu', **extra)

    def _pool():
        # 2x2 max-pool with stride 2.
        return MaxPooling2D((2, 2), strides=(2, 2))

    layer_stack = [
        # Stem
        _conv(32, input_shape=(75, 75, 3)),
        _pool(),

        # Stage with 32 filters
        ZeroPadding2D((1, 1)),
        _conv(32, strides=1),
        _conv(32, strides=1),
        _pool(),

        # Stage with 64 filters
        ZeroPadding2D((1, 1)),
        _conv(64, strides=1),
        _conv(64, strides=1),
        _pool(),

        # Final convolution
        _conv(64, strides=1),
        _pool(),

        # Classifier head
        Flatten(),
        Dense(256, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]

    model = Sequential()
    for layer in layer_stack:
        model.add(layer)
    return model
print('model model')

def kfold_train(fold_cnt=3,rnd=42):
    '''Train one model per CV fold; return out-of-fold and averaged test predictions.

    Reads the module-level `train_x`, `y`, `test_x`, `create_model` and `lr_f`.

    Parameters
    ----------
    fold_cnt : number of KFold splits; one fresh model is trained per split.
    rnd : seed base; the shuffled KFold uses `random_state=2*rnd`.

    Returns
    -------
    (train_pred, test_pred)
        train_pred : array of shape (len(train_x), 1); each row is filled by the
            model of the fold in which that row was held out.
        test_pred : array of shape (len(test_x), 1); mean over the fold models
            of their predictions on `test_x`.

    Side effects: writes (and repeatedly overwrites) 'best_m.h5' in the
    working directory.
    '''
    # Size the prediction buffers from the data rather than hard-coding row counts.
    train_pred = np.zeros((train_x.shape[0], 1))
    test_pred = np.zeros((test_x.shape[0], 1))
    kf = KFold(n_splits=fold_cnt, shuffle=True, random_state=2*rnd)

    # Fold-independent configuration, built once.
    datagen = ImageDataGenerator(
        rotation_range = 20,
        width_shift_range = 0.1,
        height_shift_range = 0.1,
        zoom_range = 0.2,
        horizontal_flip=True
    )
    bat_size = 16
    model_p = 'best_m.h5'

    for train_index, test_index in kf.split(train_x):
        curr_x,curr_y = train_x[train_index],y[train_index]
        val_x,val_y = train_x[test_index],y[test_index]
        # Whole batches only; a trailing partial batch is not counted.
        steps_train = len(curr_y)//bat_size

        model = create_model()
        # LearningRateScheduler sets the rate at the start of every epoch, so
        # lr_f takes precedence over the 0.0005 passed to Adam here.
        model.compile(loss='binary_crossentropy', optimizer=Adam(0.0005), metrics=['accuracy'])
        # A new checkpoint callback per fold: its "best so far" starts fresh,
        # so the previous fold's file is overwritten on the first epoch.
        model_chk = ModelCheckpoint(filepath=model_p, monitor='val_loss', save_best_only=True, verbose=1)
        lr_s = LearningRateScheduler(lr_f)
        model.fit_generator(datagen.flow(curr_x, curr_y, batch_size=bat_size),
                  validation_data=(val_x,val_y),
                  steps_per_epoch = steps_train,
                  epochs=100,
                  verbose=2,
                  callbacks=[model_chk,lr_s]
                 )

        # Predict with the best-val_loss checkpoint, not the last-epoch weights.
        # NOTE(review): the checkpoint is selected on this same held-out fold,
        # so the out-of-fold predictions are somewhat optimistic.
        model = load_model(model_p)
        train_pred[test_index] = model.predict(val_x)
        test_pred = test_pred + model.predict(test_x)/fold_cnt
        print('============================')
    return train_pred,test_pred

# Run 5-fold training with the create_model defined in this cell. train_pred
# holds the out-of-fold predictions, test_pred the fold-averaged test predictions.
train_pred,test_pred = kfold_train(fold_cnt=5)


# Persist both arrays together as one pickled list: [train_pred, test_pred].
# `pickle` and `log_loss` are imported in an earlier cell, which must have run.
with open('../features/cnn_2_aug_rescale_preprocess_feat.pkl','wb') as fout:
    pickle.dump([train_pred,test_pred],fout)

# Log loss of the out-of-fold predictions against the training labels.
print(log_loss(y,train_pred))

# retrain fold5, 1, 15080
# 2, 15274


model model
Epoch 1/100
Epoch 00001: val_loss improved from inf to 0.69396, saving model to best_m.h5
 - 4s - loss: 0.6918 - acc: 0.5419 - val_loss: 0.6940 - val_acc: 0.4984
Epoch 2/100
Epoch 00002: val_loss improved from 0.69396 to 0.66085, saving model to best_m.h5
 - 2s - loss: 0.6904 - acc: 0.5435 - val_loss: 0.6608 - val_acc: 0.4984
Epoch 3/100
Epoch 00003: val_loss improved from 0.66085 to 0.58015, saving model to best_m.h5
 - 2s - loss: 0.6497 - acc: 0.5831 - val_loss: 0.5801 - val_acc: 0.7165
Epoch 4/100
Epoch 00004: val_loss improved from 0.58015 to 0.52270, saving model to best_m.h5
 - 2s - loss: 0.5577 - acc: 0.6966 - val_loss: 0.5227 - val_acc: 0.7383
Epoch 5/100
Epoch 00005: val_loss did not improve
 - 1s - loss: 0.5651 - acc: 0.7109 - val_loss: 0.5372 - val_acc: 0.7508
Epoch 6/100
Epoch 00006: val_loss did not improve
 - 1s - loss: 0.5427 - acc: 0.7229 - val_loss: 0.5766 - val_acc: 0.7009
Epoch 7/100
Epoch 00007: val_loss improved from 0.52270 to 0.46577, saving model to 

Epoch 00060: val_loss did not improve
 - 1s - loss: 0.2832 - acc: 0.8625 - val_loss: 0.2718 - val_acc: 0.8816
Epoch 61/100
Epoch 00061: val_loss did not improve
 - 1s - loss: 0.2921 - acc: 0.8711 - val_loss: 0.2694 - val_acc: 0.8847
Epoch 62/100
Epoch 00062: val_loss improved from 0.26196 to 0.26187, saving model to best_m.h5
 - 1s - loss: 0.2896 - acc: 0.8636 - val_loss: 0.2619 - val_acc: 0.8847
Epoch 63/100
Epoch 00063: val_loss improved from 0.26187 to 0.25556, saving model to best_m.h5
 - 1s - loss: 0.2943 - acc: 0.8584 - val_loss: 0.2556 - val_acc: 0.8879
Epoch 64/100
Epoch 00064: val_loss did not improve
 - 1s - loss: 0.2957 - acc: 0.8630 - val_loss: 0.2621 - val_acc: 0.8847
Epoch 65/100
Epoch 00065: val_loss did not improve
 - 1s - loss: 0.2859 - acc: 0.8630 - val_loss: 0.2625 - val_acc: 0.8879
Epoch 66/100
Epoch 00066: val_loss did not improve
 - 1s - loss: 0.2989 - acc: 0.8625 - val_loss: 0.2607 - val_acc: 0.8847
Epoch 67/100
Epoch 00067: val_loss did not improve
 - 1s - loss:

Epoch 22/100
Epoch 00022: val_loss improved from 0.37239 to 0.35509, saving model to best_m.h5
 - 1s - loss: 0.3568 - acc: 0.8445 - val_loss: 0.3551 - val_acc: 0.8411
Epoch 23/100
Epoch 00023: val_loss did not improve
 - 1s - loss: 0.3664 - acc: 0.8359 - val_loss: 0.3831 - val_acc: 0.8287
Epoch 24/100
Epoch 00024: val_loss did not improve
 - 1s - loss: 0.3726 - acc: 0.8345 - val_loss: 0.3611 - val_acc: 0.8411
Epoch 25/100
Epoch 00025: val_loss did not improve
 - 1s - loss: 0.3845 - acc: 0.8242 - val_loss: 0.3876 - val_acc: 0.8131
Epoch 26/100
Epoch 00026: val_loss did not improve
 - 1s - loss: 0.3792 - acc: 0.8282 - val_loss: 0.3625 - val_acc: 0.8411
Epoch 27/100
Epoch 00027: val_loss did not improve
 - 1s - loss: 0.3530 - acc: 0.8255 - val_loss: 0.4042 - val_acc: 0.8349
Epoch 28/100
Epoch 00028: val_loss did not improve
 - 1s - loss: 0.3629 - acc: 0.8396 - val_loss: 0.3642 - val_acc: 0.8349
Epoch 29/100
Epoch 00029: val_loss did not improve
 - 1s - loss: 0.3320 - acc: 0.8523 - val_los

Epoch 84/100
Epoch 00084: val_loss did not improve
 - 1s - loss: 0.2693 - acc: 0.8797 - val_loss: 0.2971 - val_acc: 0.8816
Epoch 85/100
Epoch 00085: val_loss did not improve
 - 1s - loss: 0.2842 - acc: 0.8687 - val_loss: 0.2982 - val_acc: 0.8723
Epoch 86/100
Epoch 00086: val_loss did not improve
 - 1s - loss: 0.2877 - acc: 0.8693 - val_loss: 0.2998 - val_acc: 0.8692
Epoch 87/100
Epoch 00087: val_loss did not improve
 - 1s - loss: 0.2793 - acc: 0.8680 - val_loss: 0.3046 - val_acc: 0.8505
Epoch 88/100
Epoch 00088: val_loss did not improve
 - 1s - loss: 0.2855 - acc: 0.8716 - val_loss: 0.3045 - val_acc: 0.8598
Epoch 89/100
Epoch 00089: val_loss improved from 0.29463 to 0.29319, saving model to best_m.h5
 - 1s - loss: 0.2702 - acc: 0.8795 - val_loss: 0.2932 - val_acc: 0.8785
Epoch 90/100
Epoch 00090: val_loss improved from 0.29319 to 0.29225, saving model to best_m.h5
 - 1s - loss: 0.2996 - acc: 0.8609 - val_loss: 0.2922 - val_acc: 0.8816
Epoch 91/100
Epoch 00091: val_loss did not improve


Epoch 42/100
Epoch 00042: val_loss did not improve
 - 1s - loss: 0.3624 - acc: 0.8292 - val_loss: 0.3142 - val_acc: 0.8474
Epoch 43/100
Epoch 00043: val_loss did not improve
 - 1s - loss: 0.3931 - acc: 0.8144 - val_loss: 0.3097 - val_acc: 0.8536
Epoch 44/100
Epoch 00044: val_loss did not improve
 - 1s - loss: 0.3355 - acc: 0.8500 - val_loss: 0.3222 - val_acc: 0.8380
Epoch 45/100
Epoch 00045: val_loss did not improve
 - 1s - loss: 0.3423 - acc: 0.8406 - val_loss: 0.3067 - val_acc: 0.8442
Epoch 46/100
Epoch 00046: val_loss did not improve
 - 1s - loss: 0.3404 - acc: 0.8344 - val_loss: 0.3055 - val_acc: 0.8442
Epoch 47/100
Epoch 00047: val_loss did not improve
 - 1s - loss: 0.3573 - acc: 0.8201 - val_loss: 0.3091 - val_acc: 0.8411
Epoch 48/100
Epoch 00048: val_loss did not improve
 - 1s - loss: 0.3340 - acc: 0.8336 - val_loss: 0.3066 - val_acc: 0.8380
Epoch 49/100
Epoch 00049: val_loss improved from 0.30266 to 0.29768, saving model to best_m.h5
 - 1s - loss: 0.3467 - acc: 0.8445 - val_los

Epoch 4/100
Epoch 00004: val_loss improved from 0.59510 to 0.58071, saving model to best_m.h5
 - 1s - loss: 0.5959 - acc: 0.6646 - val_loss: 0.5807 - val_acc: 0.6760
Epoch 5/100
Epoch 00005: val_loss improved from 0.58071 to 0.57516, saving model to best_m.h5
 - 1s - loss: 0.5724 - acc: 0.6882 - val_loss: 0.5752 - val_acc: 0.6729
Epoch 6/100
Epoch 00006: val_loss did not improve
 - 1s - loss: 0.5825 - acc: 0.6771 - val_loss: 0.5763 - val_acc: 0.6822
Epoch 7/100
Epoch 00007: val_loss did not improve
 - 1s - loss: 0.5580 - acc: 0.6982 - val_loss: 0.5841 - val_acc: 0.6511
Epoch 8/100
Epoch 00008: val_loss improved from 0.57516 to 0.57083, saving model to best_m.h5
 - 1s - loss: 0.5677 - acc: 0.6857 - val_loss: 0.5708 - val_acc: 0.6791
Epoch 9/100
Epoch 00009: val_loss improved from 0.57083 to 0.55839, saving model to best_m.h5
 - 1s - loss: 0.5410 - acc: 0.7141 - val_loss: 0.5584 - val_acc: 0.6885
Epoch 10/100
Epoch 00010: val_loss improved from 0.55839 to 0.54967, saving model to best_m.

Epoch 63/100
Epoch 00063: val_loss did not improve
 - 1s - loss: 0.2717 - acc: 0.8812 - val_loss: 0.2740 - val_acc: 0.8692
Epoch 64/100
Epoch 00064: val_loss did not improve
 - 1s - loss: 0.2909 - acc: 0.8680 - val_loss: 0.2782 - val_acc: 0.8692
Epoch 65/100
Epoch 00065: val_loss did not improve
 - 1s - loss: 0.3097 - acc: 0.8566 - val_loss: 0.2792 - val_acc: 0.8536
Epoch 66/100
Epoch 00066: val_loss did not improve
 - 1s - loss: 0.2873 - acc: 0.8609 - val_loss: 0.2716 - val_acc: 0.8629
Epoch 67/100
Epoch 00067: val_loss did not improve
 - 1s - loss: 0.2815 - acc: 0.8808 - val_loss: 0.2861 - val_acc: 0.8629
Epoch 68/100
Epoch 00068: val_loss did not improve
 - 1s - loss: 0.2949 - acc: 0.8555 - val_loss: 0.2748 - val_acc: 0.8598
Epoch 69/100
Epoch 00069: val_loss did not improve
 - 1s - loss: 0.2849 - acc: 0.8688 - val_loss: 0.2794 - val_acc: 0.8629
Epoch 70/100
Epoch 00070: val_loss did not improve
 - 1s - loss: 0.2938 - acc: 0.8617 - val_loss: 0.2725 - val_acc: 0.8660
Epoch 71/100
Epo

Epoch 00024: val_loss did not improve
 - 1s - loss: 0.3364 - acc: 0.8454 - val_loss: 0.3560 - val_acc: 0.8344
Epoch 25/100
Epoch 00025: val_loss improved from 0.35503 to 0.33935, saving model to best_m.h5
 - 1s - loss: 0.3470 - acc: 0.8344 - val_loss: 0.3393 - val_acc: 0.8406
Epoch 26/100
Epoch 00026: val_loss did not improve
 - 1s - loss: 0.3203 - acc: 0.8437 - val_loss: 0.3712 - val_acc: 0.8313
Epoch 27/100
Epoch 00027: val_loss did not improve
 - 1s - loss: 0.3515 - acc: 0.8274 - val_loss: 0.3583 - val_acc: 0.8469
Epoch 28/100
Epoch 00028: val_loss did not improve
 - 1s - loss: 0.3376 - acc: 0.8445 - val_loss: 0.3495 - val_acc: 0.8375
Epoch 29/100
Epoch 00029: val_loss did not improve
 - 1s - loss: 0.3181 - acc: 0.8507 - val_loss: 0.3625 - val_acc: 0.8219
Epoch 30/100
Epoch 00030: val_loss improved from 0.33935 to 0.33368, saving model to best_m.h5
 - 1s - loss: 0.3379 - acc: 0.8367 - val_loss: 0.3337 - val_acc: 0.8375
Epoch 31/100
Epoch 00031: val_loss did not improve
 - 1s - loss:

Epoch 90/100
Epoch 00090: val_loss did not improve
 - 1s - loss: 0.2631 - acc: 0.8765 - val_loss: 0.3438 - val_acc: 0.8313
Epoch 91/100
Epoch 00091: val_loss did not improve
 - 1s - loss: 0.2312 - acc: 0.8922 - val_loss: 0.3359 - val_acc: 0.8406
Epoch 92/100
Epoch 00092: val_loss did not improve
 - 1s - loss: 0.2511 - acc: 0.8867 - val_loss: 0.3426 - val_acc: 0.8406
Epoch 93/100
Epoch 00093: val_loss did not improve
 - 1s - loss: 0.2398 - acc: 0.8898 - val_loss: 0.3500 - val_acc: 0.8406
Epoch 94/100
Epoch 00094: val_loss did not improve
 - 1s - loss: 0.2570 - acc: 0.8781 - val_loss: 0.3461 - val_acc: 0.8438
Epoch 95/100
Epoch 00095: val_loss did not improve
 - 1s - loss: 0.2588 - acc: 0.8844 - val_loss: 0.3377 - val_acc: 0.8375
Epoch 96/100
Epoch 00096: val_loss did not improve
 - 1s - loss: 0.2500 - acc: 0.8867 - val_loss: 0.3450 - val_acc: 0.8406
Epoch 97/100
Epoch 00097: val_loss did not improve
 - 1s - loss: 0.2534 - acc: 0.8937 - val_loss: 0.3560 - val_acc: 0.8344
Epoch 98/100
Epo

In [16]:
def create_model():
    '''Build and return the (uncompiled) third CNN variant.

    Four Conv-MaxPool-Dropout(0.2) stages with 64, 128, 128 and 64 filters on a
    75x75x3 input (the first stage pools with a 3x3 window, the rest 2x2),
    followed by Flatten, two ReLU dense layers (512 and 256 units, each with
    dropout 0.2) and a single sigmoid output unit.
    '''
    model = Sequential()

    # Stage 1 declares the input shape and uses the larger 3x3 pooling window.
    model.add(Conv2D(64, kernel_size=(3, 3), activation='relu', input_shape=(75, 75, 3)))
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    model.add(Dropout(0.2))

    # Stages 2-4 share one layout and differ only in filter count.
    for filters in (128, 128, 64):
        model.add(Conv2D(filters, kernel_size=(3, 3), activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
        model.add(Dropout(0.2))

    # Dense layers need a flat feature vector.
    model.add(Flatten())

    # Two hidden dense layers, each followed by dropout.
    for units in (512, 256):
        model.add(Dense(units, activation='relu'))
        model.add(Dropout(0.2))

    # Single-unit sigmoid output.
    model.add(Dense(1, activation="sigmoid"))
    return model
print('model model')

def kfold_train(fold_cnt=3,rnd=42):
    '''Train one model per CV fold; return out-of-fold and averaged test predictions.

    Reads the module-level `train_x`, `y`, `test_x`, `create_model` and `lr_f`.

    Parameters
    ----------
    fold_cnt : number of KFold splits; one fresh model is trained per split.
    rnd : seed base; the shuffled KFold uses `random_state=2*rnd`.

    Returns
    -------
    (train_pred, test_pred)
        train_pred : array of shape (len(train_x), 1); each row is filled by the
            model of the fold in which that row was held out.
        test_pred : array of shape (len(test_x), 1); mean over the fold models
            of their predictions on `test_x`.

    Side effects: writes (and repeatedly overwrites) 'best_m.h5' in the
    working directory.
    '''
    # Size the prediction buffers from the data rather than hard-coding row counts.
    train_pred = np.zeros((train_x.shape[0], 1))
    test_pred = np.zeros((test_x.shape[0], 1))
    kf = KFold(n_splits=fold_cnt, shuffle=True, random_state=2*rnd)

    # Fold-independent configuration, built once.
    datagen = ImageDataGenerator(
        rotation_range = 20,
        width_shift_range = 0.1,
        height_shift_range = 0.1,
        zoom_range = 0.2,
        horizontal_flip=True
    )
    bat_size = 16
    model_p = 'best_m.h5'

    for train_index, test_index in kf.split(train_x):
        curr_x,curr_y = train_x[train_index],y[train_index]
        val_x,val_y = train_x[test_index],y[test_index]
        # Whole batches only; a trailing partial batch is not counted.
        steps_train = len(curr_y)//bat_size

        model = create_model()
        # LearningRateScheduler sets the rate at the start of every epoch, so
        # lr_f takes precedence over the 0.0005 passed to Adam here.
        model.compile(loss='binary_crossentropy', optimizer=Adam(0.0005), metrics=['accuracy'])
        # A new checkpoint callback per fold: its "best so far" starts fresh,
        # so the previous fold's file is overwritten on the first epoch.
        model_chk = ModelCheckpoint(filepath=model_p, monitor='val_loss', save_best_only=True, verbose=1)
        lr_s = LearningRateScheduler(lr_f)
        model.fit_generator(datagen.flow(curr_x, curr_y, batch_size=bat_size),
                  validation_data=(val_x,val_y),
                  steps_per_epoch = steps_train,
                  epochs=100,
                  verbose=2,
                  callbacks=[model_chk,lr_s]
                 )

        # Predict with the best-val_loss checkpoint, not the last-epoch weights.
        # NOTE(review): the checkpoint is selected on this same held-out fold,
        # so the out-of-fold predictions are somewhat optimistic.
        model = load_model(model_p)
        train_pred[test_index] = model.predict(val_x)
        test_pred = test_pred + model.predict(test_x)/fold_cnt
        print('============================')
    return train_pred,test_pred

# Run 4-fold training with the create_model defined in this cell. train_pred
# holds the out-of-fold predictions, test_pred the fold-averaged test predictions.
train_pred,test_pred = kfold_train(fold_cnt=4)


# Persist both arrays together as one pickled list: [train_pred, test_pred].
# `pickle` and `log_loss` are imported in an earlier cell, which must have run.
with open('../features/cnn_3_aug_rescale_preprocess_feat.pkl','wb') as fout:
    pickle.dump([train_pred,test_pred],fout)

# Log loss of the out-of-fold predictions against the training labels.
print(log_loss(y,train_pred))

# this 2737
# pre 15080
# fold3 ,1 ,15297
# fold4 ,1 , 15258
# fold4 ,2 , 15137
# fold5 ,1 , 15302

model model
Epoch 1/100
Epoch 00001: val_loss improved from inf to 0.69512, saving model to best_m.h5
 - 6s - loss: 0.6953 - acc: 0.5389 - val_loss: 0.6951 - val_acc: 0.5087
Epoch 2/100
Epoch 00002: val_loss improved from 0.69512 to 0.69337, saving model to best_m.h5
 - 2s - loss: 0.6919 - acc: 0.5380 - val_loss: 0.6934 - val_acc: 0.5087
Epoch 3/100
Epoch 00003: val_loss did not improve
 - 2s - loss: 0.6914 - acc: 0.5364 - val_loss: 0.6940 - val_acc: 0.5087
Epoch 4/100
Epoch 00004: val_loss did not improve
 - 2s - loss: 0.6915 - acc: 0.5336 - val_loss: 0.6941 - val_acc: 0.5087
Epoch 5/100
Epoch 00005: val_loss did not improve
 - 2s - loss: 0.6896 - acc: 0.5414 - val_loss: 0.6936 - val_acc: 0.5087
Epoch 6/100
Epoch 00006: val_loss did not improve
 - 2s - loss: 0.6889 - acc: 0.5261 - val_loss: 0.7010 - val_acc: 0.5087
Epoch 7/100
Epoch 00007: val_loss improved from 0.69337 to 0.67191, saving model to best_m.h5
 - 2s - loss: 0.6709 - acc: 0.5897 - val_loss: 0.6719 - val_acc: 0.5037
Epoch 

Epoch 57/100
Epoch 00057: val_loss did not improve
 - 2s - loss: 0.3755 - acc: 0.8075 - val_loss: 0.3037 - val_acc: 0.8379
Epoch 58/100
Epoch 00058: val_loss did not improve
 - 2s - loss: 0.3763 - acc: 0.8164 - val_loss: 0.3030 - val_acc: 0.8454
Epoch 59/100
Epoch 00059: val_loss did not improve
 - 2s - loss: 0.3918 - acc: 0.8025 - val_loss: 0.3208 - val_acc: 0.8354
Epoch 60/100
Epoch 00060: val_loss improved from 0.30284 to 0.30182, saving model to best_m.h5
 - 2s - loss: 0.4013 - acc: 0.8072 - val_loss: 0.3018 - val_acc: 0.8554
Epoch 61/100
Epoch 00061: val_loss did not improve
 - 2s - loss: 0.3898 - acc: 0.8170 - val_loss: 0.3104 - val_acc: 0.8404
Epoch 62/100
Epoch 00062: val_loss did not improve
 - 2s - loss: 0.3762 - acc: 0.8256 - val_loss: 0.3166 - val_acc: 0.8354
Epoch 63/100
Epoch 00063: val_loss did not improve
 - 2s - loss: 0.3998 - acc: 0.7995 - val_loss: 0.3037 - val_acc: 0.8404
Epoch 64/100
Epoch 00064: val_loss did not improve
 - 2s - loss: 0.3573 - acc: 0.8333 - val_los

Epoch 18/100
Epoch 00018: val_loss improved from 0.42812 to 0.42375, saving model to best_m.h5
 - 2s - loss: 0.4341 - acc: 0.7993 - val_loss: 0.4238 - val_acc: 0.8080
Epoch 19/100
Epoch 00019: val_loss did not improve
 - 2s - loss: 0.4193 - acc: 0.8172 - val_loss: 0.4292 - val_acc: 0.8180
Epoch 20/100
Epoch 00020: val_loss did not improve
 - 2s - loss: 0.4113 - acc: 0.8195 - val_loss: 0.4355 - val_acc: 0.8030
Epoch 21/100
Epoch 00021: val_loss improved from 0.42375 to 0.39379, saving model to best_m.h5
 - 2s - loss: 0.4187 - acc: 0.8141 - val_loss: 0.3938 - val_acc: 0.8279
Epoch 22/100
Epoch 00022: val_loss improved from 0.39379 to 0.39086, saving model to best_m.h5
 - 2s - loss: 0.3767 - acc: 0.8375 - val_loss: 0.3909 - val_acc: 0.8329
Epoch 23/100
Epoch 00023: val_loss did not improve
 - 2s - loss: 0.3742 - acc: 0.8466 - val_loss: 0.4334 - val_acc: 0.8254
Epoch 24/100
Epoch 00024: val_loss did not improve
 - 2s - loss: 0.4215 - acc: 0.7925 - val_loss: 0.4096 - val_acc: 0.8229
Epoch 2

Epoch 80/100
Epoch 00080: val_loss did not improve
 - 2s - loss: 0.3275 - acc: 0.8441 - val_loss: 0.3482 - val_acc: 0.8529
Epoch 81/100
Epoch 00081: val_loss did not improve
 - 2s - loss: 0.3203 - acc: 0.8600 - val_loss: 0.3390 - val_acc: 0.8579
Epoch 82/100
Epoch 00082: val_loss did not improve
 - 2s - loss: 0.3148 - acc: 0.8581 - val_loss: 0.3320 - val_acc: 0.8529
Epoch 83/100
Epoch 00083: val_loss did not improve
 - 2s - loss: 0.3363 - acc: 0.8422 - val_loss: 0.3369 - val_acc: 0.8504
Epoch 84/100
Epoch 00084: val_loss did not improve
 - 2s - loss: 0.3044 - acc: 0.8558 - val_loss: 0.3357 - val_acc: 0.8603
Epoch 85/100
Epoch 00085: val_loss did not improve
 - 2s - loss: 0.3195 - acc: 0.8641 - val_loss: 0.3326 - val_acc: 0.8504
Epoch 86/100
Epoch 00086: val_loss did not improve
 - 2s - loss: 0.3331 - acc: 0.8350 - val_loss: 0.3338 - val_acc: 0.8504
Epoch 87/100
Epoch 00087: val_loss did not improve
 - 2s - loss: 0.3422 - acc: 0.8447 - val_loss: 0.3278 - val_acc: 0.8603
Epoch 88/100
Epo

Epoch 39/100
Epoch 00039: val_loss improved from 0.28804 to 0.28446, saving model to best_m.h5
 - 2s - loss: 0.3377 - acc: 0.8295 - val_loss: 0.2845 - val_acc: 0.8678
Epoch 40/100
Epoch 00040: val_loss did not improve
 - 2s - loss: 0.3174 - acc: 0.8650 - val_loss: 0.2982 - val_acc: 0.8529
Epoch 41/100
Epoch 00041: val_loss improved from 0.28446 to 0.28213, saving model to best_m.h5
 - 2s - loss: 0.3215 - acc: 0.8422 - val_loss: 0.2821 - val_acc: 0.8653
Epoch 42/100
Epoch 00042: val_loss did not improve
 - 2s - loss: 0.3212 - acc: 0.8517 - val_loss: 0.3018 - val_acc: 0.8529
Epoch 43/100
Epoch 00043: val_loss did not improve
 - 2s - loss: 0.3546 - acc: 0.8414 - val_loss: 0.2838 - val_acc: 0.8678
Epoch 44/100
Epoch 00044: val_loss did not improve
 - 2s - loss: 0.3167 - acc: 0.8483 - val_loss: 0.2882 - val_acc: 0.8603
Epoch 45/100
Epoch 00045: val_loss did not improve
 - 2s - loss: 0.3302 - acc: 0.8406 - val_loss: 0.2863 - val_acc: 0.8653
Epoch 46/100
Epoch 00046: val_loss did not improve


 - 6s - loss: 0.6945 - acc: 0.5305 - val_loss: 0.6910 - val_acc: 0.5062
Epoch 2/100
Epoch 00002: val_loss did not improve
 - 2s - loss: 0.6921 - acc: 0.5334 - val_loss: 0.6935 - val_acc: 0.5062
Epoch 3/100
Epoch 00003: val_loss improved from 0.69099 to 0.66472, saving model to best_m.h5
 - 2s - loss: 0.6846 - acc: 0.5386 - val_loss: 0.6647 - val_acc: 0.5062
Epoch 4/100
Epoch 00004: val_loss improved from 0.66472 to 0.65701, saving model to best_m.h5
 - 2s - loss: 0.6271 - acc: 0.6164 - val_loss: 0.6570 - val_acc: 0.6733
Epoch 5/100
Epoch 00005: val_loss improved from 0.65701 to 0.57944, saving model to best_m.h5
 - 2s - loss: 0.6352 - acc: 0.5895 - val_loss: 0.5794 - val_acc: 0.7107
Epoch 6/100
Epoch 00006: val_loss did not improve
 - 2s - loss: 0.5890 - acc: 0.7014 - val_loss: 0.6157 - val_acc: 0.6484
Epoch 7/100
Epoch 00007: val_loss did not improve
 - 2s - loss: 0.5752 - acc: 0.6922 - val_loss: 0.5915 - val_acc: 0.6908
Epoch 8/100
Epoch 00008: val_loss improved from 0.57944 to 0.565

Epoch 63/100
Epoch 00063: val_loss did not improve
 - 2s - loss: 0.2988 - acc: 0.8716 - val_loss: 0.3324 - val_acc: 0.8404
Epoch 64/100
Epoch 00064: val_loss did not improve
 - 2s - loss: 0.3040 - acc: 0.8641 - val_loss: 0.3209 - val_acc: 0.8653
Epoch 65/100
Epoch 00065: val_loss did not improve
 - 2s - loss: 0.2909 - acc: 0.8708 - val_loss: 0.3255 - val_acc: 0.8653
Epoch 66/100
Epoch 00066: val_loss did not improve
 - 2s - loss: 0.3055 - acc: 0.8650 - val_loss: 0.3273 - val_acc: 0.8479
Epoch 67/100
Epoch 00067: val_loss did not improve
 - 2s - loss: 0.3074 - acc: 0.8625 - val_loss: 0.3220 - val_acc: 0.8678
Epoch 68/100
Epoch 00068: val_loss did not improve
 - 2s - loss: 0.3336 - acc: 0.8362 - val_loss: 0.3206 - val_acc: 0.8554
Epoch 69/100
Epoch 00069: val_loss did not improve
 - 2s - loss: 0.2837 - acc: 0.8714 - val_loss: 0.3273 - val_acc: 0.8603
Epoch 70/100
Epoch 00070: val_loss did not improve
 - 2s - loss: 0.3003 - acc: 0.8608 - val_loss: 0.3257 - val_acc: 0.8504
Epoch 71/100
Epo

In [20]:
from keras.callbacks import ModelCheckpoint,LearningRateScheduler
def ConvBlock(model, layers, filters):
    '''Append one convolutional block to `model` in place.

    The block consists of `layers` repetitions of
    zero padding (1 px per side) -> 3x3 convolution with `filters` filters
    and ReLU activation -> batch normalization over axis 3,
    followed by a single 2x2 max pooling with stride 2.

    Nothing is returned; the layers are attached through `model.add`.
    '''
    for _ in range(layers):
        stage = (
            ZeroPadding2D(padding=(1, 1)),
            Conv2D(filters, kernel_size=(3, 3), activation='relu'),
            BatchNormalization(axis=3),
        )
        for layer in stage:
            model.add(layer)

    # One pooling step closes the block, regardless of how many conv layers it has.
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

def create_model():
    '''Build and return the (uncompiled) keras CNN used for this experiment.

    Architecture: an identity Lambda layer that declares the 75x75x3 input,
    four single-layer ConvBlocks with 32, 64, 128 and 128 filters, then
    Flatten and a single sigmoid unit.
    '''
    net = Sequential()

    # Identity layer; its only job here is to declare the 75x75x3 input shape.
    net.add(Lambda(lambda x: x, input_shape=(75, 75, 3)))

    # Each block halves the spatial size: 75 -> 37 -> 18 -> 9 -> 4,
    # so the final feature map is 4x4x128.
    for n_filters in (32, 64, 128, 128):
        ConvBlock(net, 1, n_filters)

    net.add(Flatten())
    net.add(Dense(1, activation='sigmoid'))
    return net
# Marker echoed in the cell output once the definitions above have been executed.
print('model model')


def kfold_train(fold_cnt=3,rnd=42):
    '''Train one model per fold and collect out-of-fold and test predictions.

    Relies on notebook-level names: the arrays `train_x`, `y` and `test_x`,
    the model factory `create_model`, and the learning-rate schedule `lr_f`
    as they are defined at call time.

    Args:
        fold_cnt: number of KFold splits, i.e. number of models trained.
        rnd: seed base; the shuffled KFold uses `random_state=2*rnd`.

    Returns:
        (train_pred, test_pred):
        train_pred -- array of shape (len(train_x), 1); each row is predicted
            by the model of the fold in which that row was held out.
        test_pred -- array of shape (len(test_x), 1); mean of the per-fold
            predictions on `test_x`.

    Side effects:
        Each fold overwrites the checkpoint file 'best_m.h5' in the working
        directory with its best model (lowest val_loss).
    '''
    # Size the buffers from the data instead of hard-coding 1604 / 8424 rows,
    # so the function keeps working if the input arrays change.
    train_pred = np.zeros((len(train_x), 1))
    test_pred = np.zeros((len(test_x), 1))

    kf = KFold(n_splits=fold_cnt, shuffle=True, random_state=2*rnd)

    # The augmentation settings, batch size and checkpoint path are the same
    # for every fold, so they are set up once outside the loop.
    datagen = ImageDataGenerator(
        rotation_range = 20,
        width_shift_range = 0.1,
        height_shift_range = 0.1,
        zoom_range = 0.2,
        horizontal_flip=True
    )
    bat_size = 16
    model_p = 'best_m.h5'

    for train_index, test_index in kf.split(train_x):
        curr_x,curr_y = train_x[train_index],y[train_index]
        val_x,val_y = train_x[test_index],y[test_index]

        # Whole batches only; a trailing partial batch is not counted.
        steps_train = len(curr_y)//bat_size

        model = create_model()
        model.compile(loss='binary_crossentropy', optimizer=Adam(0.0005), metrics=['accuracy'])

        # A fresh checkpoint callback per fold, so "best" is tracked per fold.
        model_chk = ModelCheckpoint(filepath=model_p, monitor='val_loss', save_best_only=True, verbose=1)
        lr_s = LearningRateScheduler(lr_f)
        model.fit_generator(datagen.flow(curr_x, curr_y, batch_size=bat_size),
                  validation_data=(val_x,val_y),
                  steps_per_epoch = steps_train,
                  epochs=100,
                  verbose=2,
                  callbacks=[model_chk,lr_s]
                 )

        # Predict with the best checkpoint of this fold, not the last-epoch weights.
        model = load_model(model_p)
        train_pred[test_index] = model.predict(val_x)
        test_pred = test_pred + model.predict(test_x)/fold_cnt
        print('============================')
    return train_pred,test_pred

# Run 3-fold training: out-of-fold predictions for the train set and
# fold-averaged predictions for the test set.
train_pred,test_pred = kfold_train(fold_cnt=3)


# Persist both prediction arrays as one pickled list [train_pred, test_pred]
# (presumably consumed as features by a later stage -- confirm against the reader).
with open('../features/cnn_4_aug_rescale_preprocess_feat.pkl','wb') as fout:
    pickle.dump([train_pred,test_pred],fout)

# Log loss of the out-of-fold train predictions against the labels y
print(log_loss(y,train_pred))

# Notes from earlier runs (presumably scores per configuration; meaning and
# scale of the numbers are not recorded here -- TODO confirm):
# pre 3fold, 15080
# fold3,1, 15418
# fold3,2, 15217 --> 15118
# fold3,3, 15352
# fold5,1, 15243

model model
Epoch 1/100
Epoch 00001: val_loss improved from inf to 0.88240, saving model to best_m.h5
 - 9s - loss: 0.8187 - acc: 0.6908 - val_loss: 0.8824 - val_acc: 0.5140
Epoch 2/100
Epoch 00002: val_loss did not improve
 - 2s - loss: 0.4846 - acc: 0.7725 - val_loss: 1.2013 - val_acc: 0.5140
Epoch 3/100
Epoch 00003: val_loss did not improve
 - 2s - loss: 0.4683 - acc: 0.7723 - val_loss: 1.5571 - val_acc: 0.5140
Epoch 4/100
Epoch 00004: val_loss did not improve
 - 2s - loss: 0.4659 - acc: 0.7840 - val_loss: 0.9378 - val_acc: 0.5140
Epoch 5/100
Epoch 00005: val_loss did not improve
 - 2s - loss: 0.4135 - acc: 0.8026 - val_loss: 1.7565 - val_acc: 0.5140
Epoch 6/100
Epoch 00006: val_loss improved from 0.88240 to 0.71943, saving model to best_m.h5
 - 2s - loss: 0.4412 - acc: 0.7957 - val_loss: 0.7194 - val_acc: 0.6075
Epoch 7/100
Epoch 00007: val_loss did not improve
 - 2s - loss: 0.4001 - acc: 0.8107 - val_loss: 0.7821 - val_acc: 0.5421
Epoch 8/100
Epoch 00008: val_loss improved from 0.

Epoch 65/100
Epoch 00065: val_loss did not improve
 - 2s - loss: 0.2033 - acc: 0.9062 - val_loss: 0.2732 - val_acc: 0.8692
Epoch 66/100
Epoch 00066: val_loss did not improve
 - 2s - loss: 0.1928 - acc: 0.9174 - val_loss: 0.2884 - val_acc: 0.8673
Epoch 67/100
Epoch 00067: val_loss did not improve
 - 2s - loss: 0.2035 - acc: 0.9162 - val_loss: 0.2721 - val_acc: 0.8766
Epoch 68/100
Epoch 00068: val_loss did not improve
 - 2s - loss: 0.2013 - acc: 0.9172 - val_loss: 0.2671 - val_acc: 0.8766
Epoch 69/100
Epoch 00069: val_loss did not improve
 - 2s - loss: 0.1754 - acc: 0.9299 - val_loss: 0.2660 - val_acc: 0.8766
Epoch 70/100
Epoch 00070: val_loss did not improve
 - 2s - loss: 0.1961 - acc: 0.9176 - val_loss: 0.2718 - val_acc: 0.8654
Epoch 71/100
Epoch 00071: val_loss did not improve
 - 2s - loss: 0.2123 - acc: 0.9079 - val_loss: 0.2913 - val_acc: 0.8710
Epoch 72/100
Epoch 00072: val_loss did not improve
 - 2s - loss: 0.2012 - acc: 0.9077 - val_loss: 0.2699 - val_acc: 0.8710
Epoch 73/100
Epo

Epoch 28/100
Epoch 00028: val_loss did not improve
 - 2s - loss: 0.2891 - acc: 0.8729 - val_loss: 0.2730 - val_acc: 0.8766
Epoch 29/100
Epoch 00029: val_loss did not improve
 - 2s - loss: 0.2688 - acc: 0.8779 - val_loss: 0.2497 - val_acc: 0.8729
Epoch 30/100
Epoch 00030: val_loss did not improve
 - 2s - loss: 0.2968 - acc: 0.8655 - val_loss: 0.2560 - val_acc: 0.8935
Epoch 31/100
Epoch 00031: val_loss improved from 0.24587 to 0.23880, saving model to best_m.h5
 - 2s - loss: 0.2613 - acc: 0.8980 - val_loss: 0.2388 - val_acc: 0.8860
Epoch 32/100
Epoch 00032: val_loss improved from 0.23880 to 0.22742, saving model to best_m.h5
 - 2s - loss: 0.2324 - acc: 0.8996 - val_loss: 0.2274 - val_acc: 0.9028
Epoch 33/100
Epoch 00033: val_loss did not improve
 - 2s - loss: 0.2401 - acc: 0.9030 - val_loss: 0.2329 - val_acc: 0.8916
Epoch 34/100
Epoch 00034: val_loss improved from 0.22742 to 0.21735, saving model to best_m.h5
 - 2s - loss: 0.2271 - acc: 0.9013 - val_loss: 0.2173 - val_acc: 0.8953
Epoch 3

Epoch 94/100
Epoch 00094: val_loss did not improve
 - 2s - loss: 0.1509 - acc: 0.9356 - val_loss: 0.2509 - val_acc: 0.8841
Epoch 95/100
Epoch 00095: val_loss did not improve
 - 2s - loss: 0.1840 - acc: 0.9257 - val_loss: 0.2275 - val_acc: 0.9047
Epoch 96/100
Epoch 00096: val_loss did not improve
 - 2s - loss: 0.1533 - acc: 0.9430 - val_loss: 0.2274 - val_acc: 0.9103
Epoch 97/100
Epoch 00097: val_loss did not improve
 - 2s - loss: 0.1452 - acc: 0.9380 - val_loss: 0.2309 - val_acc: 0.9121
Epoch 98/100
Epoch 00098: val_loss did not improve
 - 2s - loss: 0.1923 - acc: 0.9195 - val_loss: 0.2503 - val_acc: 0.8897
Epoch 99/100
Epoch 00099: val_loss did not improve
 - 2s - loss: 0.1495 - acc: 0.9342 - val_loss: 0.2272 - val_acc: 0.9103
Epoch 100/100
Epoch 00100: val_loss did not improve
 - 2s - loss: 0.1486 - acc: 0.9363 - val_loss: 0.2291 - val_acc: 0.9065
Epoch 1/100
Epoch 00001: val_loss improved from inf to 0.68706, saving model to best_m.h5
 - 9s - loss: 0.7414 - acc: 0.6860 - val_loss: 0

Epoch 56/100
Epoch 00056: val_loss did not improve
 - 2s - loss: 0.1555 - acc: 0.9280 - val_loss: 0.3011 - val_acc: 0.8783
Epoch 57/100
Epoch 00057: val_loss did not improve
 - 2s - loss: 0.1715 - acc: 0.9305 - val_loss: 0.2988 - val_acc: 0.8745
Epoch 58/100
Epoch 00058: val_loss did not improve
 - 2s - loss: 0.1581 - acc: 0.9280 - val_loss: 0.2879 - val_acc: 0.8839
Epoch 59/100
Epoch 00059: val_loss did not improve
 - 2s - loss: 0.1661 - acc: 0.9261 - val_loss: 0.3020 - val_acc: 0.8820
Epoch 60/100
Epoch 00060: val_loss did not improve
 - 2s - loss: 0.1666 - acc: 0.9260 - val_loss: 0.3015 - val_acc: 0.8839
Epoch 61/100
Epoch 00061: val_loss did not improve
 - 2s - loss: 0.1522 - acc: 0.9383 - val_loss: 0.3137 - val_acc: 0.8745
Epoch 62/100
Epoch 00062: val_loss did not improve
 - 2s - loss: 0.1644 - acc: 0.9337 - val_loss: 0.3091 - val_acc: 0.8783
Epoch 63/100
Epoch 00063: val_loss did not improve
 - 2s - loss: 0.1556 - acc: 0.9322 - val_loss: 0.3048 - val_acc: 0.8895
Epoch 64/100
Epo

In [12]:
def create_model():
    '''Build and return the (uncompiled) keras CNN used for this experiment.

    Architecture: four convolutional stages, each being
    Conv2D 3x3 -> ReLU -> BatchNormalization -> MaxPooling 3x3 / stride 2 -> Dropout,
    followed by Flatten, two ReLU dense layers (512 and 256 units, each with
    dropout 0.2) and a single sigmoid output unit.
    '''
    net = Sequential()

    # (number of filters, dropout rate) for conv stages 1..4, in order.
    conv_stages = [(64, 0.2), (128, 0.2), (128, 0.3), (64, 0.3)]
    for stage_idx, (n_filters, drop_rate) in enumerate(conv_stages):
        # Only the very first layer declares the 75x75x3 input shape.
        extra = {'input_shape': (75, 75, 3)} if stage_idx == 0 else {}
        net.add(Conv2D(n_filters, kernel_size=(3, 3), **extra))
        net.add(Activation('relu'))
        net.add(BatchNormalization())
        net.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
        net.add(Dropout(drop_rate))

    # Dense layers operate on a flat vector.
    net.add(Flatten())

    # Two fully connected stages, each followed by dropout 0.2.
    for n_units in (512, 256):
        net.add(Dense(n_units, activation='relu'))
        net.add(Dropout(0.2))

    # Single-probability output.
    net.add(Dense(1, activation="sigmoid"))
    return net
# Marker echoed in the cell output once the definitions above have been executed.
print('model model')


def kfold_train(fold_cnt=3,rnd=42):
    '''Train one model per fold and collect out-of-fold and test predictions.

    Relies on notebook-level names: the arrays `train_x`, `y` and `test_x`,
    the model factory `create_model`, and the learning-rate schedule `lr_f`
    as they are defined at call time.

    Args:
        fold_cnt: number of KFold splits, i.e. number of models trained.
        rnd: seed base; the shuffled KFold uses `random_state=2*rnd`.

    Returns:
        (train_pred, test_pred):
        train_pred -- array of shape (len(train_x), 1); each row is predicted
            by the model of the fold in which that row was held out.
        test_pred -- array of shape (len(test_x), 1); mean of the per-fold
            predictions on `test_x`.

    Side effects:
        Each fold overwrites the checkpoint file 'best_m.h5' in the working
        directory with its best model (lowest val_loss).
    '''
    # Size the buffers from the data instead of hard-coding 1604 / 8424 rows,
    # so the function keeps working if the input arrays change.
    train_pred = np.zeros((len(train_x), 1))
    test_pred = np.zeros((len(test_x), 1))

    kf = KFold(n_splits=fold_cnt, shuffle=True, random_state=2*rnd)

    # The augmentation settings, batch size and checkpoint path are the same
    # for every fold, so they are set up once outside the loop.
    datagen = ImageDataGenerator(
        rotation_range = 20,
        width_shift_range = 0.1,
        height_shift_range = 0.1,
        zoom_range = 0.2,
        horizontal_flip=True
    )
    bat_size = 16
    model_p = 'best_m.h5'

    for train_index, test_index in kf.split(train_x):
        curr_x,curr_y = train_x[train_index],y[train_index]
        val_x,val_y = train_x[test_index],y[test_index]

        # Whole batches only; a trailing partial batch is not counted.
        steps_train = len(curr_y)//bat_size

        model = create_model()
        model.compile(loss='binary_crossentropy', optimizer=Adam(0.0005), metrics=['accuracy'])

        # A fresh checkpoint callback per fold, so "best" is tracked per fold.
        model_chk = ModelCheckpoint(filepath=model_p, monitor='val_loss', save_best_only=True, verbose=1)
        lr_s = LearningRateScheduler(lr_f)
        model.fit_generator(datagen.flow(curr_x, curr_y, batch_size=bat_size),
                  validation_data=(val_x,val_y),
                  steps_per_epoch = steps_train,
                  epochs=100,
                  verbose=2,
                  callbacks=[model_chk,lr_s]
                 )

        # Predict with the best checkpoint of this fold, not the last-epoch weights.
        model = load_model(model_p)
        train_pred[test_index] = model.predict(val_x)
        test_pred = test_pred + model.predict(test_x)/fold_cnt
        print('============================')
    return train_pred,test_pred

# Run 3-fold training: out-of-fold predictions for the train set and
# fold-averaged predictions for the test set.
train_pred,test_pred = kfold_train(fold_cnt=3)


# Persist both prediction arrays as one pickled list [train_pred, test_pred]
# (presumably consumed as features by a later stage -- confirm against the reader).
with open('../features/cnn_5_aug_rescale_preprocess_feat.pkl','wb') as fout:
    pickle.dump([train_pred,test_pred],fout)

# Log loss of the out-of-fold train predictions against the labels y
print(log_loss(y,train_pred))
# 2364  (presumably the score observed for this run -- TODO confirm scale)

# Placeholders left without recorded results:
# fold4
# fold5

model model
