In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from keras.utils.np_utils import to_categorical
from keras.models import Sequential,load_model
from keras.layers import Dense, Dropout, Flatten, Lambda, Activation
from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D, GlobalAveragePooling2D
from keras.layers.normalization import BatchNormalization
from keras.optimizers import Adam

# Load the training and test sets from JSON files located in ../input/
# (paths are relative to the notebook's working directory).
train_df = pd.read_json('../input/train.json')
test_df = pd.read_json('../input/test.json')

Using TensorFlow backend.


In [2]:
def std_img(x):
    '''Standardise every channel of an image to zero mean and unit variance, in place.

    Arguments:
        x: float array indexed as (row, column, channel). Any number of
            channels is accepted. The array is modified in place, so pass a
            copy if the original values are still needed.

    Returns:
        The same array object `x`, with each channel shifted by its mean and
        divided by its standard deviation (plus 1e-7, so that a constant
        channel does not cause a division by zero).
    '''
    for i in range(x.shape[2]):
        channel = x[:, :, i]  # basic slicing yields a view: the updates below write into x
        channel -= channel.mean()
        channel /= channel.std() + 1e-7
    return x

def get_image(df):
    '''Turn every row of `df` into a standardised 75x75x3 pseudo-image.

    Channels 0 and 1 are band_1 and band_2 reshaped to 75x75; channel 2 is
    half their difference multiplied by the incidence angle (-1 is used when
    inc_angle is 'na'). Each image is passed through std_img before being
    collected. Returns all images as a single array.
    '''
    collected = []
    for _, record in df.iterrows():
        # The two bands as 75x75 grids
        first = np.array(record['band_1']).reshape(75, 75)
        second = np.array(record['band_2']).reshape(75, 75)

        raw_angle = record['inc_angle']
        angle = -1 if raw_angle == 'na' else float(raw_angle)
        third = ((first - second)/2)*angle

        # Stack the channels and standardise each of them
        collected.append(std_img(np.dstack([first, second, third])))
    return np.array(collected)


# Convert both data frames into arrays of standardised 3-channel images.
train_x = get_image(train_df)
test_x = get_image(test_df)

# Quick check of the resulting array shapes.
print(train_x.shape,test_x.shape)

(1604, 75, 75, 3) (8424, 75, 75, 3)


In [3]:
# Training labels taken from the is_iceberg column, as a NumPy array.
y = train_df.is_iceberg.values
# Peek at the first five labels.
print(y[:5])

[0 0 1 0 0]


In [4]:
from keras.callbacks import ModelCheckpoint,LearningRateScheduler
from keras.layers import Input
from keras.models import Model

def create_vgg_model():
    '''Build an uncompiled VGG-style binary classifier for 75x75x3 inputs.

    Four blocks of 3x3 'same'-padded ReLU convolutions, each closed by a 2x2
    max-pool with stride 2, are followed by Flatten, a 256-unit ReLU dense
    layer, 50% dropout and a single sigmoid output unit.

    Returns:
        A keras Model mapping a (75, 75, 3) image to one sigmoid output.
    '''
    img_input = Input(shape=(75,75,3))

    # (block number, filters per convolution, number of convolutions)
    block_specs = [(1, 64, 2), (2, 128, 2), (3, 128, 3), (4, 128, 3)]

    x = img_input
    for block, filters, n_convs in block_specs:
        for conv in range(1, n_convs + 1):
            x = Conv2D(filters, (3, 3), activation='relu', padding='same',
                       name='block%d_conv%d' % (block, conv))(x)
        x = MaxPooling2D((2, 2), strides=(2, 2), name='block%d_pool' % block)(x)

    x = Flatten()(x)
    x = Dense(256,activation='relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(1,activation='sigmoid')(x)

    # The model label used to be 'inception_v3', a copy-paste leftover that
    # mislabelled this VGG-style network in summaries and saved files.
    return Model(img_input, x, name='vgg')
# Marker output showing that this cell (the model definition) has been executed.
print('model model')


model model


In [9]:
from sklearn.model_selection import KFold
from keras.preprocessing.image import ImageDataGenerator


def lr_f(epoch):
    '''Step-wise learning-rate schedule keyed on the epoch index.

    Returns 0.001 while epoch < 10, 0.0005 while epoch < 30 and 0.0001
    from then on.
    '''
    for upper_bound, rate in ((10, 0.001), (30, 0.0005)):
        if epoch < upper_bound:
            return rate
    return 0.0001

def kfold_train(fold_cnt=3,rnd=9,model_fn=None,bat_size=64,epochs=100):
    '''Cross-validated training with on-the-fly image augmentation.

    Uses the notebook-level arrays train_x, y and test_x. For every fold a
    fresh model is trained on augmented batches; the weights with the lowest
    validation loss are checkpointed to 'best_m.h5', reloaded, and used to
    predict the held-out fold and the test set.

    Arguments:
        fold_cnt: number of KFold splits.
        rnd: seed parameter; the shuffle uses random_state=2*rnd.
        model_fn: zero-argument callable returning a new, uncompiled model.
            None (the default) selects create_vgg_model.
        bat_size: batch size of the augmented training generator.
        epochs: number of training epochs per fold.

    Returns:
        (train_pred, test_pred): out-of-fold predictions with shape
        (len(train_x), 1) and test predictions averaged over the folds with
        shape (len(test_x), 1).
    '''
    if model_fn is None:
        model_fn = create_vgg_model

    # Buffers are sized from the data instead of the literal 1604/8424 row counts.
    train_pred, test_pred = np.zeros((len(train_x),1)),np.zeros((len(test_x),1))
    kf = KFold(n_splits=fold_cnt, shuffle=True, random_state=2*rnd)

    # The augmentation settings are the same for every fold, so build the generator once.
    datagen = ImageDataGenerator(
        width_shift_range=0.05,
        height_shift_range=0.05,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        vertical_flip=True
    )
    model_p = 'best_m.h5'

    for train_index, test_index in kf.split(train_x):
        curr_x,curr_y = train_x[train_index],y[train_index]
        val_x,val_y = train_x[test_index],y[test_index]

        # Keep at least one step per epoch even if a fold is smaller than one batch.
        steps_train = max(1, len(curr_y)//bat_size)

        model = model_fn()
        # The LearningRateScheduler below sets the rate at the start of each epoch,
        # so lr_f(epoch) takes over from the 0.0005 handed to Adam here.
        model.compile(loss='binary_crossentropy', optimizer=Adam(0.0005), metrics=['accuracy'])
        # NOTE(review): every fold writes to the same checkpoint path; if val_loss
        # never improved in a fold (e.g. it is NaN), the file from the previous
        # fold would be reloaded below.
        model_chk = ModelCheckpoint(filepath=model_p, monitor='val_loss', save_best_only=True, verbose=1)
        lr_s = LearningRateScheduler(lr_f)
        model.fit_generator(datagen.flow(curr_x, curr_y, batch_size=bat_size),
                  validation_data=(val_x,val_y),
                  steps_per_epoch = steps_train,
                  epochs=epochs,
                  verbose=2,
                  callbacks=[model_chk,lr_s]
                 )

        # Predict with the best checkpoint rather than the final-epoch weights.
        model = load_model(model_p)
        train_pred[test_index] = model.predict(val_x)
        test_pred = test_pred + model.predict(test_x)/fold_cnt
        print('============================')
    return train_pred,test_pred

# Run 4-fold training; keeps the out-of-fold and the test predictions.
train_pred,test_pred = kfold_train(fold_cnt=4)

Epoch 1/100
Epoch 00001: val_loss improved from inf to 0.69480, saving model to best_m.h5
 - 8s - loss: 0.7070 - acc: 0.5253 - val_loss: 0.6948 - val_acc: 0.4663
Epoch 2/100
Epoch 00002: val_loss improved from 0.69480 to 0.69207, saving model to best_m.h5
 - 4s - loss: 0.6936 - acc: 0.4960 - val_loss: 0.6921 - val_acc: 0.5337
Epoch 3/100
Epoch 00003: val_loss improved from 0.69207 to 0.69181, saving model to best_m.h5
 - 5s - loss: 0.6928 - acc: 0.5225 - val_loss: 0.6918 - val_acc: 0.5337
Epoch 4/100
Epoch 00004: val_loss improved from 0.69181 to 0.69101, saving model to best_m.h5
 - 4s - loss: 0.6914 - acc: 0.5405 - val_loss: 0.6910 - val_acc: 0.5337
Epoch 5/100
Epoch 00005: val_loss improved from 0.69101 to 0.69033, saving model to best_m.h5
 - 4s - loss: 0.6917 - acc: 0.5266 - val_loss: 0.6903 - val_acc: 0.5337
Epoch 6/100
Epoch 00006: val_loss did not improve
 - 4s - loss: 0.7004 - acc: 0.5320 - val_loss: 0.6913 - val_acc: 0.5337
Epoch 7/100
Epoch 00007: val_loss did not improve
 -

Epoch 60/100
Epoch 00060: val_loss did not improve
 - 4s - loss: 0.2060 - acc: 0.9136 - val_loss: 0.3092 - val_acc: 0.8853
Epoch 61/100
Epoch 00061: val_loss did not improve
 - 4s - loss: 0.2001 - acc: 0.9240 - val_loss: 0.2918 - val_acc: 0.8778
Epoch 62/100
Epoch 00062: val_loss did not improve
 - 4s - loss: 0.1949 - acc: 0.9210 - val_loss: 0.3102 - val_acc: 0.8653
Epoch 63/100
Epoch 00063: val_loss did not improve
 - 4s - loss: 0.1634 - acc: 0.9325 - val_loss: 0.3226 - val_acc: 0.8778
Epoch 64/100
Epoch 00064: val_loss did not improve
 - 4s - loss: 0.2004 - acc: 0.9238 - val_loss: 0.3076 - val_acc: 0.8903
Epoch 65/100
Epoch 00065: val_loss improved from 0.28959 to 0.27794, saving model to best_m.h5
 - 4s - loss: 0.2202 - acc: 0.8977 - val_loss: 0.2779 - val_acc: 0.8903
Epoch 66/100
Epoch 00066: val_loss did not improve
 - 4s - loss: 0.1814 - acc: 0.9280 - val_loss: 0.2894 - val_acc: 0.8853
Epoch 67/100
Epoch 00067: val_loss did not improve
 - 4s - loss: 0.1710 - acc: 0.9197 - val_los

Epoch 22/100
Epoch 00022: val_loss improved from 0.23360 to 0.21000, saving model to best_m.h5
 - 4s - loss: 0.2962 - acc: 0.8787 - val_loss: 0.2100 - val_acc: 0.9177
Epoch 23/100
Epoch 00023: val_loss did not improve
 - 4s - loss: 0.3100 - acc: 0.8743 - val_loss: 0.2519 - val_acc: 0.8928
Epoch 24/100
Epoch 00024: val_loss did not improve
 - 4s - loss: 0.2751 - acc: 0.8832 - val_loss: 0.2429 - val_acc: 0.9002
Epoch 25/100
Epoch 00025: val_loss did not improve
 - 4s - loss: 0.2878 - acc: 0.8845 - val_loss: 0.2237 - val_acc: 0.9052
Epoch 26/100
Epoch 00026: val_loss did not improve
 - 4s - loss: 0.3019 - acc: 0.8712 - val_loss: 0.2518 - val_acc: 0.9002
Epoch 27/100
Epoch 00027: val_loss did not improve
 - 4s - loss: 0.2984 - acc: 0.8721 - val_loss: 0.2342 - val_acc: 0.8928
Epoch 28/100
Epoch 00028: val_loss did not improve
 - 4s - loss: 0.2759 - acc: 0.8863 - val_loss: 0.2337 - val_acc: 0.9002
Epoch 29/100
Epoch 00029: val_loss improved from 0.21000 to 0.20839, saving model to best_m.h5


Epoch 84/100
Epoch 00084: val_loss did not improve
 - 4s - loss: 0.2188 - acc: 0.9034 - val_loss: 0.1607 - val_acc: 0.9377
Epoch 85/100
Epoch 00085: val_loss did not improve
 - 4s - loss: 0.1889 - acc: 0.9314 - val_loss: 0.1626 - val_acc: 0.9252
Epoch 86/100
Epoch 00086: val_loss did not improve
 - 4s - loss: 0.2014 - acc: 0.9143 - val_loss: 0.1630 - val_acc: 0.9377
Epoch 87/100
Epoch 00087: val_loss did not improve
 - 4s - loss: 0.1602 - acc: 0.9293 - val_loss: 0.1689 - val_acc: 0.9302
Epoch 88/100
Epoch 00088: val_loss did not improve
 - 4s - loss: 0.2039 - acc: 0.9123 - val_loss: 0.1723 - val_acc: 0.9377
Epoch 89/100
Epoch 00089: val_loss did not improve
 - 4s - loss: 0.1932 - acc: 0.9155 - val_loss: 0.1672 - val_acc: 0.9327
Epoch 90/100
Epoch 00090: val_loss did not improve
 - 4s - loss: 0.1803 - acc: 0.9292 - val_loss: 0.1754 - val_acc: 0.9327
Epoch 91/100
Epoch 00091: val_loss did not improve
 - 4s - loss: 0.1918 - acc: 0.9114 - val_loss: 0.1836 - val_acc: 0.9302
Epoch 92/100
Epo

Epoch 44/100
Epoch 00044: val_loss improved from 0.27327 to 0.27158, saving model to best_m.h5
 - 4s - loss: 0.2770 - acc: 0.8888 - val_loss: 0.2716 - val_acc: 0.9052
Epoch 45/100
Epoch 00045: val_loss did not improve
 - 4s - loss: 0.2879 - acc: 0.8845 - val_loss: 0.2795 - val_acc: 0.8928
Epoch 46/100
Epoch 00046: val_loss did not improve
 - 4s - loss: 0.2640 - acc: 0.8949 - val_loss: 0.2727 - val_acc: 0.9052
Epoch 47/100
Epoch 00047: val_loss did not improve
 - 4s - loss: 0.2706 - acc: 0.8819 - val_loss: 0.2778 - val_acc: 0.9077
Epoch 48/100
Epoch 00048: val_loss improved from 0.27158 to 0.26783, saving model to best_m.h5
 - 4s - loss: 0.2674 - acc: 0.8838 - val_loss: 0.2678 - val_acc: 0.9077
Epoch 49/100
Epoch 00049: val_loss improved from 0.26783 to 0.26531, saving model to best_m.h5
 - 4s - loss: 0.2600 - acc: 0.8976 - val_loss: 0.2653 - val_acc: 0.9102
Epoch 50/100
Epoch 00050: val_loss did not improve
 - 4s - loss: 0.2710 - acc: 0.8862 - val_loss: 0.2754 - val_acc: 0.8853
Epoch 5

Epoch 00005: val_loss improved from 0.47264 to 0.37163, saving model to best_m.h5
 - 4s - loss: 0.4772 - acc: 0.7869 - val_loss: 0.3716 - val_acc: 0.8479
Epoch 6/100
Epoch 00006: val_loss did not improve
 - 4s - loss: 0.4203 - acc: 0.8022 - val_loss: 0.4323 - val_acc: 0.7880
Epoch 7/100
Epoch 00007: val_loss did not improve
 - 4s - loss: 0.4990 - acc: 0.7713 - val_loss: 0.4129 - val_acc: 0.8279
Epoch 8/100
Epoch 00008: val_loss did not improve
 - 4s - loss: 0.4366 - acc: 0.8073 - val_loss: 0.4029 - val_acc: 0.8304
Epoch 9/100
Epoch 00009: val_loss improved from 0.37163 to 0.36310, saving model to best_m.h5
 - 4s - loss: 0.3555 - acc: 0.8480 - val_loss: 0.3631 - val_acc: 0.8529
Epoch 10/100
Epoch 00010: val_loss improved from 0.36310 to 0.34628, saving model to best_m.h5
 - 4s - loss: 0.4332 - acc: 0.8160 - val_loss: 0.3463 - val_acc: 0.8728
Epoch 11/100
Epoch 00011: val_loss improved from 0.34628 to 0.34382, saving model to best_m.h5
 - 4s - loss: 0.3510 - acc: 0.8563 - val_loss: 0.343

Epoch 65/100
Epoch 00065: val_loss did not improve
 - 4s - loss: 0.1828 - acc: 0.9358 - val_loss: 0.1788 - val_acc: 0.9202
Epoch 66/100
Epoch 00066: val_loss did not improve
 - 4s - loss: 0.1937 - acc: 0.9244 - val_loss: 0.1741 - val_acc: 0.9177
Epoch 67/100
Epoch 00067: val_loss did not improve
 - 4s - loss: 0.1793 - acc: 0.9184 - val_loss: 0.1776 - val_acc: 0.9177
Epoch 68/100
Epoch 00068: val_loss did not improve
 - 4s - loss: 0.1844 - acc: 0.9264 - val_loss: 0.1790 - val_acc: 0.9202
Epoch 69/100
Epoch 00069: val_loss did not improve
 - 4s - loss: 0.1882 - acc: 0.9219 - val_loss: 0.1800 - val_acc: 0.9177
Epoch 70/100
Epoch 00070: val_loss did not improve
 - 4s - loss: 0.1643 - acc: 0.9297 - val_loss: 0.2110 - val_acc: 0.9052
Epoch 71/100
Epoch 00071: val_loss did not improve
 - 4s - loss: 0.1886 - acc: 0.9271 - val_loss: 0.2074 - val_acc: 0.9002
Epoch 72/100
Epoch 00072: val_loss did not improve
 - 4s - loss: 0.1917 - acc: 0.9129 - val_loss: 0.1748 - val_acc: 0.9102
Epoch 73/100
Epo

In [10]:
import pickle
# Persist the out-of-fold and test predictions as a pickled two-element list
# under ../features/.
with open('../features/vgg_aug1_feat.pkl','wb') as fout:
    pickle.dump([train_pred,test_pred],fout)

# Log loss of the out-of-fold predictions against the training labels.
from sklearn.metrics import log_loss
print(log_loss(y,train_pred))
    
# Build the submission table (id plus predicted is_iceberg value) and write it as CSV.
submission = pd.DataFrame()
submission['id']=test_df['id']
submission['is_iceberg']=test_pred
print(submission.head())
submission.to_csv('../results/vgg_aug1_sub.csv', index=False)

# Score notes (log loss x 1e4):
# pre 2370 - presumably the score of an earlier version; TODO confirm
# new 2088 - matches the 0.2088 printed by this run

0.208822221202
         id  is_iceberg
0  5941774d    0.047048
1  4023181e    0.724682
2  b20200e4    0.228813
3  e7f018bb    0.997117
4  4371c8c3    0.084673


In [11]:
from keras.callbacks import ModelCheckpoint
from keras.layers import Input,AveragePooling2D,GlobalMaxPooling2D,GlobalAveragePooling2D,Flatten
from keras import layers
from keras.models import Model

def conv2d_bn(x, filters, num_row, num_col, padding='same', strides=(1, 1), name=None):
    '''Apply a bias-free Conv2D, then BatchNormalization (no scale), then ReLU.

    Arguments:
        x: input tensor.
        filters: number of convolution filters.
        num_row, num_col: height and width of the convolution kernel.
        padding: padding mode passed to Conv2D.
        strides: strides passed to Conv2D.
        name: optional base name; the convolution is named name + '_conv',
            the normalisation name + '_bn' and the activation `name`.
            With None, all three layers get automatic names.

    Returns:
        The output tensor of the ReLU activation.
    '''
    conv_name = None if name is None else name + '_conv'
    bn_name = None if name is None else name + '_bn'

    features = Conv2D(filters,
                      (num_row, num_col),
                      strides=strides,
                      padding=padding,
                      use_bias=False,
                      name=conv_name)(x)
    # Normalise over axis 3 (the last axis of the 4-D tensor).
    features = BatchNormalization(axis=3, scale=False, name=bn_name)(features)
    return Activation('relu', name=name)(features)

def _inception_mixed_block(x, pool_filters, name, channel_axis=3):
    '''Four parallel branches (1x1, 1x1->5x5, 1x1->3x3->3x3, avg-pool->1x1) concatenated on the channel axis.'''
    branch1x1 = conv2d_bn(x, 64, 1, 1)

    branch5x5 = conv2d_bn(x, 48, 1, 1)
    branch5x5 = conv2d_bn(branch5x5, 64, 5, 5)

    branch3x3dbl = conv2d_bn(x, 64, 1, 1)
    branch3x3dbl = conv2d_bn(branch3x3dbl, 64, 3, 3)
    branch3x3dbl = conv2d_bn(branch3x3dbl, 64, 3, 3)

    branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x)
    branch_pool = conv2d_bn(branch_pool, pool_filters, 1, 1)
    return layers.concatenate(
        [branch1x1, branch5x5, branch3x3dbl, branch_pool],
        axis=channel_axis,
        name=name)


def create_incept_model():
    '''Build an uncompiled Inception-style binary classifier for 75x75x3 inputs.

    A convolutional stem is followed by three mixed blocks ('mixed0' to
    'mixed2'), a strided 3x3 convolution, Flatten, a 256-unit ReLU dense
    layer, 50% dropout and a single sigmoid output unit.

    Returns:
        A keras Model mapping a (75, 75, 3) image to one sigmoid output.
    '''
    img_input = Input(shape=(75,75,3))

    # Stem. Spatial size: 75 -> 37 -> 35 -> 35 -> (pool) 17
    x = conv2d_bn(img_input, 32, 3, 3, strides=(2, 2), padding='valid')
    x = conv2d_bn(x, 32, 3, 3, padding='valid')
    x = conv2d_bn(x, 64, 3, 3)
    x = MaxPooling2D((3, 3), strides=(2, 2))(x)

    # Spatial size: 17 -> 17 -> 15 -> (pool) 7
    x = conv2d_bn(x, 64, 1, 1, padding='valid')
    x = conv2d_bn(x, 64, 3, 3, padding='valid')
    x = MaxPooling2D((3, 3), strides=(2, 2))(x)

    # mixed 0: 7 x 7 x 224 (64 + 64 + 64 + 32 channels)
    x = _inception_mixed_block(x, 32, 'mixed0')

    # mixed 1, 2: 7 x 7 x 256 (the pooling branch has 64 filters here)
    x = _inception_mixed_block(x, 64, 'mixed1')
    x = _inception_mixed_block(x, 64, 'mixed2')

    x = Conv2D(64, 3, strides=2, padding='same',activation='relu')(x)
    x = Flatten()(x)
    x = Dense(256,activation='relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(1,activation='sigmoid')(x)

    return Model(img_input, x, name='inception_v3')
# Marker output showing that this cell (the model definition) has been executed.
print('model model')

model model


In [23]:
def kfold_train(fold_cnt=3,rnd=9,model_fn=None,bat_size=16,epochs=100):
    '''Cross-validated training with on-the-fly image augmentation.

    Uses the notebook-level arrays train_x, y and test_x. For every fold a
    fresh model is trained on augmented batches; the weights with the lowest
    validation loss are checkpointed to 'best_m.h5', reloaded, and used to
    predict the held-out fold and the test set.

    Arguments:
        fold_cnt: number of KFold splits.
        rnd: seed parameter; the shuffle uses random_state=2*rnd.
        model_fn: zero-argument callable returning a new, uncompiled model.
            None (the default) selects create_incept_model.
        bat_size: batch size of the augmented training generator.
        epochs: number of training epochs per fold.

    Returns:
        (train_pred, test_pred): out-of-fold predictions with shape
        (len(train_x), 1) and test predictions averaged over the folds with
        shape (len(test_x), 1).
    '''
    if model_fn is None:
        model_fn = create_incept_model

    # Buffers are sized from the data instead of the literal 1604/8424 row counts.
    train_pred, test_pred = np.zeros((len(train_x),1)),np.zeros((len(test_x),1))
    kf = KFold(n_splits=fold_cnt, shuffle=True, random_state=2*rnd)

    # The augmentation settings are the same for every fold, so build the generator once.
    datagen = ImageDataGenerator(
        width_shift_range=0.05,
        height_shift_range=0.05,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        vertical_flip=True
    )
    model_p = 'best_m.h5'

    for train_index, test_index in kf.split(train_x):
        curr_x,curr_y = train_x[train_index],y[train_index]
        val_x,val_y = train_x[test_index],y[test_index]

        # Keep at least one step per epoch even if a fold is smaller than one batch.
        steps_train = max(1, len(curr_y)//bat_size)

        model = model_fn()
        # The LearningRateScheduler below sets the rate at the start of each epoch,
        # so lr_f(epoch) takes over from the 0.0005 handed to Adam here.
        model.compile(loss='binary_crossentropy', optimizer=Adam(0.0005), metrics=['accuracy'])
        # NOTE(review): every fold writes to the same checkpoint path; if val_loss
        # never improved in a fold (e.g. it is NaN), the file from the previous
        # fold would be reloaded below.
        model_chk = ModelCheckpoint(filepath=model_p, monitor='val_loss', save_best_only=True, verbose=1)
        lr_s = LearningRateScheduler(lr_f)
        model.fit_generator(datagen.flow(curr_x, curr_y, batch_size=bat_size),
                  validation_data=(val_x,val_y),
                  steps_per_epoch = steps_train,
                  epochs=epochs,
                  verbose=2,
                  callbacks=[model_chk,lr_s]
                 )

        # Predict with the best checkpoint rather than the final-epoch weights.
        model = load_model(model_p)
        train_pred[test_index] = model.predict(val_x)
        test_pred = test_pred + model.predict(test_x)/fold_cnt
        print('============================')
    return train_pred,test_pred

# Run 4-fold training; overwrites train_pred/test_pred from the earlier run.
train_pred,test_pred = kfold_train(fold_cnt=4)

Epoch 1/100
Epoch 00001: val_loss improved from inf to 0.62281, saving model to best_m.h5
 - 70s - loss: 0.5516 - acc: 0.7372 - val_loss: 0.6228 - val_acc: 0.6459
Epoch 2/100
Epoch 00002: val_loss did not improve
 - 4s - loss: 0.4184 - acc: 0.8100 - val_loss: 0.8311 - val_acc: 0.5436
Epoch 3/100
Epoch 00003: val_loss did not improve
 - 4s - loss: 0.3817 - acc: 0.8216 - val_loss: 1.5079 - val_acc: 0.5910
Epoch 4/100
Epoch 00004: val_loss improved from 0.62281 to 0.56129, saving model to best_m.h5
 - 4s - loss: 0.3639 - acc: 0.8272 - val_loss: 0.5613 - val_acc: 0.6658
Epoch 5/100
Epoch 00005: val_loss did not improve
 - 4s - loss: 0.4011 - acc: 0.8291 - val_loss: 3.8911 - val_acc: 0.5536
Epoch 6/100
Epoch 00006: val_loss did not improve
 - 4s - loss: 0.3293 - acc: 0.8600 - val_loss: 0.8804 - val_acc: 0.7307
Epoch 7/100
Epoch 00007: val_loss improved from 0.56129 to 0.35402, saving model to best_m.h5
 - 4s - loss: 0.3570 - acc: 0.8457 - val_loss: 0.3540 - val_acc: 0.8753
Epoch 8/100
Epoch

Epoch 65/100
Epoch 00065: val_loss did not improve
 - 3s - loss: 0.1448 - acc: 0.9367 - val_loss: 0.3103 - val_acc: 0.8953
Epoch 66/100
Epoch 00066: val_loss did not improve
 - 3s - loss: 0.1726 - acc: 0.9273 - val_loss: 0.3274 - val_acc: 0.8803
Epoch 67/100
Epoch 00067: val_loss did not improve
 - 3s - loss: 0.1377 - acc: 0.9425 - val_loss: 0.3051 - val_acc: 0.8878
Epoch 68/100
Epoch 00068: val_loss did not improve
 - 3s - loss: 0.1361 - acc: 0.9508 - val_loss: 0.3442 - val_acc: 0.8828
Epoch 69/100
Epoch 00069: val_loss did not improve
 - 3s - loss: 0.1516 - acc: 0.9383 - val_loss: 0.3424 - val_acc: 0.8778
Epoch 70/100
Epoch 00070: val_loss did not improve
 - 4s - loss: 0.1598 - acc: 0.9314 - val_loss: 0.3244 - val_acc: 0.8903
Epoch 71/100
Epoch 00071: val_loss did not improve
 - 3s - loss: 0.1443 - acc: 0.9358 - val_loss: 0.3068 - val_acc: 0.9027
Epoch 72/100
Epoch 00072: val_loss did not improve
 - 3s - loss: 0.1416 - acc: 0.9458 - val_loss: 0.3416 - val_acc: 0.8803
Epoch 73/100
Epo

Epoch 29/100
Epoch 00029: val_loss did not improve
 - 4s - loss: 0.2720 - acc: 0.8762 - val_loss: 0.2401 - val_acc: 0.8728
Epoch 30/100
Epoch 00030: val_loss did not improve
 - 4s - loss: 0.2293 - acc: 0.8992 - val_loss: 0.3797 - val_acc: 0.8204
Epoch 31/100
Epoch 00031: val_loss did not improve
 - 4s - loss: 0.2309 - acc: 0.8983 - val_loss: 0.2236 - val_acc: 0.8953
Epoch 32/100
Epoch 00032: val_loss improved from 0.17958 to 0.17294, saving model to best_m.h5
 - 4s - loss: 0.2286 - acc: 0.9070 - val_loss: 0.1729 - val_acc: 0.9302
Epoch 33/100
Epoch 00033: val_loss did not improve
 - 4s - loss: 0.2291 - acc: 0.8908 - val_loss: 0.1780 - val_acc: 0.9277
Epoch 34/100
Epoch 00034: val_loss did not improve
 - 4s - loss: 0.2507 - acc: 0.9043 - val_loss: 0.1797 - val_acc: 0.9227
Epoch 35/100
Epoch 00035: val_loss did not improve
 - 4s - loss: 0.2313 - acc: 0.9020 - val_loss: 0.1809 - val_acc: 0.9152
Epoch 36/100
Epoch 00036: val_loss did not improve
 - 4s - loss: 0.1994 - acc: 0.9142 - val_los

Epoch 95/100
Epoch 00095: val_loss did not improve
 - 3s - loss: 0.1418 - acc: 0.9350 - val_loss: 0.1880 - val_acc: 0.9152
Epoch 96/100
Epoch 00096: val_loss did not improve
 - 3s - loss: 0.1156 - acc: 0.9567 - val_loss: 0.2189 - val_acc: 0.9002
Epoch 97/100
Epoch 00097: val_loss did not improve
 - 4s - loss: 0.1372 - acc: 0.9433 - val_loss: 0.2087 - val_acc: 0.9027
Epoch 98/100
Epoch 00098: val_loss did not improve
 - 4s - loss: 0.1264 - acc: 0.9417 - val_loss: 0.2212 - val_acc: 0.9202
Epoch 99/100
Epoch 00099: val_loss did not improve
 - 3s - loss: 0.1351 - acc: 0.9458 - val_loss: 0.2087 - val_acc: 0.9177
Epoch 100/100
Epoch 00100: val_loss did not improve
 - 4s - loss: 0.1399 - acc: 0.9458 - val_loss: 0.1945 - val_acc: 0.9077
Epoch 1/100
Epoch 00001: val_loss improved from inf to 0.65896, saving model to best_m.h5
 - 77s - loss: 0.5242 - acc: 0.7372 - val_loss: 0.6590 - val_acc: 0.4938
Epoch 2/100
Epoch 00002: val_loss did not improve
 - 4s - loss: 0.4147 - acc: 0.7941 - val_loss: 0

Epoch 59/100
Epoch 00059: val_loss did not improve
 - 4s - loss: 0.1723 - acc: 0.9281 - val_loss: 0.2245 - val_acc: 0.9202
Epoch 60/100
Epoch 00060: val_loss did not improve
 - 4s - loss: 0.1612 - acc: 0.9325 - val_loss: 0.2391 - val_acc: 0.9277
Epoch 61/100
Epoch 00061: val_loss did not improve
 - 4s - loss: 0.1733 - acc: 0.9145 - val_loss: 0.2079 - val_acc: 0.9227
Epoch 62/100
Epoch 00062: val_loss did not improve
 - 4s - loss: 0.1549 - acc: 0.9392 - val_loss: 0.2148 - val_acc: 0.9227
Epoch 63/100
Epoch 00063: val_loss did not improve
 - 4s - loss: 0.1592 - acc: 0.9358 - val_loss: 0.2390 - val_acc: 0.9227
Epoch 64/100
Epoch 00064: val_loss did not improve
 - 4s - loss: 0.1412 - acc: 0.9408 - val_loss: 0.2911 - val_acc: 0.8928
Epoch 65/100
Epoch 00065: val_loss did not improve
 - 4s - loss: 0.1809 - acc: 0.9291 - val_loss: 0.2085 - val_acc: 0.9227
Epoch 66/100
Epoch 00066: val_loss did not improve
 - 4s - loss: 0.1679 - acc: 0.9317 - val_loss: 0.2264 - val_acc: 0.9127
Epoch 67/100
Epo

Epoch 00024: val_loss did not improve
 - 4s - loss: 0.2957 - acc: 0.8870 - val_loss: 0.2345 - val_acc: 0.9102
Epoch 25/100
Epoch 00025: val_loss did not improve
 - 4s - loss: 0.2852 - acc: 0.8839 - val_loss: 0.2289 - val_acc: 0.9102
Epoch 26/100
Epoch 00026: val_loss did not improve
 - 4s - loss: 0.2307 - acc: 0.9008 - val_loss: 0.2068 - val_acc: 0.9002
Epoch 27/100
Epoch 00027: val_loss did not improve
 - 4s - loss: 0.2377 - acc: 0.9058 - val_loss: 0.2605 - val_acc: 0.8903
Epoch 28/100
Epoch 00028: val_loss improved from 0.19667 to 0.19325, saving model to best_m.h5
 - 4s - loss: 0.2735 - acc: 0.8858 - val_loss: 0.1933 - val_acc: 0.9102
Epoch 29/100
Epoch 00029: val_loss did not improve
 - 4s - loss: 0.2441 - acc: 0.8950 - val_loss: 0.3652 - val_acc: 0.8254
Epoch 30/100
Epoch 00030: val_loss did not improve
 - 4s - loss: 0.2284 - acc: 0.8881 - val_loss: 0.3042 - val_acc: 0.8479
Epoch 31/100
Epoch 00031: val_loss improved from 0.19325 to 0.18756, saving model to best_m.h5
 - 4s - loss:

Epoch 87/100
Epoch 00087: val_loss did not improve
 - 4s - loss: 0.1365 - acc: 0.9558 - val_loss: 0.1951 - val_acc: 0.9227
Epoch 88/100
Epoch 00088: val_loss did not improve
 - 4s - loss: 0.1749 - acc: 0.9314 - val_loss: 0.1769 - val_acc: 0.9302
Epoch 89/100
Epoch 00089: val_loss did not improve
 - 4s - loss: 0.1680 - acc: 0.9408 - val_loss: 0.2340 - val_acc: 0.9077
Epoch 90/100
Epoch 00090: val_loss did not improve
 - 4s - loss: 0.1650 - acc: 0.9298 - val_loss: 0.1845 - val_acc: 0.9252
Epoch 91/100
Epoch 00091: val_loss did not improve
 - 4s - loss: 0.1473 - acc: 0.9433 - val_loss: 0.1807 - val_acc: 0.9202
Epoch 92/100
Epoch 00092: val_loss did not improve
 - 4s - loss: 0.1321 - acc: 0.9417 - val_loss: 0.1873 - val_acc: 0.9152
Epoch 93/100
Epoch 00093: val_loss did not improve
 - 4s - loss: 0.1691 - acc: 0.9375 - val_loss: 0.2263 - val_acc: 0.9127
Epoch 94/100
Epoch 00094: val_loss did not improve
 - 4s - loss: 0.1276 - acc: 0.9525 - val_loss: 0.1919 - val_acc: 0.9277
Epoch 95/100
Epo

In [24]:
# Persist the out-of-fold and test predictions as a pickled two-element list.
# Relies on `pickle` and `log_loss` having been imported in an earlier cell.
with open('../features/incept_aug1_feat.pkl','wb') as fout:
    pickle.dump([train_pred,test_pred],fout)

# Log loss of the out-of-fold predictions against the training labels.
print(log_loss(y,train_pred))
    
# Build the submission table (id plus predicted is_iceberg value) and write it as CSV.
submission = pd.DataFrame()
submission['id']=test_df['id']
submission['is_iceberg']=test_pred
print(submission.head())
submission.to_csv('../results/incept_aug1_sub.csv', index=False)

# Score notes (log loss x 1e4):
# pre 2256 - presumably the score of an earlier version; TODO confirm
# new 16 batch 1953 - batch size 16; matches the 0.1953 printed by this run
# new 64 batch 2063 - presumably the same setup with batch size 64; TODO confirm

0.19531810891
         id  is_iceberg
0  5941774d    0.093266
1  4023181e    0.963237
2  b20200e4    0.185977
3  e7f018bb    0.999560
4  4371c8c3    0.466361


In [25]:
from keras.layers import Input,AveragePooling2D,GlobalMaxPooling2D,GlobalAveragePooling2D,Flatten


def identity_block(input_tensor, kernel_size, filters, stage, block):
    """Residual block whose shortcut is the unmodified input tensor.

    The main path is 1x1 conv -> BN -> ReLU -> 'same'-padded conv of size
    `kernel_size` -> BN -> ReLU -> 1x1 conv -> BN. Its output is added to
    `input_tensor` and passed through a final ReLU.
    # Arguments
        input_tensor: tensor entering the block
        kernel_size: kernel size of the middle conv layer on the main path
        filters: three integers, the filter counts of the main-path conv layers
        stage: integer stage label, used to build layer names
        block: block label such as 'a', 'b', used to build layer names
    # Returns
        Output tensor for the block.
    """
    n_first, n_middle, n_last = filters
    tag = str(stage) + block + '_branch'
    conv_tag = 'res' + tag
    bn_tag = 'bn' + tag

    main = Conv2D(n_first, (1, 1), name=conv_tag + '2a')(input_tensor)
    main = BatchNormalization(axis=3, name=bn_tag + '2a')(main)
    main = Activation('relu')(main)

    main = Conv2D(n_middle, kernel_size, padding='same', name=conv_tag + '2b')(main)
    main = BatchNormalization(axis=3, name=bn_tag + '2b')(main)
    main = Activation('relu')(main)

    main = Conv2D(n_last, (1, 1), name=conv_tag + '2c')(main)
    main = BatchNormalization(axis=3, name=bn_tag + '2c')(main)

    # Shortcut connection: add the untouched input back onto the main path.
    merged = layers.add([main, input_tensor])
    return Activation('relu')(merged)


def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)):
    """Residual block whose shortcut passes through its own 1x1 conv and BN.

    The main path is strided 1x1 conv -> BN -> ReLU -> 'same'-padded conv of
    size `kernel_size` -> BN -> ReLU -> 1x1 conv -> BN. The shortcut applies
    a 1x1 conv with the same `strides` and filters[2] filters, then BN. Both
    paths are added and passed through a final ReLU.
    # Arguments
        input_tensor: tensor entering the block
        kernel_size: kernel size of the middle conv layer on the main path
        filters: three integers, the filter counts of the main-path conv layers
        stage: integer stage label, used to build layer names
        block: block label such as 'a', 'b', used to build layer names
        strides: strides of the first main-path conv and of the shortcut conv
    # Returns
        Output tensor for the block.
    """
    n_first, n_middle, n_last = filters
    tag = str(stage) + block + '_branch'
    conv_tag = 'res' + tag
    bn_tag = 'bn' + tag

    main = Conv2D(n_first, (1, 1), strides=strides, name=conv_tag + '2a')(input_tensor)
    main = BatchNormalization(axis=3, name=bn_tag + '2a')(main)
    main = Activation('relu')(main)

    main = Conv2D(n_middle, kernel_size, padding='same', name=conv_tag + '2b')(main)
    main = BatchNormalization(axis=3, name=bn_tag + '2b')(main)
    main = Activation('relu')(main)

    main = Conv2D(n_last, (1, 1), name=conv_tag + '2c')(main)
    main = BatchNormalization(axis=3, name=bn_tag + '2c')(main)

    # Projection shortcut: same strides and output filters as the main path.
    side = Conv2D(n_last, (1, 1), strides=strides, name=conv_tag + '1')(input_tensor)
    side = BatchNormalization(axis=3, name=bn_tag + '1')(side)

    merged = layers.add([main, side])
    return Activation('relu')(merged)

def create_resnet_model():
    '''Build an uncompiled ResNet-style binary classifier for 75x75x3 inputs.

    A 7x7 stride-2 convolution with BN, ReLU and max-pooling is followed by
    two residual stages (stage 2: one conv_block plus two identity blocks;
    stage 3: one conv_block plus three identity blocks), a strided 3x3
    convolution, dropout, Flatten, a 256-unit ReLU dense layer, dropout and a
    single sigmoid output unit.

    Returns:
        A keras Model mapping a (75, 75, 3) image to one sigmoid output.
    '''
    img_input = Input(shape=(75,75,3))
    bn_axis = 3
    x = Conv2D(64, (7, 7), strides=(2, 2), padding='same', name='conv1')(img_input)
    x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x)
    x = Activation('relu')(x)
    x = MaxPooling2D((3, 3), strides=(2, 2))(x)

    # Stage 2 keeps the spatial size (strides=(1, 1)) and widens to 96 channels.
    x = conv_block(x, 3, [64, 64, 96], stage=2, block='a', strides=(1, 1))
    x = identity_block(x, 3, [64, 64, 96], stage=2, block='b')
    x = identity_block(x, 3, [64, 64, 96], stage=2, block='c')

    # Stage 3 downsamples through conv_block's default (2, 2) strides; 128 channels.
    x = conv_block(x, 3, [96, 96, 128], stage=3, block='a')
    x = identity_block(x, 3, [96, 96, 128], stage=3, block='b')
    x = identity_block(x, 3, [96, 96, 128], stage=3, block='c')
    x = identity_block(x, 3, [96, 96, 128], stage=3, block='d')

    x = Conv2D(128, 3, strides=2, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = Flatten()(x)
    x = Dense(256,activation='relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(1,activation='sigmoid')(x)

    # The model label used to be 'inception_v3', a copy-paste leftover that
    # mislabelled this ResNet-style network in summaries and saved files.
    return Model(img_input, x, name='resnet')
# Marker output showing that this cell (the model definition) has been executed.
print('model model')


model model


In [26]:
def kfold_train(fold_cnt=3,rnd=9):
    """Train the ResNet model with K-fold cross-validation and augmentation.

    For every fold a fresh model is trained on the augmented training split,
    the checkpoint with the lowest validation loss is reloaded, and it is used
    to predict both the held-out split and the full test set.

    Reads the notebook-level globals ``train_x``, ``y`` and ``test_x`` and the
    learning-rate schedule function ``lr_f``.

    Args:
        fold_cnt: Number of folds.
        rnd: Seed component; the KFold shuffle uses ``random_state=2*rnd``.

    Returns:
        A tuple ``(train_pred, test_pred)`` of arrays with one column each:
        ``train_pred`` holds the out-of-fold prediction for every training
        sample, ``test_pred`` the average over folds of the test predictions.
    """
    # Size the buffers from the data rather than hard-coding 1604 / 8424 rows,
    # so the function keeps working if the inputs are subset or replaced.
    train_pred = np.zeros((len(train_x), 1))
    test_pred = np.zeros((len(test_x), 1))
    kf = KFold(n_splits=fold_cnt, shuffle=True, random_state=2*rnd)

    # Loop-invariant setup: the augmenter, batch size and checkpoint path are
    # identical for every fold, so build them once.
    datagen = ImageDataGenerator(
        width_shift_range=0.05,
        height_shift_range=0.05,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        vertical_flip=True
    )
    bat_size = 16
    model_p = 'best_m.h5'

    for train_index, test_index in kf.split(train_x):
        curr_x,curr_y = train_x[train_index],y[train_index]
        val_x,val_y = train_x[test_index],y[test_index]

        # Whole batches per epoch (a trailing partial batch is not counted).
        steps_train = len(curr_y)//bat_size

        model = create_resnet_model()
        model.compile(loss='binary_crossentropy', optimizer=Adam(0.0005), metrics=['accuracy'])
        # Only the best epoch (by validation loss) is kept on disk; the file is
        # overwritten by each fold, which is fine because it is reloaded below
        # before the next fold starts.
        model_chk = ModelCheckpoint(filepath=model_p, monitor='val_loss', save_best_only=True, verbose=1)
        lr_s = LearningRateScheduler(lr_f)
        model.fit_generator(datagen.flow(curr_x, curr_y, batch_size=bat_size),
                  validation_data=(val_x,val_y),
                  steps_per_epoch = steps_train,
                  epochs=100,
                  verbose=2,
                  callbacks=[model_chk,lr_s]
                 )

        # Predict with the best checkpoint, not the last-epoch weights.
        model = load_model(model_p)
        train_pred[test_index] = model.predict(val_x)
        # Accumulate the fold average of the test predictions.
        test_pred = test_pred + model.predict(test_x)/fold_cnt
        print('============================')
    return train_pred,test_pred

# Run the cross-validated training with 4 folds and keep both prediction arrays.
train_pred,test_pred = kfold_train(fold_cnt=4)

Epoch 1/100
Epoch 00001: val_loss improved from inf to 0.67456, saving model to best_m.h5
 - 82s - loss: 0.9562 - acc: 0.5378 - val_loss: 0.6746 - val_acc: 0.5586
Epoch 2/100
Epoch 00002: val_loss improved from 0.67456 to 0.63300, saving model to best_m.h5
 - 5s - loss: 0.5722 - acc: 0.6839 - val_loss: 0.6330 - val_acc: 0.5810
Epoch 3/100
Epoch 00003: val_loss improved from 0.63300 to 0.46487, saving model to best_m.h5
 - 5s - loss: 0.4608 - acc: 0.7941 - val_loss: 0.4649 - val_acc: 0.7531
Epoch 4/100
Epoch 00004: val_loss improved from 0.46487 to 0.43041, saving model to best_m.h5
 - 5s - loss: 0.4093 - acc: 0.8066 - val_loss: 0.4304 - val_acc: 0.7781
Epoch 5/100
Epoch 00005: val_loss did not improve
 - 5s - loss: 0.4232 - acc: 0.8081 - val_loss: 0.5465 - val_acc: 0.7556
Epoch 6/100
Epoch 00006: val_loss did not improve
 - 5s - loss: 0.4212 - acc: 0.8272 - val_loss: 0.7757 - val_acc: 0.6359
Epoch 7/100
Epoch 00007: val_loss did not improve
 - 5s - loss: 0.5626 - acc: 0.7337 - val_loss

Epoch 61/100
Epoch 00061: val_loss did not improve
 - 5s - loss: 0.1986 - acc: 0.9233 - val_loss: 0.3117 - val_acc: 0.8628
Epoch 62/100
Epoch 00062: val_loss did not improve
 - 5s - loss: 0.2075 - acc: 0.9117 - val_loss: 0.3502 - val_acc: 0.8479
Epoch 63/100
Epoch 00063: val_loss did not improve
 - 5s - loss: 0.2394 - acc: 0.9076 - val_loss: 0.4026 - val_acc: 0.8479
Epoch 64/100
Epoch 00064: val_loss did not improve
 - 5s - loss: 0.2142 - acc: 0.9058 - val_loss: 0.3114 - val_acc: 0.8753
Epoch 65/100
Epoch 00065: val_loss did not improve
 - 5s - loss: 0.1811 - acc: 0.9183 - val_loss: 0.2958 - val_acc: 0.8728
Epoch 66/100
Epoch 00066: val_loss did not improve
 - 5s - loss: 0.1978 - acc: 0.9225 - val_loss: 0.2892 - val_acc: 0.8828
Epoch 67/100
Epoch 00067: val_loss did not improve
 - 5s - loss: 0.1763 - acc: 0.9192 - val_loss: 0.2983 - val_acc: 0.8678
Epoch 68/100
Epoch 00068: val_loss did not improve
 - 5s - loss: 0.1953 - acc: 0.9187 - val_loss: 0.2877 - val_acc: 0.8853
Epoch 69/100
Epo

Epoch 26/100
Epoch 00026: val_loss improved from 0.21686 to 0.21432, saving model to best_m.h5
 - 5s - loss: 0.3265 - acc: 0.8575 - val_loss: 0.2143 - val_acc: 0.8953
Epoch 27/100
Epoch 00027: val_loss did not improve
 - 5s - loss: 0.3138 - acc: 0.8658 - val_loss: 0.2392 - val_acc: 0.8978
Epoch 28/100
Epoch 00028: val_loss did not improve
 - 5s - loss: 0.2774 - acc: 0.8900 - val_loss: 0.3052 - val_acc: 0.8653
Epoch 29/100
Epoch 00029: val_loss did not improve
 - 5s - loss: 0.2953 - acc: 0.8797 - val_loss: 0.2581 - val_acc: 0.8903
Epoch 30/100
Epoch 00030: val_loss did not improve
 - 5s - loss: 0.2843 - acc: 0.8691 - val_loss: 0.2258 - val_acc: 0.9102
Epoch 31/100
Epoch 00031: val_loss did not improve
 - 5s - loss: 0.2870 - acc: 0.8798 - val_loss: 0.2201 - val_acc: 0.9052
Epoch 32/100
Epoch 00032: val_loss improved from 0.21432 to 0.21313, saving model to best_m.h5
 - 5s - loss: 0.2684 - acc: 0.8908 - val_loss: 0.2131 - val_acc: 0.9227
Epoch 33/100
Epoch 00033: val_loss did not improve


Epoch 91/100
Epoch 00091: val_loss improved from 0.18341 to 0.18127, saving model to best_m.h5
 - 5s - loss: 0.2137 - acc: 0.9108 - val_loss: 0.1813 - val_acc: 0.9227
Epoch 92/100
Epoch 00092: val_loss did not improve
 - 5s - loss: 0.1980 - acc: 0.9208 - val_loss: 0.2781 - val_acc: 0.8728
Epoch 93/100
Epoch 00093: val_loss did not improve
 - 5s - loss: 0.1858 - acc: 0.9233 - val_loss: 0.1962 - val_acc: 0.9102
Epoch 94/100
Epoch 00094: val_loss did not improve
 - 5s - loss: 0.1921 - acc: 0.9233 - val_loss: 0.2165 - val_acc: 0.9027
Epoch 95/100
Epoch 00095: val_loss did not improve
 - 5s - loss: 0.1753 - acc: 0.9292 - val_loss: 0.1853 - val_acc: 0.9252
Epoch 96/100
Epoch 00096: val_loss did not improve
 - 5s - loss: 0.1733 - acc: 0.9356 - val_loss: 0.2176 - val_acc: 0.9002
Epoch 97/100
Epoch 00097: val_loss did not improve
 - 5s - loss: 0.2281 - acc: 0.9104 - val_loss: 0.1914 - val_acc: 0.9202
Epoch 98/100
Epoch 00098: val_loss did not improve
 - 5s - loss: 0.1805 - acc: 0.9142 - val_los

Epoch 54/100
Epoch 00054: val_loss did not improve
 - 5s - loss: 0.2392 - acc: 0.8956 - val_loss: 0.2429 - val_acc: 0.9027
Epoch 55/100
Epoch 00055: val_loss did not improve
 - 5s - loss: 0.2038 - acc: 0.9133 - val_loss: 0.2424 - val_acc: 0.8778
Epoch 56/100
Epoch 00056: val_loss did not improve
 - 5s - loss: 0.2302 - acc: 0.9033 - val_loss: 0.2641 - val_acc: 0.8928
Epoch 57/100
Epoch 00057: val_loss did not improve
 - 5s - loss: 0.2157 - acc: 0.9017 - val_loss: 0.2515 - val_acc: 0.8603
Epoch 58/100
Epoch 00058: val_loss did not improve
 - 5s - loss: 0.2250 - acc: 0.8883 - val_loss: 0.2612 - val_acc: 0.8728
Epoch 59/100
Epoch 00059: val_loss did not improve
 - 5s - loss: 0.1989 - acc: 0.9225 - val_loss: 0.2524 - val_acc: 0.8953
Epoch 60/100
Epoch 00060: val_loss did not improve
 - 5s - loss: 0.2016 - acc: 0.9167 - val_loss: 0.2405 - val_acc: 0.8928
Epoch 61/100
Epoch 00061: val_loss improved from 0.23602 to 0.23430, saving model to best_m.h5
 - 5s - loss: 0.2161 - acc: 0.9056 - val_los

 - 5s - loss: 0.3185 - acc: 0.8433 - val_loss: 0.2378 - val_acc: 0.8978
Epoch 18/100
Epoch 00018: val_loss improved from 0.23779 to 0.21287, saving model to best_m.h5
 - 5s - loss: 0.3040 - acc: 0.8675 - val_loss: 0.2129 - val_acc: 0.9177
Epoch 19/100
Epoch 00019: val_loss improved from 0.21287 to 0.20771, saving model to best_m.h5
 - 5s - loss: 0.3294 - acc: 0.8658 - val_loss: 0.2077 - val_acc: 0.9152
Epoch 20/100
Epoch 00020: val_loss did not improve
 - 5s - loss: 0.3058 - acc: 0.8581 - val_loss: 0.2276 - val_acc: 0.8928
Epoch 21/100
Epoch 00021: val_loss did not improve
 - 5s - loss: 0.3039 - acc: 0.8737 - val_loss: 0.2441 - val_acc: 0.9102
Epoch 22/100
Epoch 00022: val_loss did not improve
 - 5s - loss: 0.3554 - acc: 0.8491 - val_loss: 0.2816 - val_acc: 0.8728
Epoch 23/100
Epoch 00023: val_loss did not improve
 - 5s - loss: 0.3204 - acc: 0.8641 - val_loss: 0.3841 - val_acc: 0.7681
Epoch 24/100
Epoch 00024: val_loss did not improve
 - 5s - loss: 0.3167 - acc: 0.8814 - val_loss: 0.34

Epoch 81/100
Epoch 00081: val_loss did not improve
 - 5s - loss: 0.2347 - acc: 0.9025 - val_loss: 0.1968 - val_acc: 0.9302
Epoch 82/100
Epoch 00082: val_loss did not improve
 - 5s - loss: 0.2420 - acc: 0.8964 - val_loss: 0.1955 - val_acc: 0.9302
Epoch 83/100
Epoch 00083: val_loss improved from 0.17218 to 0.16761, saving model to best_m.h5
 - 5s - loss: 0.2155 - acc: 0.9114 - val_loss: 0.1676 - val_acc: 0.9277
Epoch 84/100
Epoch 00084: val_loss did not improve
 - 5s - loss: 0.1993 - acc: 0.9175 - val_loss: 0.2167 - val_acc: 0.9027
Epoch 85/100
Epoch 00085: val_loss did not improve
 - 5s - loss: 0.2193 - acc: 0.9083 - val_loss: 0.1864 - val_acc: 0.9277
Epoch 86/100
Epoch 00086: val_loss did not improve
 - 5s - loss: 0.2217 - acc: 0.9078 - val_loss: 0.2425 - val_acc: 0.8978
Epoch 87/100
Epoch 00087: val_loss did not improve
 - 5s - loss: 0.2016 - acc: 0.9167 - val_loss: 0.1961 - val_acc: 0.9302
Epoch 88/100
Epoch 00088: val_loss did not improve
 - 5s - loss: 0.2050 - acc: 0.9142 - val_los

In [27]:
# Persist the training-set and test-set predictions so they can be reused later.
with open('../features/resnet_aug1_feat.pkl','wb') as feat_file:
    pickle.dump([train_pred,test_pred], feat_file)

# Log loss of the training-set predictions against the true labels.
print(log_loss(y, train_pred))

# Submission table: one row per test id with its predicted value.
submission = test_df[['id']].copy()
submission['is_iceberg'] = test_pred
print(submission.head())
submission.to_csv('../results/resnet_aug1_sub.csv', index=False)


# Score notes: pre 2179, new 2121
# (presumably log loss x 1e4 -- the value printed by this cell is 0.2121; confirm)

0.21213588434
         id  is_iceberg
0  5941774d    0.209001
1  4023181e    0.701245
2  b20200e4    0.055742
3  e7f018bb    0.996637
4  4371c8c3    0.009555
