In [1]:
import pandas as pd # Used to open CSV files 
import numpy as np # Used for matrix operations
import cv2 # Used for image augmentation
from matplotlib import pyplot as plt
np.random.seed(666)


from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Activation
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.layers.normalization import BatchNormalization
from keras.optimizers import Adam

df_train = pd.read_json('kaggle_lceberg_data/train.json') # this is a dataframe


def get_scaled_imgs(df):
    imgs = []
    
    for i, row in df.iterrows():
        #make 75x75 image
        band_1 = np.array(row['band_1']).reshape(75, 75)
        band_2 = np.array(row['band_2']).reshape(75, 75)
        band_3 = band_1 + band_2 # plus since log(x*y) = log(x) + log(y)
        
        # Rescale
        a = (band_1 - band_1.mean()) / (band_1.max() - band_1.min())
        b = (band_2 - band_2.mean()) / (band_2.max() - band_2.min())
        c = (band_3 - band_3.mean()) / (band_3.max() - band_3.min())

        imgs.append(np.dstack((a, b, c)))

    return np.array(imgs)


Xtrain = get_scaled_imgs(df_train)
Ytrain = np.array(df_train['is_iceberg'])


df_train.inc_angle = df_train.inc_angle.replace('na',0)
idx_tr = np.where(df_train.inc_angle>0)


Ytrain = Ytrain[idx_tr[0]]
Xtrain = Xtrain[idx_tr[0],...]


def get_more_images(imgs):
    
    more_images = []
    vert_flip_imgs = []
    hori_flip_imgs = []
      
    for i in range(0,imgs.shape[0]):
        a=imgs[i,:,:,0]
        b=imgs[i,:,:,1]
        c=imgs[i,:,:,2]
        
        av=cv2.flip(a,1)
        ah=cv2.flip(a,0)
        bv=cv2.flip(b,1)
        bh=cv2.flip(b,0)
        cv=cv2.flip(c,1)
        ch=cv2.flip(c,0)
        
        vert_flip_imgs.append(np.dstack((av, bv, cv)))
        hori_flip_imgs.append(np.dstack((ah, bh, ch)))
      
    v = np.array(vert_flip_imgs)
    h = np.array(hori_flip_imgs)
       
    more_images = np.concatenate((imgs,v,h))
    
    return more_images


Xtr_more = get_more_images(Xtrain) 
Ytr_more = np.concatenate((Ytrain,Ytrain,Ytrain))



# def get_more_images(imgs):
    
#     more_images = []
#     vert_flip_imgs = []
#     hori_flip_imgs = []
      
#     for i in range(0,imgs.shape[0]):
#         a=imgs[i,:,:,0]
#         b=imgs[i,:,:,1]
#         c=imgs[i,:,:,2]
        
#         av=cv2.flip(a,1)
#         ah=cv2.flip(a,0)
#         bv=cv2.flip(b,1)
#         bh=cv2.flip(b,0)
#         cv=cv2.flip(c,1)
#         ch=cv2.flip(c,0)
        
#         vert_flip_imgs.append(np.dstack((av, bv, cv)))
#         hori_flip_imgs.append(np.dstack((ah, bh, ch)))
      
#     v = np.array(vert_flip_imgs)
#     h = np.array(hori_flip_imgs)
       
#     more_images = np.concatenate((imgs,v,h))
    
#     return more_images

# Ytr_more = np.concatenate((Ytrain,Ytrain,Ytrain))


def getModel():
    #Build keras model
    bn_momentum = 0.99
    
    model=Sequential()
    
#     model.add(Conv2D(32, kernel_size=(3, 3),activation='relu', input_shape=(75, 75, 3)))
# #     model.add( BatchNormalization(momentum = bn_momentum) )
#     model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
#     model.add(Dropout(0.2))
    # CNN 1
    model.add(Conv2D(64, kernel_size=(3, 3),activation='relu', input_shape=(75, 75, 3)))
#     model.add( BatchNormalization(momentum = bn_momentum) )
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    model.add(Dropout(0.2))

    # CNN 2
    model.add(Conv2D(128, kernel_size=(3, 3), activation='relu' ))
#     model.add( BatchNormalization(momentum = bn_momentum) )
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
    model.add(Dropout(0.2))

    # CNN 3
    model.add(Conv2D(128, kernel_size=(3, 3), activation='relu'))
#     model.add( BatchNormalization(momentum = bn_momentum) )
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
    model.add(Dropout(0.3))

    #CNN 4
    model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
#     model.add( BatchNormalization(momentum = bn_momentum) )
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
    model.add(Dropout(0.3))

    # You must flatten the data for the dense layers
    model.add(Flatten())

    #Dense 1
    model.add(Dense(512, activation='relu'))
#     model.add( BatchNormalization(momentum = bn_momentum) )
    model.add(Dropout(0.2))

    #Dense 2
    model.add(Dense(256, activation='relu'))
#     model.add( BatchNormalization(momentum = bn_momentum) )
    model.add(Dropout(0.2))

    # Output 
    model.add(Dense(1, activation="sigmoid"))

#     optimizer = Adam( lr = 1e-3, beta_1 = .9, beta_2 = .999, decay = 1e-3 )
    optimizer = Adam(lr=0.001, decay=0.0)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    
    return model

model = getModel()
model.summary()

batch_size = 32
earlyStopping = EarlyStopping(monitor='val_loss', patience=10, verbose=0, mode='min')
mcp_save = ModelCheckpoint('.mdl_wts_2.hdf5', save_best_only=True, monitor='val_loss', mode='min')
reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=7, verbose=1, epsilon=1e-4, mode='min')

#------------------------------------------------------------------------------------------------------------------------------------------------------
# Let's view progress 
history = model.fit(Xtr_more, Ytr_more, batch_size=batch_size, epochs=50, verbose=1, callbacks=[earlyStopping, mcp_save, reduce_lr_loss], validation_split=0.25)

print(history.history.keys())
#
fig = plt.figure()
plt.plot(history.history['acc'])
plt.plot(history.history['val_acc'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='lower left')
#
fig.savefig('performance.png')
#---------------------------------------------------------------------------------------

model.load_weights(filepath = '.mdl_wts_2.hdf5')

score = model.evaluate(Xtrain, Ytrain, verbose=1)
print('Train score:', score[0])
print('Train accuracy:', score[1])

df_test = pd.read_json('kaggle_lceberg_data/test.json')
df_test.inc_angle = df_test.inc_angle.replace('na',0)
Xtest = (get_scaled_imgs(df_test))
pred_test = model.predict(Xtest)
print (type(pred_test))
print (pred_test.shape)
submission = pd.DataFrame({'id': df_test["id"], 'is_iceberg': pred_test.reshape((pred_test.shape[0]))})
print(submission.head(10))

submission.to_csv('4L_submission_2.csv', index=False)

Using TensorFlow backend.


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 73, 73, 64)        1792      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 36, 36, 64)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 36, 36, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 34, 34, 128)       73856     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 17, 17, 128)       0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 17, 17, 128)       0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 15, 15, 128)       147584    
__________

In [12]:
print(submission.head(20))

          id    is_iceberg
0   5941774d  8.319946e-02
1   4023181e  9.996148e-01
2   b20200e4  1.402789e-02
3   e7f018bb  9.999987e-01
4   4371c8c3  9.999999e-01
5   a8d9b1fd  3.260162e-04
6   29e7727e  3.751566e-03
7   92a51ffb  9.999992e-01
8   c769ac97  2.528053e-06
9   aee0547d  7.023516e-06
10  565b28ac  6.648605e-08
11  e04e9775  9.999993e-01
12  8e8161d1  4.385532e-05
13  4cf4d256  1.000000e+00
14  139e5324  4.632185e-02
15  f156976f  3.061859e-02
16  68a117cc  4.948263e-04
17  d9aa7a56  1.602952e-02
18  9005b143  1.940684e-01
19  5f6d3988  9.999813e-01


In [13]:
print (pred_test[:20])

[[  8.31994563e-02]
 [  9.99614835e-01]
 [  1.40278898e-02]
 [  9.99998689e-01]
 [  9.99999881e-01]
 [  3.26016219e-04]
 [  3.75156594e-03]
 [  9.99999166e-01]
 [  2.52805285e-06]
 [  7.02351599e-06]
 [  6.64860451e-08]
 [  9.99999285e-01]
 [  4.38553179e-05]
 [  1.00000000e+00]
 [  4.63218465e-02]
 [  3.06185931e-02]
 [  4.94826294e-04]
 [  1.60295162e-02]
 [  1.94068357e-01]
 [  9.99981284e-01]]


In [16]:
for i in range(pred_test.shape[0]):
    if pred_test[i] > 0.8:
        pred_test[i] = 1
    elif pred_test[i] < 0.2:
        pred_test[i] = 0
print (pred_test[:100])

[[ 0.        ]
 [ 1.        ]
 [ 0.        ]
 [ 1.        ]
 [ 1.        ]
 [ 0.        ]
 [ 0.        ]
 [ 1.        ]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [ 1.        ]
 [ 0.        ]
 [ 1.        ]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [ 1.        ]
 [ 1.        ]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [ 0.62696731]
 [ 1.        ]
 [ 1.        ]
 [ 0.        ]
 [ 1.        ]
 [ 1.        ]
 [ 1.        ]
 [ 0.61077422]
 [ 1.        ]
 [ 0.        ]
 [ 1.        ]
 [ 0.        ]
 [ 1.        ]
 [ 0.        ]
 [ 0.7582888 ]
 [ 1.        ]
 [ 1.        ]
 [ 1.        ]
 [ 1.        ]
 [ 1.        ]
 [ 0.        ]
 [ 1.        ]
 [ 0.        ]
 [ 0.77189946]
 [ 0.        ]
 [ 1.        ]
 [ 0.63772857]
 [ 0.        ]
 [ 1.        ]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [ 1.        ]
 [ 1.        ]
 [ 1.        ]
 [ 1.        ]
 [ 0.        ]
 [ 1.        ]
 [ 0.     