In [20]:
import os
import numpy as np
import pandas as pd
from skimage.util.montage import montage2d
import matplotlib.pyplot as plt

In [4]:
def load_and_format(in_path, image_shape=(75, 75)):
    """Read a JSON file of two-band records and assemble an image array.

    Parameters
    ----------
    in_path : str or path-like
        Passed straight to ``pd.read_json``. The parsed frame must contain
        ``band_1`` and ``band_2`` columns, each holding one flat list of
        ``height * width`` values per row.
    image_shape : tuple of int, optional
        ``(height, width)`` of a single band. Defaults to ``(75, 75)``.

    Returns
    -------
    out_df : pandas.DataFrame
        The frame as parsed from ``in_path``.
    out_images : numpy.ndarray
        Array of shape ``(n_rows, height, width, 2)`` with ``band_1`` in
        channel 0 and ``band_2`` in channel 1.
    """
    out_df = pd.read_json(in_path)
    height, width = image_shape
    # Reshape with an explicit leading -1 rather than squeezing afterwards:
    # a bare squeeze() would also drop the batch axis for a one-row file.
    bands = [
        np.asarray(out_df[band_name].tolist(), dtype=float).reshape((-1, height, width))
        for band_name in ('band_1', 'band_2')
    ]
    out_images = np.stack(bands, axis=-1)
    return out_df, out_images
# Read both splits first, then report the frame and image-array sizes.
train_df, train_images = load_and_format('../train.json')
test_df, test_images = load_and_format('../test.json')

for split_name, split_df, split_images in [
    ('training', train_df, train_images),
    ('testing', test_df, test_images),
]:
    print(split_name, split_df.shape, 'loaded', split_images.shape)

# Show three random training rows as the cell's displayed value.
train_df.sample(3)

training (1604, 5) loaded (1604, 75, 75, 2)
testing (8424, 4) loaded (8424, 75, 75, 2)


Unnamed: 0,band_1,band_2,id,inc_angle,is_iceberg
723,"[-20.983963, -18.986147, -18.636415, -21.13527...","[-28.293722, -27.309444, -26.149689, -26.14977...",a47b7dee,32.2297,1
791,"[-22.191774, -21.276672, -24.626541, -28.40771...","[-25.428791, -26.312912, -25.713694, -24.88588...",93beaaaa,41.8704,0
111,"[-25.652521, -27.236193, -21.93988, -20.547207...","[-34.367096, -32.27243, -31.112637, -33.611458...",2b88818c,42.5591,1


In [11]:
from sklearn.model_selection import train_test_split
# Prefer the public `keras.utils` location; fall back to the private
# `np_utils` module for Keras releases that only expose the helper there.
try:
    from keras.utils import to_categorical
except ImportError:
    from keras.utils.np_utils import to_categorical

# One-hot encode the `is_iceberg` label and hold out half of the labelled
# images for validation. Note that `X_test`/`y_test` are this hold-out split,
# not the separately loaded `test_images`.
X_train, X_test, y_train, y_test = train_test_split(
    train_images,
    to_categorical(train_df['is_iceberg']),
    random_state = 2017,
    test_size = 0.5,
)
print('Train', X_train.shape, y_train.shape)
print('Validation', X_test.shape, y_test.shape)

Using TensorFlow backend.


Train (802, 75, 75, 2) (802, 2)
Validation (802, 75, 75, 2) (802, 2)


In [16]:
from keras.models import Sequential
from keras.layers import Conv2D, BatchNormalization, Dropout, MaxPooling2D, GlobalMaxPooling2D, Dense

# Small CNN over the 75x75 two-band images, ending in a 2-way softmax.
simple_cnn = Sequential()
# Batch-normalise the raw inputs as the first layer of the model.
simple_cnn.add(BatchNormalization(input_shape = (75, 75, 2)))
# Four conv/pool stages with 8, 16, 32 and 64 filters respectively.
# NOTE(review): the Conv2D and Dense(8) layers have no activation, so the only
# non-linearities before the softmax are the max-pooling steps -- confirm that
# this is intended.
for i in range(4):
    simple_cnn.add(Conv2D(8*2**i, kernel_size = (3,3)))
    simple_cnn.add(MaxPooling2D((2,2)))
simple_cnn.add(GlobalMaxPooling2D())
simple_cnn.add(Dropout(0.5))
simple_cnn.add(Dense(8))
simple_cnn.add(Dense(2, activation = 'softmax'))
# The head is a softmax and the labels are one-hot encoded with
# to_categorical, so categorical cross-entropy is the matching loss. The
# binary loss only agrees with it because there are exactly two classes.
simple_cnn.compile(optimizer='sgd', loss = 'categorical_crossentropy', metrics = ['accuracy'])
simple_cnn.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization_2 (Batch (None, 75, 75, 2)         8         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 73, 73, 8)         152       
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 36, 36, 8)         0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 34, 34, 16)        1168      
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 17, 17, 16)        0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 15, 15, 32)        4640      
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 7, 7, 32)          0         
__________

In [17]:
# Train for 100 epochs with shuffling, scoring the held-out split after each
# epoch. The returned History object is the cell's displayed value.
simple_cnn.fit(
    X_train,
    y_train,
    epochs = 100,
    shuffle = True,
    validation_data = (X_test, y_test),
)

Train on 802 samples, validate on 802 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
E

<keras.callbacks.History at 0x7fb61ddcbdd8>

In [18]:
# Run the trained network on the loaded test images; each row of the result
# holds the model's two softmax outputs.
test_predictions = simple_cnn.predict(test_images)

In [19]:
# Pair every test id with the model's class-1 (iceberg) output and write the
# table to disk without the index column.
iceberg_scores = test_predictions[:, 1]
pred_df = test_df[['id']].assign(is_iceberg = iceberg_scores)
pred_df.to_csv('predictions.csv', index = False)
# Show three random rows as the cell's displayed value.
pred_df.sample(3)

Unnamed: 0,id,is_iceberg
4936,f084666d,0.070851
7540,16105a8c,0.280635
4504,719ecd9f,0.003676
