# Import packages

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import seaborn as sns
import json
%matplotlib inline

In [2]:
from sklearn.model_selection import train_test_split

In [3]:
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, Activation, BatchNormalization, GlobalMaxPooling2D
from keras.optimizers import SGD
from keras.utils.np_utils import to_categorical

Using TensorFlow backend.


# Functions

In [4]:
def get_images(df):
    images = []
    
    for idx, row in df.iterrows():
        band_1 = np.array(row['band_1']).reshape(75, 75)
        band_2 = np.array(row['band_2']).reshape(75, 75)

        # Pre-Processing:
        #  - (Re)scaling
        #  - Standardization
        #  - Stretching
        #  - ...
        # band_1 /= 255
        # band_2 /= 255
        

        bands = np.dstack((band_1, band_2))
        images.append(bands)
        
    return np.array(images)


def plot_acc(histobj):
    plt.figure(figsize=(10,10))
    plt.plot(histobj.history['acc'])
    plt.plot(histobj.history['val_acc'])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc='upper left')
    plt.show()
    
    
def plot_loss(histobj):
    plt.figure(figsize=(10,10))
    plt.plot(histobj.history['loss'])
    plt.plot(histobj.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc='upper left')
    plt.show()
      
    
def show_image(img):
    fig = plt.figure(figsize=(12, 5))
    ax = plt.subplot(1, 2, 1)
    ax.imshow(img[:, :, 0], cmap=cm.inferno)
    ax.set_title('Band 1')

    ax = plt.subplot(1, 2, 2)
    im = ax.imshow(img[:, :, 1], cmap=cm.inferno)
    ax.set_title('Band 2')
    
    cax = fig.add_axes([0.95, 0.1, 0.03, 0.8])
    fig.colorbar(im, cax=cax, label='[dB]')

    plt.show()
    
    
def get_class(pred, label, img):
    classes = ['ship', 'iceberg']
    pred_i = np.argmax(pred)
    label_i = np.argmax(label)
    print('Prediction class = {}'.format(classes[pred_i]))
    print('Prediction value (%) = {}'.format(pred[pred_i]))
    print('Label class = {}'.format(classes[label_i]))
    show_image(img)

# Import data


In [5]:
train_df = pd.read_json('C:/Saudin/data/train.json', dtype='float32')
test_df = pd.read_json('C:/Saudin/data/test.json', dtype='float32')

In [6]:
train_df.head(5)

Unnamed: 0,band_1,band_2,id,inc_angle,is_iceberg
0,"[-27.878360999999998, -27.15416, -28.668615, -...","[-27.154118, -29.537888, -31.0306, -32.190483,...",dfd5f913,43.9239,0.0
1,"[-12.242375, -14.920304999999999, -14.920363, ...","[-31.506321, -27.984554, -26.645678, -23.76760...",e25388fd,38.1562,0.0
2,"[-24.603676, -24.603714, -24.871029, -23.15277...","[-24.870956, -24.092632, -20.653963, -19.41104...",58b2aaa0,45.2859,1.0
3,"[-22.454607, -23.082819, -23.998013, -23.99805...","[-27.889421, -27.519794, -27.165262, -29.10350...",4cfc3a18,43.8306,0.0
4,"[-26.006956, -23.164886, -23.164886, -26.89116...","[-27.206915, -30.259186, -30.259186, -23.16495...",271f93f4,35.6256,0.0


In [7]:
X = get_images(train_df)
Y = to_categorical(train_df.is_iceberg.values, num_classes=2) # [0. 1.]=iceberg, [1. 0.]=ship
TEST = get_images(test_df)
TEST_labels = test_df['id']

In [13]:
train_X, val_X, train_Y, val_Y = train_test_split(X, Y, test_size=0.10,  shuffle=True, random_state=12)

# CNN Model Configuration

In [14]:
model = Sequential()
model.add(BatchNormalization(input_shape = (75, 75, 2)))
model.add(Conv2D(32, kernel_size = (5,5)))
model.add(MaxPooling2D((2,2)))
model.add(Conv2D(64, kernel_size = (4,4)))
model.add(MaxPooling2D((2,2)))
model.add(Conv2D(128, kernel_size = (3,3)))
model.add(MaxPooling2D((2,2)))
model.add(GlobalMaxPooling2D())
model.add(Dropout(0.5))
model.add(Dense(8))
model.add(Dense(2, activation = 'softmax'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization_1 (Batch (None, 75, 75, 2)         8         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 71, 71, 32)        1632      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 35, 35, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 32, 32, 64)        32832     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 16, 16, 64)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 14, 14, 128)       73856     
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 7, 7, 128)         0         
__________

In [None]:
model = Sequential()
model.add(BatchNormalization(input_shape = (75, 75, 2)))
for i in range(4):
    model.add(Conv2D(8*2**i, kernel_size = (3,3)))
    model.add(MaxPooling2D((2,2)))
model.add(GlobalMaxPooling2D())
model.add(Dropout(0.5))
model.add(Dense(8))
model.add(Dense(2, activation = 'softmax'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

In [15]:
model.compile(optimizer='adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

In [16]:
model_hist = model.fit(train_X, train_Y, validation_data=(val_X, val_Y), batch_size=32, epochs=10)

Train on 1443 samples, validate on 161 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
plot_loss(model_hist)

In [None]:
plot_acc(model_hist)

In [None]:
predicts = model.predict(train_X, batch_size=32)

In [None]:
val_preds = model.predict(val_X, batch_size=32)

In [19]:
test_preds = model.predict(TEST, batch_size=32)

In [None]:
sample = 100
get_class(predicts[sample], train_Y[sample], train_X[sample])

In [None]:
sample = 30
get_class(val_preds[sample], val_Y[sample], val_X[sample])

In [36]:
is_ice = test_preds[:, 1]
ids = TEST_labels

In [50]:
ids.shape

(8424,)

In [45]:
test_pd = pd.DataFrame([ids, is_ice], columns=['id', 'is_iceberg'])

In [41]:
test_pd.head(5)

Unnamed: 0,id,is_iceberg
id,,
Unnamed 0,,


In [43]:
ids.values

array(['5941774d', '4023181e', 'b20200e4', ..., 'df30d6dd', '18af95b1',
       '27d788c8'], dtype=object)

In [54]:
with open('subv1.csv', 'w') as fp:
    fp.write('id,is_iceberg\n')
    for i in range(len(TEST_labels)):
        fp.write('{0:},{1:.10f}\n'.format(TEST_labels[i], test_preds[i,1]))