In [29]:
# import os
# os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
# os.environ["CUDA_VISIBLE_DEVICES"] = ""
# -----
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from keras import initializers
from keras import regularizers
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense, ZeroPadding2D
from keras.layers import BatchNormalization
from keras.utils.np_utils import to_categorical
from keras.layers.advanced_activations import LeakyReLU
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

## As for the data
133 out of 1604 examples are missing the 'inc_angle' value.  
#### All of them are ships!

In [None]:
# Find the training rows whose incidence angle is missing. The check below
# treats the string "na" in the 'inc_angle' column as the missing marker.
data_temp = pd.read_json('train.json')

# Descriptive names are used on purpose: binding `sum` or `list` here would
# shadow the built-ins for every later cell of the notebook.
missing_mask = data_temp['inc_angle'] == "na"
missing_indices = data_temp.index[missing_mask].tolist()
n_missing = len(missing_indices)

print("Number of datapoints with missing 'inc_angle':", n_missing, "\n\n")
print("List of indices:\n\n", missing_indices)

# First half-hearted attempt
## Data and parameters

In [15]:
def channels(data, image_shape=(75, 75)):
    """Build a standardised 3-channel image tensor from the two radar bands.

    Each row's flat ``band_1`` and ``band_2`` sequences are reshaped to
    ``image_shape``; a third channel is synthesised as their element-wise
    mean. Every channel is then standardised (zero mean, unit standard
    deviation) with statistics computed over all rows of ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide ``band_1`` and ``band_2`` columns whose cells each hold
        a flat sequence of ``height * width`` numbers.
    image_shape : tuple of int, optional
        ``(height, width)`` of a single band image. Defaults to ``(75, 75)``.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(data), height, width, 3)``. An empty
        ``data`` yields an empty array with that same trailing shape.

    Raises
    ------
    ValueError
        If the band values cannot be arranged as ``len(data)`` images of
        size ``image_shape``.
    """
    height, width = image_shape
    n_images = len(data)
    if n_images == 0:
        # Nothing to normalise; return a correctly-shaped empty tensor
        # instead of failing on the 4-D indexing below.
        return np.empty((0, height, width, 3), dtype=float)

    # Convert each band column in one go rather than row by row.
    band_1 = np.array(data['band_1'].tolist(), dtype=float).reshape(n_images, height, width)
    band_2 = np.array(data['band_2'].tolist(), dtype=float).reshape(n_images, height, width)
    # TODO: open question from the original author - try a band ratio
    # instead of the mean of the two bands?
    band_3 = (band_1 + band_2) / 2

    rgb_arrays = np.stack((band_1, band_2, band_3), axis=-1)

    # Per-channel statistics over every image and pixel.
    mean = rgb_arrays.mean(axis=(0, 1, 2), keepdims=True)
    std = rgb_arrays.std(axis=(0, 1, 2), keepdims=True)
    # A constant channel has zero spread; dividing by 1 maps it to zeros
    # instead of filling the tensor with NaN/inf.
    std[std == 0] = 1.0
    return (rgb_arrays - mean) / std

In [16]:
# Read the training set into its own frame, then derive the image tensor
# and the label matrix from that single frame.
train_frame = pd.read_json('train.json')

X = channels(train_frame)                      # image tensor built by channels()
Y = to_categorical(train_frame['is_iceberg'])  # labels as a one-hot matrix

In [23]:
# Input geometry and number of target classes used by the model cell.
image_height = 75
image_width = 75
# NOTE(review): this rebinds the name of the `channels()` helper defined
# earlier in the notebook. Re-run the cell that defines the function before
# calling it again, or the call fails because `channels` is now an int.
channels = 3
number_of_classes = 2

# Training hyper-parameters.
epochs = 15
batch_size = 32

# A fixed seed makes the train/test split - and therefore every metric
# reported below - reproducible across kernel restarts.
random_seed = 42

X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.15, random_state=random_seed)

print(X_train.shape)
print(X_test.shape)
print(Y_train.shape)
print(Y_test.shape)

(1363, 75, 75, 3)
(241, 75, 75, 3)
(1363, 2)
(241, 2)


## Model

In [24]:
# Convolutional classifier: five 3x3 convolutions (32 -> 512 filters), each
# followed by batch normalisation, with 2x2 max-pooling after the second and
# fifth convolution, then a 256-unit dense layer and a softmax output.
model1 = Sequential([
    # The image tensor is laid out as (samples, rows, cols, channels), so the
    # per-sample input shape is (height, width, channels) - not
    # (width, height, channels). The two only coincide for square images.
    Conv2D(32, (3, 3), activation='relu',
           input_shape=(image_height, image_width, channels)),
    BatchNormalization(),

    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(128, (3, 3), activation='relu'),
    BatchNormalization(),

    Conv2D(256, (3, 3), activation='relu'),
    BatchNormalization(),

    Conv2D(512, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),

    # Classification head.
    Flatten(),
    BatchNormalization(),
    Dropout(0.5),
    Dense(256, activation='relu', use_bias=True, bias_initializer='zeros'),
    BatchNormalization(),
    Dense(number_of_classes, activation='softmax'),
])

In [25]:
# Training configuration: Adam optimiser, categorical cross-entropy loss
# (the labels are one-hot encoded), accuracy reported as the metric.
model1.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [26]:
# Train on the training split; the held-out split is passed as validation
# data so it is scored alongside training.
model1.fit(
    x=X_train,
    y=Y_train,
    validation_data=(X_test, Y_test),
    epochs=epochs,
    batch_size=batch_size,
    shuffle=True,
)

Train on 1363 samples, validate on 241 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7f241c770f60>

## Summary

In [27]:
# Turn the softmax probabilities into hard 0/1 values with one vectorised
# call instead of rounding row by row in a Python loop.
# Note: np.round rounds halves to the nearest even value, so an exact
# 0.5/0.5 output becomes 0 in both columns.
Y_pred = np.round(model1.predict(X_test))


In [30]:
# Column 1 of the one-hot matrices is the positive class (is_iceberg == 1).
# Precision: of the samples predicted as 1, what fraction is truly 1?
# Recall:    of the samples that are truly 1, what fraction was predicted as 1?
# F1 score:  harmonic mean of precision and recall.

y_true = Y_test[:, 1]
y_hat = Y_pred[:, 1]

# labels=[0, 1] forces a 2x2 matrix even if one class is absent from both
# the labels and the predictions of this split.
y_comp = confusion_matrix(y_true, y_hat, labels=[0, 1])
tn, fp, fn, tp = y_comp.ravel()

# Guard the ratios so an empty denominator gives 0.0 instead of NaN.
precision = tp / (tp + fp) if (tp + fp) else 0.0
recall = tp / (tp + fn) if (tp + fn) else 0.0
accuracy = accuracy_score(y_true, y_hat)
f1score = (2 * precision * recall / (precision + recall)
           if (precision + recall) else 0.0)
print("AC:%.2f PR:%.2f RE:%.2f F1:%.2f" % (accuracy, precision, recall, f1score))

AC:0.85 PR:0.76 RE:0.96 F1:0.85


In [31]:
# Encode the scores as whole percentages in the file name. They are rounded
# explicitly because "%i" truncates floats (0.57 * 100 would print as 56).
scores_pct = tuple(int(round(score * 100))
                   for score in (accuracy, precision, recall, f1score))
filename = "model_%i_%i_%i_%i.h5" % scores_pct
model1.save(filename)