# Statoil/C-CORE Iceberg Classifier Challenge

---

## Import

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from os.path import join
from matplotlib import pyplot as plt
plt.rcParams['figure.figsize'] = 10, 10
%matplotlib inline

## Dataset

In [2]:
# Read the training and test sets from their JSON files.
train, test = (
    pd.read_json(json_path)
    for json_path in ("../data/train.json", "../data/test.json")
)

In [3]:
# Display the training frame (pandas elides the middle rows). Note the 'na'
# entries in the inc_angle column visible at the tail of the output.
train

Unnamed: 0,id,band_1,band_2,inc_angle,is_iceberg
0,dfd5f913,"[-27.878360999999998, -27.15416, -28.668615, -...","[-27.154118, -29.537888, -31.0306, -32.190483,...",43.9239,0
1,e25388fd,"[-12.242375, -14.920304999999999, -14.920363, ...","[-31.506321, -27.984554, -26.645678, -23.76760...",38.1562,0
2,58b2aaa0,"[-24.603676, -24.603714, -24.871029, -23.15277...","[-24.870956, -24.092632, -20.653963, -19.41104...",45.2859,1
3,4cfc3a18,"[-22.454607, -23.082819, -23.998013, -23.99805...","[-27.889421, -27.519794, -27.165262, -29.10350...",43.8306,0
4,271f93f4,"[-26.006956, -23.164886, -23.164886, -26.89116...","[-27.206915, -30.259186, -30.259186, -23.16495...",35.6256,0
...,...,...,...,...,...
1599,04e11240,"[-30.999878, -29.976866, -28.233906, -29.50732...","[-27.847719, -28.233864, -24.712077999999998, ...",na,0
1600,c7d6f6f8,"[-25.31155, -26.511555, -28.694487, -27.180115...","[-29.563713, -28.290375, -26.839405, -28.29046...",na,0
1601,bba1a0f1,"[-18.141895, -18.141844, -19.01737, -19.701599...","[-25.305355, -29.387701, -28.963863, -26.16023...",na,0
1602,7f66bb44,"[-22.455633, -25.794661, -26.954567, -22.83354...","[-26.070356, -22.093737, -21.577662, -24.53376...",na,0


In [4]:
def _bands_to_images(band_column):
    """Turn a column of flattened band readings into an (n, 75, 75) float32 array.

    Each entry of ``band_column`` is a flat sequence of 75*75 values that is
    cast to float32 and reshaped into a 75x75 image.
    """
    return np.array([np.array(band).astype(np.float32).reshape(75, 75)
                     for band in band_column])


# HH
X_band_1 = _bands_to_images(train["band_1"])
X_band_1_test = _bands_to_images(test["band_1"])

# HV
X_band_2 = _bands_to_images(train["band_2"])
# Fixed: this was previously built from test["band_1"], so the test set's
# second channel was a copy of the first instead of the band_2 data.
X_band_2_test = _bands_to_images(test["band_2"])

In [5]:
# Sanity check: one 75x75 image per training sample.
X_band_1.shape

(1604, 75, 75)

In [6]:
# Three channels on the last axis: HH, HV, and the element-wise mean of (HH, HV).
X_train = np.stack(
    [X_band_1, X_band_2, (X_band_1 + X_band_2) / 2], axis=-1)
X_test = np.stack(
    [X_band_1_test, X_band_2_test, (X_band_1_test + X_band_2_test) / 2], axis=-1)

In [7]:
# Training labels: the is_iceberg column of the training frame.
y_train = train["is_iceberg"]

In [8]:
# Sanity check: (n_samples, 75, 75, 3) with the three channels on the last axis.
X_train.shape

(1604, 75, 75, 3)

In [9]:
# Keep 80% of the samples for training and hold out the rest for validation;
# the fixed random_state makes the split repeatable.
# NOTE(review): this rebinds X_train / y_train, so re-running this cell alone
# splits the already-split data again -- re-run the cells above it first.
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train,
    y_train,
    train_size=0.8,
    random_state=2020,
)

## Model

In [10]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, GlobalAveragePooling2D, Activation
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import BatchNormalization

In [11]:
# CNN for 75x75x3 inputs: four Conv -> MaxPool -> BatchNorm stages, global
# average pooling, two fully connected ReLU layers with batch normalisation,
# and a single sigmoid unit as the output.
model = Sequential([
    # Convolutional stage 1 (declares the input shape)
    Conv2D(64, kernel_size=(3, 3), activation='relu', input_shape=(75, 75, 3)),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    BatchNormalization(),

    # Convolutional stage 2
    Conv2D(128, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    BatchNormalization(),

    # Convolutional stage 3
    Conv2D(128, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    BatchNormalization(),

    # Convolutional stage 4
    Conv2D(128, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    BatchNormalization(),

    # Collapse each feature map to a single value
    GlobalAveragePooling2D(),

    # Fully connected head
    Dense(512),
    Activation('relu'),
    BatchNormalization(),

    Dense(256),
    Activation('relu'),
    BatchNormalization(),

    # One sigmoid output unit
    Dense(1),
    Activation('sigmoid'),
])

In [12]:
# Print the layer-by-layer output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 73, 73, 64)        1792      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 36, 36, 64)        0         
_________________________________________________________________
batch_normalization (BatchNo (None, 36, 36, 64)        256       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 34, 34, 128)       73856     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 17, 17, 128)       0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 17, 17, 128)       512       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 15, 15, 128)       1

In [13]:
# Binary cross-entropy matches the single sigmoid output; accuracy is tracked
# as an additional metric.
# `learning_rate` replaces the deprecated `lr` alias.
# NOTE(review): decay=0.9 presumably applies Keras' legacy time-based decay
# (lr / (1 + decay * step)), which would shrink the learning rate to a tiny
# fraction within the first epoch. Confirm this is intended; it may have been
# meant as a momentum-style value. Left unchanged to keep training behaviour.
model.compile(loss='binary_crossentropy',
              optimizer=Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999,
                             epsilon=1e-8, decay=0.9),
              metrics=['accuracy'])

## Train

In [14]:
# Train for 50 epochs in mini-batches of 32, scoring the held-out validation
# split after every epoch.
model.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    epochs=50,
    batch_size=32,
    verbose=1,
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x7f04bc09cdd0>

## Prediction

In [15]:
# Score the validation split; the first two entries of the result are printed
# as loss and accuracy, in that order.
valid_score = model.evaluate(X_valid, y_valid, verbose=1)
for label, value in zip(('valid loss :', 'valid accuracy :'), valid_score):
    print(label, value)

valid loss : 0.32487741112709045
valid accuracy : 0.8629283308982849


In [16]:
# Predicted probabilities for the test images. `predict_proba` is deprecated
# (its own warning says to use `model.predict()`), and with a sigmoid output
# layer `predict` already returns the probabilities.
predictions = model.predict(X_test)

Instructions for updating:
Please use `model.predict()` instead.


In [17]:
# Assemble the submission table: one row per test id with its predicted
# value flattened to 1-D, then write it out without the index column.
submission = pd.DataFrame({
    'id': test['id'],
    'is_iceberg': predictions.reshape(len(predictions)),
})
submission.to_csv('../data/submission.csv', index=False)