In [1]:
import pandas as pd 
import numpy as np 
import json
import cv2


from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from keras.models import Sequential
from keras.layers import Dense, Dropout, Conv2D, MaxPool2D, Flatten
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import to_categorical
from keras.preprocessing.image import ImageDataGenerator # data augmentation
from keras.optimizers import Adam, SGD
from keras.callbacks import EarlyStopping



import matplotlib.pyplot as plt 
%matplotlib inline 

Using TensorFlow backend.
  return f(*args, **kwds)


# Loading the Training and the Test Sets

In [2]:
train = pd.read_json('train/processed/train.json')
test = pd.read_json('test/processed/test.json')

In [3]:
train.head()

Unnamed: 0,band_1,band_2,id,inc_angle,is_iceberg
0,"[-27.878360999999998, -27.15416, -28.668615, -...","[-27.154118, -29.537888, -31.0306, -32.190483,...",dfd5f913,43.9239,0
1,"[-12.242375, -14.920304999999999, -14.920363, ...","[-31.506321, -27.984554, -26.645678, -23.76760...",e25388fd,38.1562,0
2,"[-24.603676, -24.603714, -24.871029, -23.15277...","[-24.870956, -24.092632, -20.653963, -19.41104...",58b2aaa0,45.2859,1
3,"[-22.454607, -23.082819, -23.998013, -23.99805...","[-27.889421, -27.519794, -27.165262, -29.10350...",4cfc3a18,43.8306,0
4,"[-26.006956, -23.164886, -23.164886, -26.89116...","[-27.206915, -30.259186, -30.259186, -23.16495...",271f93f4,35.6256,0


In [4]:
train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1604 entries, 0 to 1603
Data columns (total 5 columns):
band_1        1604 non-null object
band_2        1604 non-null object
id            1604 non-null object
inc_angle     1604 non-null object
is_iceberg    1604 non-null int64
dtypes: int64(1), object(4)
memory usage: 75.2+ KB


In [5]:
train['inc_angle'].value_counts().head()

na         133
34.4721     23
42.5591     16
33.6352     15
36.1061     15
Name: inc_angle, dtype: int64

We right away notice that there are some NA valus in the inc_angle column 

In [6]:
train['inc_angle'].value_counts().head().index[0]

'na'

In [7]:
'''Replacing the NAs with 0s'''
train['inc_angle'] = train.inc_angle.replace(train['inc_angle'].value_counts().head().index[0], 0)

In [8]:
'''Validating the Replacements'''
train['inc_angle'].value_counts().head()

0.0000     133
34.4721     23
42.5591     16
33.6352     15
36.1061     15
Name: inc_angle, dtype: int64

In [9]:
train.head()

Unnamed: 0,band_1,band_2,id,inc_angle,is_iceberg
0,"[-27.878360999999998, -27.15416, -28.668615, -...","[-27.154118, -29.537888, -31.0306, -32.190483,...",dfd5f913,43.9239,0
1,"[-12.242375, -14.920304999999999, -14.920363, ...","[-31.506321, -27.984554, -26.645678, -23.76760...",e25388fd,38.1562,0
2,"[-24.603676, -24.603714, -24.871029, -23.15277...","[-24.870956, -24.092632, -20.653963, -19.41104...",58b2aaa0,45.2859,1
3,"[-22.454607, -23.082819, -23.998013, -23.99805...","[-27.889421, -27.519794, -27.165262, -29.10350...",4cfc3a18,43.8306,0
4,"[-26.006956, -23.164886, -23.164886, -26.89116...","[-27.206915, -30.259186, -30.259186, -23.16495...",271f93f4,35.6256,0


In [10]:
train.is_iceberg.value_counts()

0    851
1    753
Name: is_iceberg, dtype: int64

In [11]:
'''Preprocessing the image to be fed into the model'''

b1 = np.array([np.array(band).astype(np.float32).reshape(75,75) for band in train["band_1"]])
b2 = np.array([np.array(band).astype(np.float32).reshape(75,75) for band in train['band_2']])

a = (b1 - b1.mean()) / (b1.max() - b1.min())
b = (b2 - b2.mean()) / (b2.max() - b2.min())
b3 = a + b
c = (b3 - b3.mean()) / (b3.max() - b3.min())


# # b1 = b1/255.
# # b2 = b2/255.
X = np.stack([a,b,c], axis=-1)

# X = np.dstack((a,b,c))
y = to_categorical(train['is_iceberg'])

In [12]:
# X_band_1=np.array([np.array(band).astype(np.float32).reshape(75, 75) for band in train["band_1"]])
# X_band_2=np.array([np.array(band).astype(np.float32).reshape(75, 75) for band in train["band_2"]])
# X = np.concatenate([X_band_1[:, :, :, np.newaxis], X_band_2[:, :, :, np.newaxis],((X_band_1+X_band_2)/2)[:, :, :, np.newaxis]], axis=-1)

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X,y)

In [14]:
X_train.shape

(1203, 75, 75, 3)

In [15]:
y_test.shape

(401, 2)

# Buidling a Simple Keras Model

In [16]:
'''Setting up the Model'''
model=Sequential()

'''Setting up the Layers'''
model.add(Conv2D(filters=15, kernel_size=(5,5), strides=(1,1), input_shape=(75,75,3), activation='relu'))
model.add(MaxPool2D(pool_size=(2,2)))
model.add(Dropout(0.2))

model.add(Conv2D(filters=30, kernel_size=(4,4), activation='relu'))
model.add(MaxPool2D(pool_size=(2,2)))
model.add(Dropout(0.2))


model.add(Conv2D(filters=45, kernel_size=(4,4), activation='relu'))
model.add(MaxPool2D(pool_size=(2,2)))
model.add(Dropout(0.2))

model.add(Conv2D(filters=60, kernel_size=(4,4), activation='relu'))
model.add(MaxPool2D(pool_size=(2,2)))
model.add(Dropout(0.2))



model.add(Flatten())
model.add(Dense(50, activation='relu'))

'''Setting up the Output'''
model.add(Dense(2, activation='sigmoid'))

In [17]:
'''Compiling the Model'''

optimizer = Adam(lr=0.001)
# earlystop = EarlyStopping(monitor='val_loss', min_delta=0.1, patience=10, verbose=0, mode='auto')
# callbacks_list = [earlystop]

model.compile(optimizer=optimizer, loss='categorical_crossentropy',metrics = ['accuracy'])


In [18]:
'''Fitting the model'''
# model.fit(X_train, y_train, validation_data=[X_test, y_test], epochs=35,\
#           verbose=1, batch_size=32)

model.fit(X, y, validation_split=0.2, epochs=35, verbose=1)

Train on 1283 samples, validate on 321 samples
Epoch 1/35
Epoch 2/35
Epoch 3/35
Epoch 4/35
Epoch 5/35
Epoch 6/35
Epoch 7/35
Epoch 8/35
Epoch 9/35
Epoch 10/35
Epoch 11/35
Epoch 12/35
Epoch 13/35
Epoch 14/35
Epoch 15/35
Epoch 16/35
Epoch 17/35
Epoch 18/35
Epoch 19/35
Epoch 20/35
Epoch 21/35
Epoch 22/35
Epoch 23/35
Epoch 24/35
Epoch 25/35
Epoch 26/35
Epoch 27/35
Epoch 28/35
Epoch 29/35
Epoch 30/35
Epoch 31/35
Epoch 32/35
Epoch 33/35
Epoch 34/35
Epoch 35/35


<keras.callbacks.History at 0x1a26740b8>

In [19]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 71, 71, 15)        1140      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 35, 35, 15)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 35, 35, 15)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 32, 32, 30)        7230      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 16, 16, 30)        0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 16, 16, 30)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 13, 13, 45)        21645     
__________

# Getting Ready for some predictions 

In [20]:
'''Preprocessing the test image to be fed into the model'''
b1_test = np.array([np.array(band).astype(np.float32).reshape(75,75) for band in test["band_1"]])
b2_test = np.array([np.array(band).astype(np.float32).reshape(75,75) for band in test['band_2']])

# b1_test = b1_test/255.
# b2_test = b2_test/255.

b1_test = (b1_test - b1_test.mean()) / (b1_test.max() - b1_test.min())
b2_test = (b2_test - b2_test.mean()) / (b2_test.max() - b2_test.min())

b3_test = b1_test + b2_test
b3_test = (b3_test - b3_test.mean()) / (b3_test.max() - b3_test.min())

X = np.stack([b1_test,b2_test, b3_test], axis=-1)

In [21]:
pred = model.predict_classes(X)

In [22]:
pred[0:10]

array([0, 1, 0, 1, 1, 1, 0, 1, 1, 0])

In [23]:
submit_df = pd.DataFrame({'id': test['id'], 'is_iceberg': pred})
submit_df.to_csv('naive_submission.csv', index=False)