In [268]:
import pandas as pd 
import numpy as np 
import json

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from keras.models import Sequential
from keras.layers import Dense, Dropout, Conv2D, MaxPool2D, Flatten
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import to_categorical



import matplotlib.pyplot as plt 
%matplotlib inline 

# Loading the Training and the Test Sets

In [269]:
# Read the train and test sets from their JSON files.
TRAIN_JSON = 'train/processed/train.json'
TEST_JSON = 'test/processed/test.json'

train = pd.read_json(TRAIN_JSON)
test = pd.read_json(TEST_JSON)

In [270]:
# Preview the first rows of the training frame.
train.head()

Unnamed: 0,band_1,band_2,id,inc_angle,is_iceberg
0,"[-27.878360999999998, -27.15416, -28.668615, -...","[-27.154118, -29.537888, -31.0306, -32.190483,...",dfd5f913,43.9239,0
1,"[-12.242375, -14.920304999999999, -14.920363, ...","[-31.506321, -27.984554, -26.645678, -23.76760...",e25388fd,38.1562,0
2,"[-24.603676, -24.603714, -24.871029, -23.15277...","[-24.870956, -24.092632, -20.653963, -19.41104...",58b2aaa0,45.2859,1
3,"[-22.454607, -23.082819, -23.998013, -23.99805...","[-27.889421, -27.519794, -27.165262, -29.10350...",4cfc3a18,43.8306,0
4,"[-26.006956, -23.164886, -23.164886, -26.89116...","[-27.206915, -30.259186, -30.259186, -23.16495...",271f93f4,35.6256,0


In [271]:
# Summarise the columns: dtypes, non-null counts and memory usage.
train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1604 entries, 0 to 1603
Data columns (total 5 columns):
band_1        1604 non-null object
band_2        1604 non-null object
id            1604 non-null object
inc_angle     1604 non-null object
is_iceberg    1604 non-null int64
dtypes: int64(1), object(4)
memory usage: 75.2+ KB


In [272]:
# Show the five most frequent inc_angle values together with their counts.
train['inc_angle'].value_counts().head()

na         133
34.4721     23
42.5591     16
36.1061     15
33.6352     15
Name: inc_angle, dtype: int64

We notice right away that there are some NA values in the inc_angle column.

In [273]:
# Most frequent inc_angle value: value_counts() orders by count, descending,
# so the first index label is the mode.
inc_angle_counts = train['inc_angle'].value_counts()
inc_angle_counts.index[0]

'na'

In [274]:
# Replace the 'na' placeholder strings in inc_angle with 0.
# The placeholder is named explicitly instead of being looked up as "the most
# frequent value", so the right value is replaced even if 'na' is not the mode.
# NOTE(review): 0 is a sentinel rather than a real angle; inc_angle is not used
# as a model input in the cells visible here.
train['inc_angle'] = train['inc_angle'].replace('na', 0)

In [275]:
# Validate the replacement: list the five most frequent inc_angle values again.
train.inc_angle.value_counts().head(5)

0.0000     133
34.4721     23
42.5591     16
33.6352     15
36.1061     15
Name: inc_angle, dtype: int64

In [276]:
# Preview the first rows again after the inc_angle replacement.
train.head()

Unnamed: 0,band_1,band_2,id,inc_angle,is_iceberg
0,"[-27.878360999999998, -27.15416, -28.668615, -...","[-27.154118, -29.537888, -31.0306, -32.190483,...",dfd5f913,43.9239,0
1,"[-12.242375, -14.920304999999999, -14.920363, ...","[-31.506321, -27.984554, -26.645678, -23.76760...",e25388fd,38.1562,0
2,"[-24.603676, -24.603714, -24.871029, -23.15277...","[-24.870956, -24.092632, -20.653963, -19.41104...",58b2aaa0,45.2859,1
3,"[-22.454607, -23.082819, -23.998013, -23.99805...","[-27.889421, -27.519794, -27.165262, -29.10350...",4cfc3a18,43.8306,0
4,"[-26.006956, -23.164886, -23.164886, -26.89116...","[-27.206915, -30.259186, -30.259186, -23.16495...",271f93f4,35.6256,0


In [277]:
# Count the rows per value of the is_iceberg target to check class balance.
train.is_iceberg.value_counts()

0    851
1    753
Name: is_iceberg, dtype: int64

In [278]:
# Preprocess the images for the model: each band entry is reshaped into a
# 75x75 float32 image, the images are stacked into one array per band, and the
# result is divided by 255.
# NOTE(review): the band values shown in train.head() are negative floats, so
# dividing by 255 is a constant rescale, not a [0, 1] normalisation -- confirm
# this is the intended scaling.
b1, b2 = (
    np.array([
        np.array(flat).astype(np.float32).reshape(75, 75)
        for flat in train[column]
    ]) / 255.
    for column in ('band_1', 'band_2')
)

In [279]:
# Features: the two bands are stacked along a new last axis, giving one
# two-channel 75x75 image per sample.
X = np.stack((b1, b2), axis=-1)
# Labels: one-hot encode the is_iceberg target.
y = to_categorical(train.is_iceberg)

In [280]:
# Hold out 20% of the labelled data for validation.
# random_state pins the shuffle so the split (and every metric computed from
# it) is reproducible across Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [281]:
# Shape of the training split's feature array.
X_train.shape

(1283, 75, 75, 2)

In [282]:
# Number of columns in the encoded labels of the held-out split.
y_test.shape[1]

2

# Building a Simple Keras Model

In [283]:
# Set up the model: a plain stack of layers.
model = Sequential()

# Three convolution + max-pooling stages with a growing number of filters.
# The first layer fixes the input to 75x75 images with 2 channels.
model.add(Conv2D(filters=15, kernel_size=(5, 5), strides=(1, 1), input_shape=(75, 75, 2), activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))

model.add(Conv2D(filters=30, kernel_size=(4, 4), activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))

model.add(Conv2D(filters=45, kernel_size=(4, 4), activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))

# Flatten the feature maps and pass them through one hidden dense layer.
model.add(Flatten())
model.add(Dense(50, activation='relu'))

# Output: one unit per class. softmax (not sigmoid) is used so the two outputs
# form a probability distribution, which is what the one-hot labels and the
# categorical cross-entropy loss used for training expect.
model.add(Dense(2, activation='softmax'))

In [284]:
# Compile the model: Adam optimizer, categorical cross-entropy loss, and
# accuracy reported as the metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [285]:
# Fit the model for 50 epochs, evaluating on the held-out split after each one.
# validation_data is passed as an (x_val, y_val) tuple, the form the Keras API
# documents; a list is not reliably accepted across Keras versions.
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=50, verbose=1)

Train on 1283 samples, validate on 321 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x43147f9b0>

In [286]:
# Print each layer with its output shape and parameter count.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_52 (Conv2D)           (None, 71, 71, 15)        765       
_________________________________________________________________
max_pooling2d_52 (MaxPooling (None, 35, 35, 15)        0         
_________________________________________________________________
conv2d_53 (Conv2D)           (None, 32, 32, 30)        7230      
_________________________________________________________________
max_pooling2d_53 (MaxPooling (None, 16, 16, 30)        0         
_________________________________________________________________
conv2d_54 (Conv2D)           (None, 13, 13, 45)        21645     
_________________________________________________________________
max_pooling2d_54 (MaxPooling (None, 6, 6, 45)          0         
_________________________________________________________________
flatten_16 (Flatten)         (None, 1620)              0         
__________

# Getting Ready for some predictions 

In [287]:
# Preprocess the test images exactly like the training images: reshape every
# band entry into a 75x75 float32 image, stack per band, then divide by 255.
b1_test, b2_test = (
    np.array([
        np.array(flat).astype(np.float32).reshape(75, 75)
        for flat in test[column]
    ]) / 255.
    for column in ('band_1', 'band_2')
)

In [288]:
# Stack the two test bands along a new last axis, matching the layout of X.
X_test_submission = np.stack((b1_test, b2_test), axis=-1)

In [289]:
# Predicted class per test image: the index of the largest of the two outputs.
# This is what Sequential.predict_classes computes for a multi-unit output, but
# it does not rely on that method, which is deprecated and removed in later
# Keras releases.
# NOTE(review): these are hard 0/1 labels; if the submission is scored on
# predicted probabilities, write model.predict(...)[:, 1] instead -- confirm.
pred = np.argmax(model.predict(X_test_submission), axis=-1)

In [290]:
# Peek at the first ten predicted labels.
pred[0:10]

array([0, 1, 1, 1, 1, 0, 0, 1, 0, 0])

In [291]:
# Build the submission table (test ids plus predicted label) and write it to
# CSV without the index column.
submit_df = test[['id']].assign(is_iceberg=pred)
submit_df.to_csv('naive_submission.csv', index=False)