In [395]:
import pandas as pd 
import numpy as np 
import json

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from keras.models import Sequential
from keras.layers import Dense, Dropout, Conv2D, MaxPool2D, Flatten
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import to_categorical



import matplotlib.pyplot as plt 
%matplotlib inline 

# Loading the Training and the Test Sets

In [396]:
# Read the pre-processed training and test sets (one JSON file each) into DataFrames.
train, test = (
    pd.read_json(json_path)
    for json_path in ('train/processed/train.json', 'test/processed/test.json')
)

In [397]:
# Preview the first rows of the training frame to see which columns it carries.
train.head()

Unnamed: 0,band_1,band_2,id,inc_angle,is_iceberg
0,"[-27.878360999999998, -27.15416, -28.668615, -...","[-27.154118, -29.537888, -31.0306, -32.190483,...",dfd5f913,43.9239,0
1,"[-12.242375, -14.920304999999999, -14.920363, ...","[-31.506321, -27.984554, -26.645678, -23.76760...",e25388fd,38.1562,0
2,"[-24.603676, -24.603714, -24.871029, -23.15277...","[-24.870956, -24.092632, -20.653963, -19.41104...",58b2aaa0,45.2859,1
3,"[-22.454607, -23.082819, -23.998013, -23.99805...","[-27.889421, -27.519794, -27.165262, -29.10350...",4cfc3a18,43.8306,0
4,"[-26.006956, -23.164886, -23.164886, -26.89116...","[-27.206915, -30.259186, -30.259186, -23.16495...",271f93f4,35.6256,0


In [398]:
# Column dtypes and non-null counts; object-typed columns deserve a closer look.
train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1604 entries, 0 to 1603
Data columns (total 5 columns):
band_1        1604 non-null object
band_2        1604 non-null object
id            1604 non-null object
inc_angle     1604 non-null object
is_iceberg    1604 non-null int64
dtypes: int64(1), object(4)
memory usage: 75.2+ KB


In [399]:
# Most frequent raw values of inc_angle, to spot placeholder entries in the column.
train['inc_angle'].value_counts().head()

na         133
34.4721     23
42.5591     16
36.1061     15
33.6352     15
Name: inc_angle, dtype: int64

We notice right away that the inc_angle column contains missing values: 133 rows hold the string 'na' instead of a number.

In [400]:
# value_counts() sorts by frequency, so index[0] is the single most common raw value
# in inc_angle (here the 'na' placeholder).
train['inc_angle'].value_counts().head().index[0]

'na'

In [401]:
'''Replacing the NAs with 0s'''
# Missing incidence angles are stored as the string 'na', which keeps the column at
# dtype object. Coerce the column to numeric (anything non-numeric becomes NaN) and
# fill those gaps with 0. Unlike looking up "the most frequent value", this never
# overwrites a valid angle, yields a float column, and is safe to re-run.
train['inc_angle'] = pd.to_numeric(train['inc_angle'], errors='coerce').fillna(0)

In [402]:
'''Validating the Replacements'''
# After the replacement the former 'na' rows should show up as 0 in the counts.
train['inc_angle'].value_counts().head()

0.0000     133
34.4721     23
42.5591     16
33.6352     15
36.1061     15
Name: inc_angle, dtype: int64

In [403]:
# Look at the first rows again now that inc_angle has been cleaned.
train.head()

Unnamed: 0,band_1,band_2,id,inc_angle,is_iceberg
0,"[-27.878360999999998, -27.15416, -28.668615, -...","[-27.154118, -29.537888, -31.0306, -32.190483,...",dfd5f913,43.9239,0
1,"[-12.242375, -14.920304999999999, -14.920363, ...","[-31.506321, -27.984554, -26.645678, -23.76760...",e25388fd,38.1562,0
2,"[-24.603676, -24.603714, -24.871029, -23.15277...","[-24.870956, -24.092632, -20.653963, -19.41104...",58b2aaa0,45.2859,1
3,"[-22.454607, -23.082819, -23.998013, -23.99805...","[-27.889421, -27.519794, -27.165262, -29.10350...",4cfc3a18,43.8306,0
4,"[-26.006956, -23.164886, -23.164886, -26.89116...","[-27.206915, -30.259186, -30.259186, -23.16495...",271f93f4,35.6256,0


In [404]:
# Class balance of the target: number of rows per is_iceberg label.
train.is_iceberg.value_counts()

0    851
1    753
Name: is_iceberg, dtype: int64

In [405]:
'''Preprocessing the image to be fed into the model'''

# Every row stores a band as a flat sequence; cast it to float32 and fold it into a
# 75x75 image, then collect all rows of that band into a single array.
b1, b2 = (
    np.array([np.asarray(pixels).astype(np.float32).reshape(75, 75) for pixels in train[column]])
    for column in ("band_1", "band_2")
)

# Constant rescale of both bands by 255.
# NOTE(review): the previewed band values are negative floats, not 0-255 pixel
# intensities, so this is only a fixed scale factor - confirm it is the intended
# normalisation (the test-set preprocessing must use the same one).
b1 /= 255.
b2 /= 255.

In [406]:
# Put the two bands side by side as channels on a new last axis -> (n, 75, 75, 2).
X = np.stack((b1, b2), axis=-1)
# One-hot encode the is_iceberg labels for the network's two-unit output.
y = to_categorical(train['is_iceberg'])

In [407]:
# Hold out 20% of the labelled data for validation.
# random_state pins the shuffle so a Restart & Run All reproduces the same split;
# stratify keeps the iceberg / non-iceberg ratio equal in both parts (y is one-hot,
# so argmax recovers the class label for each row).
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
    stratify=y.argmax(axis=1),
)

In [408]:
# Sanity check: the training part should be (n_samples, 75, 75, 2).
X_train.shape

(1283, 75, 75, 2)

In [409]:
# Sanity check: the held-out labels are one-hot, i.e. (n_samples, 2).
y_test.shape

(321, 2)

# Building a Simple Keras Model

In [410]:
'''Setting up the Model'''
# Small CNN: four Conv2D + MaxPool2D stages with a growing number of filters,
# followed by one hidden dense layer and a two-class output.
model=Sequential()

'''Setting up the Layers'''
# Input is a 75x75 image with 2 channels (band_1, band_2).
model.add(Conv2D(filters=15, kernel_size=(5,5), strides=(1,1), input_shape=(75,75,2), activation='relu'))
model.add(MaxPool2D(pool_size=(2,2)))

model.add(Conv2D(filters=30, kernel_size=(4,4), activation='relu'))
model.add(MaxPool2D(pool_size=(2,2)))

model.add(Conv2D(filters=45, kernel_size=(4,4), activation='relu'))
model.add(MaxPool2D(pool_size=(2,2)))

model.add(Conv2D(filters=60, kernel_size=(4,4), activation='relu'))
model.add(MaxPool2D(pool_size=(2,2)))


# Flatten the final feature maps into a vector for the dense head.
model.add(Flatten())
model.add(Dense(50, activation='relu'))

'''Setting up the Output'''
# The targets are one-hot over two mutually exclusive classes and the model is
# trained with categorical cross-entropy, so the output must be a softmax (a single
# probability distribution over the classes) rather than two independent sigmoids.
model.add(Dense(2, activation='softmax'))

In [411]:
'''Compiling the Model'''
# Adam optimiser, cross-entropy over the one-hot targets, accuracy reported per epoch.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [412]:
'''Fitting the model'''
# Train for 75 epochs and evaluate on the held-out split after every epoch.
# validation_data is passed as an (x_val, y_val) tuple, the form the Keras API
# documents; a list is not reliably unpacked as (inputs, targets) across versions.
model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=75,
    verbose=1,
)

Train on 1283 samples, validate on 321 samples
Epoch 1/75
Epoch 2/75
Epoch 3/75
Epoch 4/75
Epoch 5/75
Epoch 6/75
Epoch 7/75
Epoch 8/75
Epoch 9/75
Epoch 10/75
Epoch 11/75
Epoch 12/75
Epoch 13/75
Epoch 14/75
Epoch 15/75
Epoch 16/75
Epoch 17/75
Epoch 18/75
Epoch 19/75
Epoch 20/75
Epoch 21/75
Epoch 22/75
Epoch 23/75
Epoch 24/75
Epoch 25/75
Epoch 26/75
Epoch 27/75
Epoch 28/75
Epoch 29/75
Epoch 30/75
Epoch 31/75
Epoch 32/75
Epoch 33/75
Epoch 34/75
Epoch 35/75
Epoch 36/75
Epoch 37/75
Epoch 38/75
Epoch 39/75
Epoch 40/75
Epoch 41/75
Epoch 42/75
Epoch 43/75
Epoch 44/75
Epoch 45/75
Epoch 46/75
Epoch 47/75
Epoch 48/75
Epoch 49/75
Epoch 50/75
Epoch 51/75
Epoch 52/75
Epoch 53/75
Epoch 54/75
Epoch 55/75
Epoch 56/75
Epoch 57/75
Epoch 58/75
Epoch 59/75
Epoch 60/75
Epoch 61/75


Epoch 62/75
Epoch 63/75
Epoch 64/75
Epoch 65/75
Epoch 66/75
Epoch 67/75
Epoch 68/75
Epoch 69/75
Epoch 70/75
Epoch 71/75
Epoch 72/75
Epoch 73/75
Epoch 74/75
Epoch 75/75


<keras.callbacks.History at 0x20fbfc710>

In [413]:
# Print the layer-by-layer output shapes and parameter counts of the network.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_94 (Conv2D)           (None, 71, 71, 15)        765       
_________________________________________________________________
max_pooling2d_91 (MaxPooling (None, 35, 35, 15)        0         
_________________________________________________________________
conv2d_95 (Conv2D)           (None, 32, 32, 30)        7230      
_________________________________________________________________
max_pooling2d_92 (MaxPooling (None, 16, 16, 30)        0         
_________________________________________________________________
conv2d_96 (Conv2D)           (None, 13, 13, 45)        21645     
_________________________________________________________________
max_pooling2d_93 (MaxPooling (None, 6, 6, 45)          0         
_________________________________________________________________
conv2d_97 (Conv2D)           (None, 3, 3, 60)          43260     
__________

# Getting Ready for some predictions 

In [414]:
'''Preprocessing the test image to be fed into the model'''
# Same treatment as the training bands: cast each row's band to float32, reshape it
# to a 75x75 image, collect the rows into one array, and rescale by the constant 255.
b1_test = np.array(
    [np.asarray(flat).astype(np.float32).reshape(75, 75) for flat in test["band_1"]]
) / 255.
b2_test = np.array(
    [np.asarray(flat).astype(np.float32).reshape(75, 75) for flat in test['band_2']]
) / 255.

In [415]:
# Combine the two test bands as channels on a new last axis, matching the layout
# the model was trained on.
X_test_submission = np.stack((b1_test, b2_test), axis=-1)

In [416]:
# Predicted class index per test image (0 or 1): argmax over the two output units.
# This is exactly what Sequential.predict_classes computed for a multi-unit output,
# but it does not rely on that deprecated (and later removed) convenience method.
# NOTE(review): if the submission is scored on probabilities rather than hard labels,
# model.predict(X_test_submission)[:, 1] would be the value to submit - confirm.
pred = model.predict(X_test_submission).argmax(axis=-1)

In [417]:
# Peek at the first ten predicted labels.
pred[0:10]

array([0, 0, 1, 1, 1, 0, 0, 1, 0, 0])

In [418]:
# Pair every test id with its predicted label and write the result as a CSV
# without the DataFrame index column.
submit_df = pd.DataFrame(data={'id': test['id'], 'is_iceberg': pred})
submit_df.to_csv(path_or_buf='naive_submission.csv', index=False)