source: https://www.kaggle.com/tomras/cnn-classifier-using-keras

In [24]:
import json
import numpy as np
from matplotlib import pyplot as plt
from keras.utils.np_utils import to_categorical

from keras.models import Sequential
from keras.layers.core import Flatten, Dense, Dropout, Lambda
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.optimizers import Adam, SGD
from sklearn.model_selection import train_test_split

from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from keras.utils.vis_utils import plot_model
import pydot
import load_ship_data as lsd
import time

Note: This notebook assumes keras-gpu is installed instead of the regular keras

Note: You'll need to downgrade to python 3.7 to install keras-gpu

Check for GPUs

In [2]:
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 10198986979239463533
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 6610781471
locality {
  bus_id: 1
  links {
  }
}
incarnation: 10699102043536263858
physical_device_desc: "device: 0, name: GeForce RTX 2070 with Max-Q Design, pci bus id: 0000:01:00.0, compute capability: 7.5"
]


load data

In [51]:
np.random.seed(522)
data_path="data/shipsnet.json"
train,test,valid=lsd.load_data_train_test_split(data_path)
ship_prob=np.sum(train[1])*1.0/train[1].shape[0]
p=np.array([1-ship_prob,ship_prob])
print(p)
Xtrain=train[0]
ytrain=train[1]
Xtest=test[0]
ytest=test[1]
Xvalid=valid[0]
yvalid=valid[1]
#Xtrain=Xtrain.reshape(Xtrain.shape[0],Xtrain.shape[1]*Xtrain.shape[2]*Xtrain.shape[3])
#Xtest=Xtest.reshape(Xtest.shape[0],Xtest.shape[1]*Xtest.shape[2]*Xtest.shape[3])
#Xvalid=Xvalid.reshape(Xvalid.shape[0],Xvalid.shape[1]*Xvalid.shape[2]*Xvalid.shape[3])
print(Xtrain.shape)
print(Xtest.shape)
print(Xvalid.shape)
Xtrain=np.concatenate((Xtrain,Xvalid))
Xtrain=Xtrain/255
Xtest=Xtest/255
ytrain=np.concatenate((ytrain,yvalid))
y_train_mono=ytrain
y_test_mono=ytest
ytrain=to_categorical(ytrain,num_classes=2)
ytest=to_categorical(ytest,num_classes=2)
print(Xtrain.shape)
print(ytrain.shape)
print(y_train_mono.shape,Xtrain.shape)

Training Set Data Length:  2800   Label Length:  2800
TestingSet Set Data Length:  600  Label Length:  600
Validation Set Data Length:  600  Label Length:  600
[0.7525 0.2475]
(2800, 3, 80, 80)
(600, 3, 80, 80)
(600, 3, 80, 80)
(3400, 3, 80, 80)
(3400, 2)
(3400,) (3400, 3, 80, 80)


In [52]:
Xtrain=Xtrain.reshape(-1,80,80,3)
Xtest=Xtest.reshape(-1,80,80,3)
print(Xtrain.shape)
print(Xtest.shape)
X_train=Xtrain
X_test=Xtest
y_train=ytrain
y_test=ytest

(3400, 80, 80, 3)
(600, 80, 80, 3)


define CNN

In [58]:
model = Sequential()
model.add(Conv2D(32, (3, 3), padding="same", input_shape=(80, 80, 3), activation='relu'))
model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.2))

model.add(Conv2D(64, (3, 3), padding="same", activation='relu'))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(2, activation='softmax'))
    
model.compile(loss='categorical_crossentropy', optimizer=SGD(lr=0.01, momentum=0.9, nesterov=True), 
              metrics=['accuracy'])

train the model

In [14]:
history = model.fit(X_train, y_train, batch_size=32, epochs=40, validation_split=0.15)
score = model.evaluate(X_test, y_test) 
print('Test loss:', score[0]) 
print('Test accuracy:', score[1])

Train on 2890 samples, validate on 510 samples
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40
Test loss: 0.030714893206249295
Test accuracy: 0.9900000095367432


In [15]:
print(model.summary())


Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_25 (Conv2D)           (None, 80, 80, 32)        896       
_________________________________________________________________
conv2d_26 (Conv2D)           (None, 78, 78, 32)        9248      
_________________________________________________________________
max_pooling2d_13 (MaxPooling (None, 39, 39, 32)        0         
_________________________________________________________________
dropout_13 (Dropout)         (None, 39, 39, 32)        0         
_________________________________________________________________
conv2d_27 (Conv2D)           (None, 39, 39, 64)        18496     
_________________________________________________________________
conv2d_28 (Conv2D)           (None, 37, 37, 64)        36928     
_________________________________________________________________
max_pooling2d_14 (MaxPooling (None, 18, 18, 64)       

In [59]:
def accuracy_score(y, y_model):
    # calculate classification overall accuracy and classwise accuracy
    
    assert len(y) == len(y_model)
    classn = len(np.unique(y))       # number of different classes
    correct_all = y == y_model       # all correct classifications
    acc_overall = np.sum(correct_all) / len(y)
    acc_i = np.zeros(classn)
    for i in range(classn):   
        GT_i = y == i                # samples actually belong to class i
        acc_i[i] = (np.sum(GT_i & correct_all) / np.sum(GT_i))
        
    return acc_i, acc_overall
def conf(pred,y):
    T0=sum([1 if x==y and y==0 else 0 for (x,y) in zip(pred,y)])
    T1=sum([1 if x==y and y==1 else 0 for (x,y) in zip(pred,y)])
    F0=sum([1 if x!=y and y==0 else 0 for (x,y) in zip(pred,y)])
    F1=sum([1 if x!=y and y==1 else 0 for (x,y) in zip(pred,y)])
    return np.array([[T0,F0],[F1,T1]])

In [65]:
X=X_train
y=y_train
k=5
kf=StratifiedKFold(n_splits=k,random_state=None)
acc_scores=[]
conf_mat=[]
times=[]
print(X.shape)
print(y.shape)
print(y_train_mono.shape)


(3400, 80, 80, 3)
(3400, 2)
(3400,)
[]


Although normally we could validate a Keras model using cross_val_score from scikit, in order to validate using the same Stratified KFold as our classifiers, we perform the cross validation manually below. cross_val_score can be fed a StratifiedKFold option, but this option is not compatible with the one-hot encoding used by the Kaggle CNN.

In [66]:
for train_index,test_index in kf.split(X,y_train_mono):
    X_tr,X_te=X[train_index,:],X[test_index,:]
    y_tr,y_te=y[train_index],y[test_index]
    y_tr_m,y_te_m=y_train_mono[train_index],y_train_mono[test_index]
    model = Sequential()
    model.add(Conv2D(32, (3, 3), padding="same", input_shape=(80, 80, 3), activation='relu'))
    model.add(Conv2D(32, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.2))
    model.add(Conv2D(64, (3, 3), padding="same", activation='relu'))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(2, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer=SGD(lr=0.01, momentum=0.9, nesterov=True), 
              metrics=['accuracy'])
    t0=time.time()
    model.fit(X_tr, y_tr, batch_size=32, epochs=40, validation_split=None)
    y_model = model.predict(X_te)
    t1=time.time()
    t=t1-t0
    times.append(t)
    y_model=np.argmax(y_model,axis=1)
    acc_class,acc_overall=accuracy_score(y_te_m, y_model)
    acc_scores.append(acc_overall)
    conf_mat.append(conf(y_model,y_te_m))

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40


Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40


Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40


Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40


Epoch 39/40
Epoch 40/40


In [67]:
avg_acc=np.sum(acc_scores)*1.0/k
print('Accuracy from each fold =  {}'.format(acc_scores))
print('Average Accuracy = {}'.format(avg_acc))
print('Average Runtime = {}'.format(np.sum(np.array(times))*1.0/5))
class0_acc=np.array([x[0][0]*1.0/(x[0][0]+x[0][1]) for x in conf_mat])
class0_acc=np.sum(class0_acc)*1.0/5
class1_acc=np.array([x[1][1]*1.0/(x[1][0]+x[1][1]) for x in conf_mat])
class1_acc=np.sum(class1_acc)*1.0/5
print(class0_acc)
print(class1_acc)

Accuracy from each fold =  [0.9926470588235294, 0.9897058823529412, 0.9941176470588236, 0.9882352941176471, 0.9941176470588236]
Average Accuracy = 0.991764705882353
Average Runtime = 71.82766485214233
0.9937254901960785
0.9858823529411765
