### LOAD DATASET

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2
from glob import glob 
import time
from datetime import datetime
from sklearn.model_selection import KFold, train_test_split
from sklearn.utils import shuffle

from keras.utils import np_utils
from keras.models import Sequential 
from keras.layers import Activation, Dropout, Flatten, Dense, Conv2D, MaxPooling2D, ZeroPadding2D
from keras import optimizers
from keras.callbacks import EarlyStopping, ModelCheckpoint

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


### DATA PREPROCESSING 

In [2]:
img_h = 224
img_w = 224
def read_img(path,):
    'read, resize, and convert an image to grayscale'
    img = cv2.imread(path)
    resize = cv2.resize(img, (img_h, img_w), cv2.INTER_LINEAR)
    #gray = cv2.cvtColor(resize, cv2.COLOR_BGR2GRAY)
    return resize

In [3]:
type_paths = glob('/storage/Documents/springboard_capstone/capstone1/train/*')
test_paths = glob('/storage/Documents/springboard_capstone/capstone1/test/*')

In [4]:
train_paths = []
for path in type_paths:
    train_paths += glob(path+'/*')

In [5]:
def load_train(paths):
    train_data = []
    train_target = []
    train_id = []
    start_time = time.time()
    for path in paths:
        train_id.append(path)
        train_data.append(read_img(path))
        target = path.split('/')[-2]
        train_target.append(target)
    print ('Training data load time: {}'.format(time.time() - start_time))
    return train_data, train_target, train_id

def load_test(paths):
    test_data = []
    test_id = []
    start_time = time.time()
    for path in paths:
        test_id.append(path)
        test_data.append(read_img(path))
    print ('Testing data load time: {}'.format(time.time() - start_time))
    return test_data, test_id

In [6]:
train_data, train_target, train_id_ = load_train(train_paths)
test_data, test_id_ = load_test(test_paths)

Training data load time: 342.038950920105
Testing data load time: 123.54733157157898


In [7]:
def normalize_data(data):
    data = np.array(data, dtype=np.uint8)
    data = data.astype('float32')
    data = data/255
    return data

train_data = normalize_data(train_data)
print ('Shape of the training data is ', train_data.shape)

Shape of the training data is  (1481, 224, 224, 3)


In [8]:
test_data = normalize_data(test_data)
print ('Shape of the training data is ', test_data.shape)

Shape of the training data is  (503, 224, 224, 3)


In [9]:
# Convert train_target to one-hot-encoding before fitting into the model
train_label = [int(x.split('_')[1]) for x in train_target]
train_label = np_utils.to_categorical(train_label)
train_label = np.transpose(train_label)
train_label = np.transpose(train_label[~(train_label==0).all(axis=1)])

In [10]:
# Create validation split 
X_train, X_valid, Y_train, Y_valid = train_test_split(train_data, train_label, test_size=0.2, random_state=22)

### BUILDING MODEL

In [11]:
#Training hyperparameters
epochs = 50
batch_size = 16

In [12]:
#Early stopping callback
patience = 4
early_stopping = EarlyStopping(monitor='val_acc', min_delta=0.05, 
                              patience = patience, verbose=0, mode='auto')

In [13]:
#Build the first model with a simple stack of 3 convolution layers with a ReLU activation and max_pooling

model1 = Sequential()
model1.add(Conv2D(32, (3, 3), input_shape=(img_h, img_w, 3), data_format='channels_last'))
model1.add(Activation('relu'))
model1.add(MaxPooling2D(pool_size=(2,2), strides=(2,2)))
model1.add(Dropout(0.25))

model1.add(Conv2D(64, (3, 3), data_format='channels_last'))
model1.add(Activation('relu'))
model1.add(MaxPooling2D(pool_size=(2,2), strides=(2,2)))
model1.add(Dropout(0.25))

model1.add(Conv2D(128, (3,3), data_format='channels_last'))
model1.add(Activation('relu'))
model1.add(MaxPooling2D(pool_size=(2,2), strides=(2,2)))
model1.add(Dropout(0.25))

#Add flatten
model1.add(Flatten())
model1.add(Dense(256, activation='relu'))
model1.add(Dropout(0.5))
model1.add(Dense(3, activation='softmax'))

sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model1.compile(loss='categorical_crossentropy', 
             optimizer=sgd,
             metrics = ['accuracy'])

In [14]:
model1.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 222, 222, 32)      896       
_________________________________________________________________
activation_1 (Activation)    (None, 222, 222, 32)      0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 111, 111, 32)      0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 111, 111, 32)      0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 109, 109, 64)      18496     
_________________________________________________________________
activation_2 (Activation)    (None, 109, 109, 64)      0         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 54, 54, 64)        0         
__________

In [18]:
start_time = time.time()

model1.fit(X_train, Y_train, 
         batch_size=batch_size, epochs=epochs, 
         callbacks=[early_stopping],
         validation_data=(X_valid, Y_valid), shuffle=True)

end_time = time.time()
print ('The time for fitting training data into model 1 is {}'.format(end_time-start_time))

Train on 1184 samples, validate on 297 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
The time for fitting training data into model 1 is 1639.1563205718994


### Very Deep Convolutional NetWorks for Large-Scale Image Recognition (VGG16 Model)

In [23]:
from keras.applications.vgg16 import VGG16
from keras.models import Model 

vgg16_model = VGG16(weights = 'imagenet', include_top=False, 
                   input_shape=(img_w, img_h, 3))
top_model = Sequential()
top_model.add(Flatten(input_shape = vgg16_model.output_shape[1:]))
top_model.add(Dense(256, activation='relu'))
top_model.add(Dropout(0.5))
top_model.add(Dense(3,activation='softmax'))

model2 = Model(input=vgg16_model.input, output=top_model(vgg16_model.output))
model2.compile(loss='categorical_crossentropy', 
             optimizer=sgd,
             metrics = ['accuracy'])

  if sys.path[0] == '':


In [24]:
model2.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

In [25]:
start_time = time.time()
model2.fit(X_train, Y_train, 
         batch_size=batch_size, epochs=epochs, 
         callbacks=[early_stopping], 
         validation_data=(X_valid, Y_valid), shuffle=True)

end_time = time.time()
print ('The time for fitting training data into model 2 is {}'.format(end_time-start_time))
model2.save_weights('model2.h5')

Train on 1184 samples, validate on 297 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
The time for fitting training data into model 2 is 24517.816619873047


### ResNet Model

In [26]:
from keras.applications.resnet50 import ResNet50
from keras.models import Model 

resnet50_model = ResNet50(weights = 'imagenet', include_top=False, 
                   input_shape=(img_w, img_h, 3))
add_model = Sequential()
add_model.add(Flatten(input_shape =resnet50_model.output_shape[1:]))
add_model.add(Dense(256, activation='relu'))
add_model.add(Dropout(0.5))
add_model.add(Dense(3,activation='softmax'))

model3 = Model(input=resnet50_model.input, output=add_model(resnet50_model.output))
model3.compile(loss='categorical_crossentropy', 
             optimizer=sgd,
             metrics = ['accuracy'])

  if sys.path[0] == '':


In [27]:
start_time = time.time()
model3.fit(X_train, Y_train, 
         batch_size=batch_size, epochs=epochs, 
         callbacks=[early_stopping], 
         validation_data=(X_valid, Y_valid), shuffle=True)

end_time = time.time()
print ('The time for fitting training data into model 3 is {}'.format(end_time-start_time))

model3.save_weights('model3.h5')

Train on 1184 samples, validate on 297 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
The time for fitting training data into model 3 is 13474.341937065125


### TUNING ON MODEL 1 USING DIFFERENT COMBINATION OF LOSS FUNCTION AND OPTIMIZER

In [34]:
def build_model(loss_function, optimizer):
    model = Sequential()
    kernel_size=(3,3)
    data_size=(224,224,3)
    for i in range(5, 8):
        model.add(Conv2D(2^i, kernel_size, input_shape=data_size, data_format='channels_last'))
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2)))
        model.add(Dropout(0.25))
        
    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(3, activation='softmax'))
    model.compile(loss=loss_function, optimizer = optimizer, 
                 metrics=['accuracy'])
    return model

In [35]:
from keras import optimizers
loss_functions = ['mean_squared_error', 
                 'categorical_crossentropy', 
                 'binary_crossentropy']

sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
adadelta = optimizers.Adagrad(lr=0.01, epsilon=None, decay=0.0)
adam = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)

optimizers = [sgd, adadelta, adam]

In [36]:
combinations = [loss_functions, optimizers]
import itertools
combinations = list(itertools.product(*combinations))
print ('The number of combinations is ', len(combinations))

The number of combinations is  9


In [37]:
#Mew Early stopping callback
patience = 10
early_stopping = EarlyStopping(monitor='val_acc', min_delta=0.01, 
                              patience = patience, verbose=0, mode='auto')

In [57]:
for i in range(len(combinations)):
    loss_function = combinations[i][0]
    optimizer = combinations[i][1]
    print ('Loss function: ', loss_function)
    print ('Optimizer: ', optimizer)
    start_time = time.time()
    model = build_model(loss_function, optimizer)
    modelcheckpoint = ModelCheckpoint('Model1-{}.h5'.format(str(i)), 
                                            monitor='val_acc', 
                                            verbose=1, save_best_only=True,  
                                            mode='auto', period=1)
    model.fit(X_train, Y_train, 
              batch_size=batch_size, 
             epochs=epochs, 
             callbacks=[modelcheckpoint, early_stopping], 
             validation_data=[X_valid, Y_valid])
    print ('Training time for this model is {}'.format(time.time()-start_time))
    print ('')
    print ('')

Loss function:  mean_squared_error
Optimizer:  <keras.optimizers.SGD object at 0x2b4edb69c278>
Train on 1184 samples, validate on 297 samples
Epoch 1/50

Epoch 00001: val_acc improved from -inf to 0.51178, saving model to Model1-0.h5
Epoch 2/50

Epoch 00002: val_acc did not improve from 0.51178
Epoch 3/50

Epoch 00003: val_acc did not improve from 0.51178
Epoch 4/50

Epoch 00004: val_acc did not improve from 0.51178
Epoch 5/50

Epoch 00005: val_acc improved from 0.51178 to 0.52525, saving model to Model1-0.h5
Epoch 6/50

Epoch 00006: val_acc did not improve from 0.52525
Epoch 7/50

Epoch 00007: val_acc did not improve from 0.52525
Epoch 8/50

Epoch 00008: val_acc did not improve from 0.52525
Epoch 9/50

Epoch 00009: val_acc improved from 0.52525 to 0.52525, saving model to Model1-0.h5
Epoch 10/50

Epoch 00010: val_acc did not improve from 0.52525
Epoch 11/50

Epoch 00011: val_acc did not improve from 0.52525
Epoch 12/50

Epoch 00012: val_acc improved from 0.52525 to 0.53199, saving mod


Epoch 00017: val_acc did not improve from 0.54209
Epoch 18/50

Epoch 00018: val_acc did not improve from 0.54209
Epoch 19/50

Epoch 00019: val_acc did not improve from 0.54209
Epoch 20/50

Epoch 00020: val_acc did not improve from 0.54209
Epoch 21/50

Epoch 00021: val_acc did not improve from 0.54209
Epoch 22/50

Epoch 00022: val_acc did not improve from 0.54209
Training time for this model is 941.0249173641205


Loss function:  mean_squared_error
Optimizer:  <keras.optimizers.Adam object at 0x2b4edb69c320>
Train on 1184 samples, validate on 297 samples
Epoch 1/50

Epoch 00001: val_acc improved from -inf to 0.52525, saving model to Model1-2.h5
Epoch 2/50

Epoch 00002: val_acc did not improve from 0.52525
Epoch 3/50

Epoch 00003: val_acc did not improve from 0.52525
Epoch 4/50

Epoch 00004: val_acc improved from 0.52525 to 0.52862, saving model to Model1-2.h5
Epoch 5/50

Epoch 00005: val_acc did not improve from 0.52862
Epoch 6/50

Epoch 00006: val_acc improved from 0.52862 to 0.53199,


Epoch 00012: val_acc did not improve from 0.53199
Epoch 13/50

Epoch 00013: val_acc did not improve from 0.53199
Epoch 14/50

Epoch 00014: val_acc did not improve from 0.53199
Training time for this model is 556.4695749282837


Loss function:  categorical_crossentropy
Optimizer:  <keras.optimizers.Adam object at 0x2b4edb69c320>
Train on 1184 samples, validate on 297 samples
Epoch 1/50

Epoch 00001: val_acc improved from -inf to 0.51178, saving model to Model1-5.h5
Epoch 2/50

Epoch 00002: val_acc did not improve from 0.51178
Epoch 3/50

Epoch 00003: val_acc did not improve from 0.51178
Epoch 4/50

Epoch 00004: val_acc did not improve from 0.51178
Epoch 5/50

Epoch 00005: val_acc did not improve from 0.51178
Epoch 6/50

Epoch 00006: val_acc did not improve from 0.51178
Epoch 7/50

Epoch 00007: val_acc did not improve from 0.51178
Epoch 8/50

Epoch 00008: val_acc did not improve from 0.51178
Epoch 9/50

Epoch 00009: val_acc did not improve from 0.51178
Epoch 10/50

Epoch 00010: val_acc 


Epoch 00007: val_acc did not improve from 0.66667
Epoch 8/50

Epoch 00008: val_acc did not improve from 0.66667
Epoch 9/50

Epoch 00009: val_acc did not improve from 0.66667
Epoch 10/50

Epoch 00010: val_acc did not improve from 0.66667
Epoch 11/50

Epoch 00011: val_acc did not improve from 0.66667
Training time for this model is 446.62636947631836


Loss function:  binary_crossentropy
Optimizer:  <keras.optimizers.Adam object at 0x2b4edb69c320>
Train on 1184 samples, validate on 297 samples
Epoch 1/50

Epoch 00001: val_acc improved from -inf to 0.66667, saving model to Model1-8.h5
Epoch 2/50

Epoch 00002: val_acc did not improve from 0.66667
Epoch 3/50

Epoch 00003: val_acc did not improve from 0.66667
Epoch 4/50

Epoch 00004: val_acc did not improve from 0.66667
Epoch 5/50

Epoch 00005: val_acc did not improve from 0.66667
Epoch 6/50

Epoch 00006: val_acc did not improve from 0.66667
Epoch 7/50

Epoch 00007: val_acc did not improve from 0.66667
Epoch 8/50

Epoch 00008: val_acc did n

In [83]:
from keras.models import load_model
model1_paths = glob('Model1-[0-9].h5')
scores = []
accs = []
for path in model1_paths:
    model = load_model(path)
    score, acc = model.evaluate(X_valid, Y_valid)
    scores.append(score)
    accs.append(acc)



In [90]:
best_acc = max(accs)
best_acc_idx = accs.index(best_acc)
best_combo = combinations[best_acc_idx]
print ('The best loss-function/optimizer combination is ', combinations[best_acc_idx])

The best loss-function/optimizer combination is  ('mean_squared_error', <keras.optimizers.Adagrad object at 0x2b4edb69c240>)


According to the results listed above, the combination between mean_squared_error as a loss function and adagrad optimizer is the best combo out of the 9 tested combos. Now let's see if changing the number of batch-size from 16 to 200 would give us a better result. 

In [None]:
loss_function = best_combo[0]
optimizer = best_combo[1]
print ('Loss function: ', loss_function)
print ('Optimizer: ', optimizer)
start_time = time.time()
model1_opt1 = build_model(loss_function, optimizer)
modelcheckpoint = ModelCheckpoint('Model1-op1.h5'.format(str(i)), 
                                        monitor='val_acc', 
                                        verbose=1, save_best_only=True,  
                                        mode='auto', period=1)
model1_opt1.fit(X_train, Y_train, 
          batch_size=200, 
         epochs=epochs, 
         callbacks=[modelcheckpoint, early_stopping], 
         validation_data=[X_valid, Y_valid])
print ('Training time for this model is {}'.format(time.time()-start_time))