### LOAD DATASET

In [57]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2
from glob import glob 
import time
from datetime import datetime
from sklearn.model_selection import KFold, train_test_split
from sklearn.utils import shuffle

from keras.utils import np_utils
from keras.models import Sequential 
from keras.layers import Activation, Dropout, Flatten, Dense, Conv2D, MaxPooling2D, ZeroPadding2D
from keras import optimizers
from keras.callbacks import EarlyStopping, TensorBoard

train = pd.read_csv('training_images_size.csv', sep='\t')
test = pd.read_csv('testing_images_size.csv', sep='\t')

### DATA PREPROCESSING 

In [22]:
img_h = 100
img_w = 100
def read_img(path,):
    'read, resize, and convert an image to grayscale'
    img = cv2.imread(path)
    resize = cv2.resize(img, (img_h, img_w), cv2.INTER_LINEAR)
    #gray = cv2.cvtColor(resize, cv2.COLOR_BGR2GRAY)
    return resize

In [23]:
train_paths = train.imagepath.values
test_paths = glob('test/*')

In [24]:
def load_train(train_path):
    train_data = []
    train_target = []
    train_id = []
    start_time = time.time()
    for path in train_paths:
        train_id.append(path)
        train_data.append(read_img(path))
        target = path.split('/')[1]
        target = target.split('_')[1]
        train_target.append(target)
    print ('Training data load time: {}'.format(time.time() - start_time))
    return train_data, train_target, train_id

def load_test(test_paths):
    test_data = []
    test_id = []
    start_time = time.time()
    for path in test_paths:
        test_id.append(path)
        test_data.append(read_img(path))
    print ('Testing data load time: {}'.format(time.time() - start_time))
    return test_data, test_id

In [25]:
train_data, train_target, train_id = load_train(train_paths)
test_data, test_id = load_test(test_paths)

Training data load time: 261.52671217918396
Testing data load time: 91.19738173484802


In [26]:
def normalize_data(data):
    data = np.array(data, dtype=np.uint8)
    data = data.astype('float32')
    data = data/255
    return data

train_data = normalize_data(train_data)
print ('Shape of the training data is ', train_data.shape)

Shape of the training data is  (1481, 100, 100, 3)


In [27]:
test_data = normalize_data(test_data)
print ('Shape of the training data is ', test_data.shape)


Shape of the training data is  (503, 100, 100, 3)


In [28]:
# Convert train_target to one-hot-encoding before fitting into the model
train_label = np_utils.to_categorical(train_target)
train_label = np.transpose(train_label)
train_label = np.transpose(train_label[~(train_label==0).all(axis=1)])

In [29]:
# Create validation split 
X_train, X_valid, Y_train, Y_valid = train_test_split(train_data, train_label, test_size=0.2, random_state=22)

### BUILDING MODEL

In [48]:
#Training hyperparameters
epochs = 50
batch_size = 16

In [49]:
#Early stopping callback
patience = 5
early_stopping = EarlyStopping(monitor='val_acc', min_delta=0.02, 
                              patience = patience, verbose=0, mode='auto')

In [50]:
#Build the first model with a simple stack of 3 convolution layers with a ReLU activation and max_pooling

model1 = Sequential()
model1.add(Conv2D(32, (3, 3), input_shape=(img_h, img_w, 3), data_format='channels_last'))
model1.add(Activation('relu'))
model1.add(MaxPooling2D(pool_size=(2,2), strides=(2,2)))
model1.add(Dropout(0.25))

model1.add(Conv2D(64, (3, 3), data_format='channels_last'))
model1.add(Activation('relu'))
model1.add(MaxPooling2D(pool_size=(2,2), strides=(2,2)))
model1.add(Dropout(0.25))

model1.add(Conv2D(128, (3,3), data_format='channels_last'))
model1.add(Activation('relu'))
model1.add(MaxPooling2D(pool_size=(2,2), strides=(2,2)))
model1.add(Dropout(0.25))

#Add flatten
model1.add(Flatten())
model1.add(Dense(256, activation='relu'))
model1.add(Dropout(0.5))
model1.add(Dense(3, activation='softmax'))

sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model1.compile(loss='categorical_crossentropy', 
             optimizer=sgd,
             metrics = ['accuracy'])


model1.fit(X_train, Y_train, 
         batch_size=batch_size, epochs=epochs, 
         callbacks=[early_stopping], 
         validation_data=(X_valid, Y_valid), shuffle=True)

Train on 1184 samples, validate on 297 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50


<keras.callbacks.History at 0x2aed6f020588>

In [96]:
model1.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_35 (Conv2D)           (None, 98, 98, 32)        896       
_________________________________________________________________
activation_37 (Activation)   (None, 98, 98, 32)        0         
_________________________________________________________________
max_pooling2d_32 (MaxPooling (None, 49, 49, 32)        0         
_________________________________________________________________
dropout_33 (Dropout)         (None, 49, 49, 32)        0         
_________________________________________________________________
conv2d_36 (Conv2D)           (None, 47, 47, 64)        18496     
_________________________________________________________________
activation_38 (Activation)   (None, 47, 47, 64)        0         
_________________________________________________________________
max_pooling2d_33 (MaxPooling (None, 23, 23, 64)        0         
__________

### Very Deep Convolutional NetWorks for Large-Scale Image Recognition (VGG16 Model)

In [82]:
from tensorflow.keras.utils import get_file
WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5'
weights_path = get_file('vgg16_weights.h5', WEIGHTS_PATH_NO_TOP)

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [101]:
from keras.applications.vgg16 import VGG16
from keras.models import Model 
from keras.layers import Input, Dense
vgg16_model = VGG16(weights = 'imagenet', include_top=False, 
                   input_shape=(img_w, img_h, 3))
top_model = Sequential()
top_model.add(Flatten(input_shape = vgg16_model.output_shape[1:]))
top_model.add(Dense(256, activation='relu'))
top_model.add(Dropout(0.5))
top_model.add(Dense(3,activation='sigmoid'))

model2 = Model(input=vgg16_model.input, output=top_model(vgg16_model.output))
model2.compile(loss='categorical_crossentropy', 
             optimizer=sgd,
             metrics = ['accuracy'])

  if sys.path[0] == '':


In [102]:
model2.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_12 (InputLayer)        (None, 100, 100, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 100, 100, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 100, 100, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 50, 50, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 50, 50, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 50, 50, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 25, 25, 128)       0         
__________

In [None]:
model2.fit(X_train, Y_train, 
         batch_size=batch_size, epochs=epochs, 
         callbacks=[early_stopping], 
         validation_data=(X_valid, Y_valid), shuffle=True)

Train on 1184 samples, validate on 297 samples
Epoch 1/50
Epoch 2/50
 128/1184 [==>...........................] - ETA: 15:21 - loss: 1.0986 - acc: 0.1797