In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from os import listdir
from glob import glob
from PIL import Image
import os
import cv2
import scipy.ndimage as ndimage
from skimage import io

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,confusion_matrix,precision_score,recall_score
from sklearn.metrics import roc_curve,auc

In [3]:
import tensorflow as tf
from tensorflow import keras
from keras.utils import load_img, img_to_array
from keras.losses import binary_crossentropy
from keras.models import Sequential,load_model
from keras.layers import Conv2D,MaxPooling2D,Flatten,Dense,Dropout,BatchNormalization 
from keras.callbacks import EarlyStopping,ModelCheckpoint
from keras import backend as K
from keras import activations
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers


from numpy import expand_dims


In [45]:
train_df=pd.read_csv("../data/processed/trainpath_df3.csv")
val_df=pd.read_csv("../data/processed/valpath_df3.csv")
test_df=pd.read_csv("../data/processed/testpath_df3.csv")

In [33]:
def load_data(files):
    X = []
    y = []
    for file in files:
        img = load_img(file, target_size = (50,50))
        pix = img_to_array(img)
        pix /= 255
        X.append(pix)
        if(file[-5] == '1'):
             y.append(1)
        elif(file[-5] == '0'):
            y.append(0)
    return np.stack(X), y

### Data Augmentation

In [34]:
def load_aug_data(files):
    X = []
    y = []
    datagen = ImageDataGenerator(horizontal_flip=True)

    for file in files:
        if(file[-5] == '1'):
            img = load_img(file, target_size = (50,50))
            data = img_to_array(img)
            pix =data/ 255
            X.append(pix)
            y.append(1)
            samples=expand_dims(data,0)
            it = datagen.flow(samples, batch_size=1) 
            batch=it.next()
            X.append(np.squeeze(batch, axis=0))
            y.append(1)

        elif(file[-5] == '0'):
            img = load_img(file, target_size = (50,50))
            pix = img_to_array(img)
            pix /= 255
            X.append(pix)
            y.append(0)
    return np.stack(X), y

In [46]:
X_aug,y_aug=load_aug_data(train_df.path)
y_aug=np.vstack(y_aug)
X_aug.shape,y_aug.shape

((242462, 50, 50, 3), (242462, 1))

In [47]:
np.count_nonzero(y_aug == 1),np.count_nonzero(y_aug == 0)

(105912, 136550)

In [48]:
X_val,y_val=load_data(val_df.path) 
y_val=np.vstack(y_val)
X_val.shape,y_val.shape

((47262, 50, 50, 3), (47262, 1))

### Training the model with data augmentation

In [38]:
opt=optimizers.SGD(learning_rate=1e-3, momentum=0.9)

In [49]:
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', padding="same", input_shape=(50, 50, 3)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(BatchNormalization())

model.add(Conv2D(64, kernel_size=(3, 3), activation='relu', padding="same"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(BatchNormalization())


model.add(Conv2D(128, kernel_size=(3, 3), activation='relu', padding="same"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(BatchNormalization())


model.add(Conv2D(128, kernel_size=(3, 3), activation='relu', padding="same"))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())
#model.add(Dense(256, activation='relu'))
model.add(Dense(128, activation = 'relu'))
model.add(Dropout(0.3))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss=keras.losses.binary_crossentropy,
              optimizer=opt,
              metrics=['acc'])

checkpoint = ModelCheckpoint("../models/final_aug_best_sgd3.hdf5", monitor='val_acc', verbose=1,
    save_best_only=True, mode='max')
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10)



In [50]:
M=model.fit(X_aug, y_aug,validation_data=(X_val, y_val),  epochs=100,batch_size=256, callbacks=[checkpoint,es])

Epoch 1/100
Epoch 1: val_acc improved from -inf to 0.70797, saving model to ../models/final_aug_best_sgd3.hdf5
Epoch 2/100
Epoch 2: val_acc did not improve from 0.70797
Epoch 3/100
Epoch 3: val_acc did not improve from 0.70797
Epoch 4/100
Epoch 4: val_acc did not improve from 0.70797
Epoch 5/100
Epoch 5: val_acc did not improve from 0.70797
Epoch 6/100
Epoch 6: val_acc did not improve from 0.70797
Epoch 7/100
Epoch 7: val_acc did not improve from 0.70797
Epoch 8/100
Epoch 8: val_acc did not improve from 0.70797
Epoch 9/100
Epoch 9: val_acc did not improve from 0.70797
Epoch 10/100
Epoch 10: val_acc did not improve from 0.70797
Epoch 11/100
Epoch 11: val_acc did not improve from 0.70797
Epoch 11: early stopping


final_imbalance_best_sgd.hdf5 : 0.8124
final_aug_best_sgd1.hdf5 : 0.
final_aug_best_sgd2.hdf5 : 0.7835

For model final_aug_best_sgd1.hdf5

In [14]:
X_test,y_test=load_data(test_df.path)
y_test=np.vstack(y_test)
X_test.shape,y_test.shape

((47599, 50, 50, 3), (47599, 1))

In [15]:
test_loss, test_acc = model.evaluate(X_test,y_test, verbose=2)

print('\nTest accuracy:', test_acc)
print('\nTest Loss:', test_loss)

1488/1488 - 13s - loss: 343.2090 - acc: 0.7127 - 13s/epoch - 9ms/step

Test accuracy: 0.7127460837364197

Test Loss: 343.208984375


In [16]:
Y_pred = model.predict(X_test)




In [17]:
y_pred = (Y_pred > 0.5).astype(np.int64)

confusion_matrix(y_test, y_pred)

array([[33925,     2],
       [13671,     1]])

In [18]:
recall = recall_score(y_test, y_pred, average='macro')
precision = precision_score(y_test, y_pred, average='macro')
print("Precision:" ,precision)
print("Recall:", recall)

Precision: 0.523051657001989
Recall: 0.5000070960448364


For model final_aug_best_sgd2.hdf5

In [28]:
X_test,y_test=load_data(test_df.path)
y_test=np.vstack(y_test)
X_test.shape,y_test.shape

((44418, 50, 50, 3), (44418, 1))

In [29]:
test_loss, test_acc = model.evaluate(X_test,y_test, verbose=2)

print('\nTest accuracy:', test_acc)
print('\nTest Loss:', test_loss)
Y_pred = model.predict(X_test)
y_pred = (Y_pred > 0.5).astype(np.int64)

confusion_matrix(y_test, y_pred)

1389/1389 - 12s - loss: 133.2797 - acc: 0.7071 - 12s/epoch - 9ms/step

Test accuracy: 0.707078218460083

Test Loss: 133.27969360351562


array([[31234,   628],
       [12383,   173]])

In [30]:
recall = recall_score(y_test, y_pred, average='macro')
precision = precision_score(y_test, y_pred, average='macro')
print("Precision:" ,precision)
print("Recall:", recall)

Precision: 0.46603847982511026
Recall: 0.49703413698158205


For model final_aug_best_sgd.hdf5

In [42]:
test_df=pd.read_csv("../data/processed/testpath_df.csv")
X_test,y_test=load_data(test_df.path)
y_test=np.vstack(y_test)
X_test.shape,y_test.shape

((40573, 50, 50, 3), (40573, 1))

In [43]:
test_loss, test_acc = model.evaluate(X_test,y_test, verbose=2)

print('\nTest accuracy:', test_acc)
print('\nTest Loss:', test_loss)
Y_pred = model.predict(X_test)
y_pred = (Y_pred > 0.5).astype(np.int64)

confusion_matrix(y_test, y_pred)

1268/1268 - 11s - loss: 119.7890 - acc: 0.7418 - 11s/epoch - 9ms/step

Test accuracy: 0.7418480515480042

Test Loss: 119.78897857666016


array([[29996,   468],
       [10006,   103]])

In [44]:
recall = recall_score(y_test, y_pred, average='macro')
precision = precision_score(y_test, y_pred, average='macro')
print("Precision:" ,precision)
print("Recall:", recall)

Precision: 0.46512389792069064
Recall: 0.4974132727950217


For model final_aug_best_sgd3.hdf5

In [51]:
X_test,y_test=load_data(test_df.path)
y_test=np.vstack(y_test)
X_test.shape,y_test.shape

((40756, 50, 50, 3), (40756, 1))

In [52]:
test_loss, test_acc = model.evaluate(X_test,y_test, verbose=2)

print('\nTest accuracy:', test_acc)
print('\nTest Loss:', test_loss)
Y_pred = model.predict(X_test)
y_pred = (Y_pred > 0.5).astype(np.int64)

confusion_matrix(y_test, y_pred)

1274/1274 - 11s - loss: 148.2442 - acc: 0.6918 - 11s/epoch - 9ms/step

Test accuracy: 0.6917508840560913

Test Loss: 148.24417114257812


array([[27982,   386],
       [12177,   211]])

In [53]:
recall = recall_score(y_test, y_pred, average='macro')
precision = precision_score(y_test, y_pred, average='macro')
print("Precision:" ,precision)
print("Recall:", recall)

Precision: 0.5251070670800486
Recall: 0.5017128656063461
