In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from os import listdir
from glob import glob
from PIL import Image
import os
import cv2
import scipy.ndimage as ndimage
from skimage import io

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,confusion_matrix,precision_score,recall_score
from sklearn.metrics import roc_curve,auc

In [3]:
import tensorflow as tf
from tensorflow import keras
from keras import Model
from keras.utils import load_img, img_to_array
from keras.losses import binary_crossentropy
from keras.models import Sequential,load_model
from keras.layers import Conv2D,MaxPooling2D,Flatten,Dense,Dropout,BatchNormalization,GlobalAveragePooling2D
from keras.callbacks import EarlyStopping,ModelCheckpoint
from keras import backend as K
from keras import activations
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers


from numpy import expand_dims


In [4]:
from tensorflow.keras.applications.vgg16 import VGG16


In [5]:
train_df=pd.read_csv("../data/processed/trainpath_df2.csv")
val_df=pd.read_csv("../data/processed/valpath_df2.csv")
test_df=pd.read_csv("../data/processed/testpath_df2.csv")

In [6]:
def load_data(files):
    X = []
    y = []
    for file in files:
        img = load_img(file, target_size = (50,50))
        pix = img_to_array(img)
        pix /= 255
        X.append(pix)
        if(file[-5] == '1'):
             y.append(1)
        elif(file[-5] == '0'):
            y.append(0)
    return np.stack(X), y

In [7]:
X,y=load_data(train_df.path)
y=np.vstack(y)
X.shape,y.shape

((187022, 50, 50, 3), (187022, 1))

In [8]:
X_val,y_val=load_data(val_df.path) 
y_val=np.vstack(y_val)
X_val.shape,y_val.shape

((46084, 50, 50, 3), (46084, 1))

In [9]:
base_model = VGG16(input_shape = (50, 50, 3), # Shape of our images
include_top = False, # Leave out the last fully connected layer
weights = 'imagenet')

In [10]:
base_model.trainable = False

In [11]:
# Flatten the output layer to 1 dimension
x = base_model.output
x = GlobalAveragePooling2D()(x)
# Add a fully connected layer with 512 hidden units and ReLU activation
x = Dense(512, activation='relu')(x)

# Add a dropout rate of 0.5
x = Dropout(0.5)(x)

# Add a final sigmoid layer with 1 node for classification output
x = Dense(1, activation='sigmoid')(x)

In [12]:
model = Model(inputs=base_model.input, outputs=x)

In [13]:
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 50, 50, 3)]       0         
                                                                 
 block1_conv1 (Conv2D)       (None, 50, 50, 64)        1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 50, 50, 64)        36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 25, 25, 64)        0         
                                                                 
 block2_conv1 (Conv2D)       (None, 25, 25, 128)       73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 25, 25, 128)       147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 12, 12, 128)       0     

In [14]:
#opt=optimizers.Adam(learning_rate=0.001)
opt=optimizers.SGD(learning_rate=1e-3, momentum=0.9)

In [15]:
model.compile(loss=keras.losses.binary_crossentropy,
              optimizer=opt,
              metrics=['accuracy'])

checkpoint = ModelCheckpoint("../models/pretrained_vgc_sgd2.hdf5", monitor='val_accuracy', verbose=1,
    save_best_only=True, mode='max')
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5)



In [16]:
M=model.fit(X, y,validation_data=(X_val, y_val),verbose = 1,epochs = 100,callbacks=[checkpoint,es])

Epoch 1/100
Epoch 1: val_accuracy improved from -inf to 0.75213, saving model to ../models/pretrained_vgc_sgd2.hdf5
Epoch 2/100
Epoch 2: val_accuracy improved from 0.75213 to 0.79110, saving model to ../models/pretrained_vgc_sgd2.hdf5
Epoch 3/100
Epoch 3: val_accuracy improved from 0.79110 to 0.79952, saving model to ../models/pretrained_vgc_sgd2.hdf5
Epoch 4/100
Epoch 4: val_accuracy did not improve from 0.79952
Epoch 5/100
Epoch 5: val_accuracy did not improve from 0.79952
Epoch 6/100
Epoch 6: val_accuracy did not improve from 0.79952
Epoch 7/100
Epoch 7: val_accuracy did not improve from 0.79952
Epoch 8/100
Epoch 8: val_accuracy did not improve from 0.79952
Epoch 8: early stopping


In [17]:
X_test,y_test=load_data(test_df.path)
y_test=np.vstack(y_test)
X_test.shape,y_test.shape

((44418, 50, 50, 3), (44418, 1))

In [18]:
test_loss, test_acc = model.evaluate(X_test,y_test, verbose=2)

print('\nTest accuracy:', test_acc)
print('\nTest Loss:', test_loss)
Y_pred = model.predict(X_test)
y_pred = (Y_pred > 0.5).astype(np.int64)

confusion_matrix(y_test, y_pred)

1389/1389 - 27s - loss: 0.4200 - accuracy: 0.8247 - 27s/epoch - 20ms/step

Test accuracy: 0.8247107267379761

Test Loss: 0.4200327694416046


array([[29295,  2567],
       [ 5219,  7337]])

In [19]:
recall = recall_score(y_test, y_pred, average='macro')
precision = precision_score(y_test, y_pred, average='macro')
print("Precision:" ,precision)
print("Recall:", recall)

Precision: 0.7947988965494839
Recall: 0.7518879777394586


In [22]:
print("Classification Report:",classification_report(y_test,y_pred))

Classification Report:               precision    recall  f1-score   support

           0       0.85      0.92      0.88     31862
           1       0.74      0.58      0.65     12556

    accuracy                           0.82     44418
   macro avg       0.79      0.75      0.77     44418
weighted avg       0.82      0.82      0.82     44418

