In [None]:
# Mount Google Drive at /content/drive so that the cells below can read the
# dataset from, and write models/metrics to, paths under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Importing Libraries

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam, RMSprop
import pickle
import os
import matplotlib.pyplot as plt
import numpy as np
from skimage import io
from skimage.color import rgb2gray
from skimage.transform import resize
import random
from tqdm import tqdm

from tensorflow.keras import backend as K
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input, decode_predictions
from tensorflow.keras.callbacks import ModelCheckpoint

from tensorflow.keras import models
from tensorflow.keras import layers
from tensorflow.keras import optimizers

from sklearn.metrics import roc_curve, auc
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

# Reading the pre-processed dataset

In [None]:
####################################
## Reading the pre-processed dataset

# Both pickles live in the same Drive folder; keep the folder in one place.
dataset_dir = '/content/drive/MyDrive/Agglutination/Dataset/Pre-Processed Dataset/'
path_X = dataset_dir + 'Xcrop_rgb_v2.pickle'
path_y = dataset_dir + 'ycrop_rgb_v2.pickle'

# NOTE(security): pickle.load can execute arbitrary code embedded in the file.
# Only load pickles that were produced by this project.
# The `with` blocks close each file handle even if unpickling raises.
with open(path_X, "rb") as pickle_in:
    X = pickle.load(pickle_in)

with open(path_y, "rb") as pickle_in:
    y = pickle.load(pickle_in)

# Every image needs exactly one label; fail here instead of inside the training loop.
assert len(X) == len(y), 'X and y must contain the same number of samples'

print(X.shape, y.shape)

(1285, 224, 224, 3) (1285,)


# Lists for saving the calculated metrics

In [None]:
# Six independent, initially empty lists -- one per evaluation metric.
accuracies, precisions, sensitivities, specificities, F1_scores, auc_values = (
    [] for _ in range(6)
)

# Training and testing the model using 5-fold cross-validation

In [None]:
from sklearn.model_selection import StratifiedKFold


def _class_counts(labels):
    """Return ``(negatives, positives)`` for an array of labels.

    A label equal to 0 counts as negative; every other label counts as positive.
    """
    negatives = int(np.count_nonzero(labels == 0))
    return negatives, len(labels) - negatives


def _ratio(numerator, denominator):
    """Return ``numerator / denominator``, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else float('nan')


n = 5
folds = StratifiedKFold(n_splits=n)

# Create the output folder up front so a missing directory is not discovered
# only after a complete fold has been trained.
models_dir = '/content/drive/MyDrive/Agglutination Google Project/cross-validaton models/'
os.makedirs(models_dir, exist_ok=True)

# Start from empty metric lists so that re-running this cell does not append
# a second set of folds to the results of a previous run.
for metric_values in (accuracies, precisions, sensitivities, specificities, F1_scores, auc_values):
    metric_values.clear()

# NOTE(review): no random seed is set and neither the fold split nor model.fit
# shuffles the data, so results depend on the stored sample order and on the
# random weight initialisation -- confirm this is intended.

# j is the 1-based fold number.
for j, (train_index, test_index) in enumerate(folds.split(X, y), start=1):

    print('--------------')
    print('ITERATION:', j)
    print('--------------')

    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    print()
    print('---------------------------------------------------------------------------')
    print('There are', X_train.shape[0], 'images for the Training with their respectives', y_train.shape[0], 'labels.')
    print('There are', X_test.shape[0], 'images for the Testing with their respectives', y_test.shape[0], 'labels.')
    print('---------------------------------------------------------------------------')
    print()

    print()
    print('-----------------------------------------------------------------------------------------------------------')
    sum_neg, sum_pos = _class_counts(y_train)
    print('There are', sum_neg, 'negatives images and', sum_pos, 'positives images on the Training dataset: rate=', _ratio(sum_neg, sum_pos))

    sum_neg, sum_pos = _class_counts(y_test)
    print('There are', sum_neg, 'negatives images and', sum_pos, 'positives images on the Testing dataset: rate=', _ratio(sum_neg, sum_pos))
    print('-----------------------------------------------------------------------------------------------------------')
    print()

    ########################################
    ## Defining the Transfer Learning model

    # A fresh network is built for every fold; drop the Keras global state left
    # by the previous fold so memory use does not grow from fold to fold.
    K.clear_session()

    # Pre-trained convolutional base, frozen so that only the new head is trained.
    conv_base = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    conv_base.trainable = False

    model = Sequential()
    model.add(conv_base)
    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.0005), metrics=['accuracy'])

    # summary() prints by itself and returns None, so it must not be wrapped in print().
    model.summary()
    print()
    print('------------------------------------')
    print('Training the Tranfer Learning Model')
    print('------------------------------------')
    print()

    ######################
    ## Training the model

    history = model.fit(X_train, y_train, batch_size=32, epochs=30, shuffle=False, verbose=1)

    print()
    print('------------------------')
    print('Evaluation of the Model')
    print('------------------------')

    ############################################
    ## Testing the model on the Testing dataset

    test_loss, test_accuracy = model.evaluate(X_test, y_test)
    print()
    print('Accuracy on the Testing dataset:', test_accuracy*100, '%')
    print()

    #######################
    ## Calculating metrics

    # Predicted probabilities: used as-is for the ROC curve and thresholded
    # below for the confusion matrix (a single forward pass over the test fold).
    y_preds = model.predict(X_test).ravel()
    fpr, tpr, thresholds = roc_curve(y_test, y_preds)
    auc_value = auc(fpr, tpr)

    mythreshold = 0.5
    y_pred = (y_preds >= mythreshold).astype(int)
    # labels=[0, 1] keeps the matrix 2x2 even if only one class shows up in a fold.
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=[0, 1]).ravel()

    acc = _ratio(tn + tp, tn + fp + fn + tp)
    prec = _ratio(tp, tp + fp)
    sens = _ratio(tp, tp + fn)
    spec = _ratio(tn, tn + fp)
    f1_score = _ratio(2*prec*sens, prec + sens)

    accuracies.append(acc)
    precisions.append(prec)
    sensitivities.append(sens)
    specificities.append(spec)
    F1_scores.append(f1_score)
    auc_values.append(auc_value)

    ##############################
    ## Saving all the five models 

    # NOTE(review): '{:4f}' means "minimum width 4" and therefore prints six
    # decimals; '{:.4f}' may have been intended. Left unchanged so that the
    # names of the saved files stay the same.
    model.save(models_dir + 'cv_model_acc_{:4f}_iter_{}.h5'.format(test_accuracy, j))

--------------
ITERATION: 1
--------------

---------------------------------------------------------------------------
There are 1028 images for the Training with their respectives 1028 labels.
There are 257 images for the Testing with their respectives 257 labels.
---------------------------------------------------------------------------


-----------------------------------------------------------------------------------------------------------
There are 496 negatives images and 532 positives images on the Training dataset: rate= 0.9323308270676691
There are 124 negatives images and 133 positives images on the Testing dataset: rate= 0.9323308270676691
-----------------------------------------------------------------------------------------------------------

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Functional)           (None, 7, 7, 512)         14714688  
_______

# Saving the calculated metrics to a .csv file

In [None]:
import pandas as pd

# Destination folder and file name of the metrics table.
path_csv = '/content/drive/MyDrive/Agglutination/'
name_metrics_csv = 'cv5_metrics.csv'

# One column per metric; each list contributes one row per stored value.
dict_metrics_cv = {
    'Accuracy': accuracies,
    'Precision': precisions,
    'Sensitivity': sensitivities,
    'Specificity': specificities,
    'F1 Score': F1_scores,
    'AUC': auc_values,
}
df_metrics_cv = pd.DataFrame(data=dict_metrics_cv)

# Write the table without the index column, then display it as the cell output.
df_metrics_cv.to_csv(f'{path_csv}{name_metrics_csv}', index=False)
df_metrics_cv

Unnamed: 0,Accuracy,Precision,Sensitivity,Specificity,F1 Score,AUC
0,0.953307,0.941606,0.969925,0.935484,0.955556,0.99042
1,0.961089,0.984252,0.93985,0.983871,0.961538,0.993088
2,0.964981,1.0,0.932331,1.0,0.964981,0.994179
3,0.984436,1.0,0.969925,1.0,0.984733,0.997393
4,0.968872,0.992126,0.947368,0.991935,0.969231,0.996726


# Statistics of the calculated metrics

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of every metric column.
df_metrics_cv.describe()

Unnamed: 0,Accuracy,Precision,Sensitivity,Specificity,F1 Score,AUC
count,5.0,5.0,5.0,5.0,5.0,5.0
mean,0.966537,0.983597,0.95188,0.982258,0.967208,0.994361
std,0.011543,0.024365,0.01731,0.026989,0.011001,0.002827
min,0.953307,0.941606,0.932331,0.935484,0.955556,0.99042
25%,0.961089,0.984252,0.93985,0.983871,0.961538,0.993088
50%,0.964981,0.992126,0.947368,0.991935,0.964981,0.994179
75%,0.968872,1.0,0.969925,1.0,0.969231,0.996726
max,0.984436,1.0,0.969925,1.0,0.984733,0.997393
