In [None]:
# Mount Google Drive inside the Colab runtime; the later cells read the
# dataset from, and write models/metrics to, paths under this mount point.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Importing libraries

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam, RMSprop
import pickle
import os
import matplotlib.pyplot as plt
import numpy as np
from skimage import io
from skimage.color import rgb2gray
from skimage.transform import resize
import random
from tqdm import tqdm

from tensorflow.keras import backend as K
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input, decode_predictions
from tensorflow.keras.callbacks import ModelCheckpoint

from tensorflow.keras import models
from tensorflow.keras import layers
from tensorflow.keras import optimizers

from sklearn.metrics import roc_curve, auc
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

# Reading the pre-processed dataset

In [None]:
####################################
## Reading the pre-processed dataset

# Images (X) and labels (y) were pickled by an earlier pre-processing step.
path_X = '/content/drive/MyDrive/Agglutination/Dataset/Pre-Processed Dataset/' + 'Xcrop_rgb_v2.pickle'
path_y = '/content/drive/MyDrive/Agglutination/Dataset/Pre-Processed Dataset/' + 'ycrop_rgb_v2.pickle'

# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only load files that come from a trusted source.
# The `with` blocks guarantee the file handles are closed even if unpickling fails.
with open(path_X, "rb") as pickle_in:
    X = pickle.load(pickle_in)

with open(path_y, "rb") as pickle_in:
    y = pickle.load(pickle_in)

# Sanity check: both objects must expose .shape and should agree on the
# number of samples (first dimension).
print(X.shape, y.shape)

(1285, 224, 224, 3) (1285,)


# Lists for saving the calculated metrics

In [None]:
# Per-iteration metric accumulators: the training loop appends one value to
# each list per iteration, and the lists are later turned into a DataFrame.
accuracies, precisions, sensitivities = [], [], []
specificities, F1_scores, auc_values = [], [], []

# Training and testing the model for 50 iterations

In [None]:
def _report_class_balance(split_name, labels):
    """Print the negative/positive sample counts of one data split.

    A sample is counted as negative when its label equals 0 and as positive
    otherwise.

    Parameters
    ----------
    split_name : str
        Name inserted into the message ('Training', 'Validation', 'Testing').
    labels : array-like
        Labels belonging to that split.
    """
    labels = np.asarray(labels)
    n_neg = int(np.count_nonzero(labels == 0))
    n_pos = int(labels.shape[0]) - n_neg
    # A split without positives would otherwise raise ZeroDivisionError.
    rate = n_neg / n_pos if n_pos else float('inf')
    print('There are', n_neg, 'negatives images and', n_pos, 'positives images on the', split_name, 'dataset: rate=', rate)


# Repeated random-split evaluation: every iteration draws a fresh
# train/validation/test split, trains a new classifier on top of a frozen
# VGG16 convolutional base, evaluates it on the test split and appends the
# resulting metrics to the module-level lists.
for j in range(50):

    print('--------------')
    print('ITERATION:', j+1)
    print('--------------')

    # Keras keeps global state for every model that gets created; clearing it
    # prevents memory from growing while the model is rebuilt each iteration.
    K.clear_session()

    # 80/20 split into (train + validation)/test, then 80/20 into train/validation.
    X_train_, X_test, y_train_, y_test = train_test_split(X, y, test_size = 0.2, shuffle=True)
    X_train, X_val, y_train, y_val = train_test_split(X_train_, y_train_, test_size = 0.2, shuffle=True)

    print()
    print('---------------------------------------------------------------------------')
    print('There are', X_train.shape[0], 'images for the Training with their respectives', y_train.shape[0], 'labels.')
    print('There are', X_val.shape[0], 'images for the Validation with their respectives', y_val.shape[0], 'labels.')
    print('There are', X_test.shape[0], 'images for the Testing with their respectives', y_test.shape[0], 'labels.')
    print('---------------------------------------------------------------------------')
    print()

    print('-----------------------------------------------------------------------------------------------------------')
    _report_class_balance('Training', y_train)
    # The validation count previously indexed y_train, so it reported the
    # balance of the first len(y_val) training labels instead.
    _report_class_balance('Validation', y_val)
    _report_class_balance('Testing', y_test)
    print('-----------------------------------------------------------------------------------------------------------')
    print()

    ########################################
    ## Defining the Transfer Learning model

    # ImageNet-pretrained VGG16 without its classifier head, used as a frozen
    # feature extractor; only the two Dense layers added below are trained.
    conv_base = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    conv_base.trainable = False

    model = Sequential()
    model.add(conv_base)
    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.0005), metrics=['accuracy'])

    # summary() prints by itself and returns None, so it is not wrapped in print().
    model.summary()
    print()
    print('------------------------------------')
    print('Training the Tranfer Learning Model')
    print('------------------------------------')
    print()

    ######################
    ## Training the model

    history = model.fit(X_train, y_train, batch_size=32, epochs=30, shuffle=False, verbose=1, validation_data=(X_val, y_val))

    print()
    print('------------------------')
    print('Evaluation of the Model')
    print('------------------------')

    ############################################
    ## Testing the model on the Testing dataset

    test_loss, test_accuracy = model.evaluate(X_test, y_test)
    print()
    print('Accuracy on the Testing dataset:', test_accuracy*100, '%')
    print()

    #######################
    ## Calculating metrics

    # Predict once and reuse the scores for both the ROC curve and the
    # thresholded class predictions.
    y_preds = model.predict(X_test).ravel()
    fpr, tpr, thresholds = roc_curve(y_test, y_preds)
    auc_value = auc(fpr, tpr)

    mythreshold = 0.5
    y_pred = (y_preds >= mythreshold).astype(int)
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()

    acc = (tn + tp)/(tn + fp + fn + tp)
    prec = tp/(tp + fp)
    sens = tp/(tp + fn)
    spec = tn/(tn + fp)
    f1_score = (2*prec*sens)/(prec + sens)

    accuracies.append(acc)
    precisions.append(prec)
    sensitivities.append(sens)
    specificities.append(spec)
    F1_scores.append(f1_score)
    auc_values.append(auc_value)

    #############################################
    ## Saving the models with certain conditions

    # NOTE(review): '{:4f}' is a minimum field width of 4 with the default six
    # decimals; '{:.4f}' may have been intended. Left unchanged so the names
    # of saved model files stay the same.
    if test_accuracy > 0.96 and auc_value > 0.99:
        model.save('/content/drive/MyDrive/Agglutination/Best models/' + 'modelo_acc_{:4f}_iter_{}.h5'.format(test_accuracy,j+1))

[1;30;43mSe truncaron las últimas líneas 5000 del resultado de transmisión.[0m
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30

------------------------
Evaluation of the Model
------------------------

Accuracy on the Testing dataset: 95.7198441028595 %

--------------
ITERATION: 4
--------------

---------------------------------------------------------------------------
There are 822 images for the Training with their respectives 822 labels.
There are 206 images for the Validation with their respectives 206 labels.
There are 257 images for the Testing with their respectives 257 labels.
---------------------------------------------------------------------------

-----------------------------------------------------------------------------------------------------------
There are 400 negatives images and 422 positives images on the Training dataset: rate= 0.9478672985781991
There are 93 negatives images and 113 positives images on the Validation dataset: rate= 0.823008849

# Saving the calculated metrics to a .csv file

In [None]:
import pandas as pd

# Destination folder and file name of the exported metrics table.
path_csv = '/content/drive/MyDrive/Agglutination/'
name_metrics_csv = 'stats_metrics.csv'

# One column per metric; each list holds one value per training iteration,
# so the resulting table has one row per iteration.
dict_metrics = {
    'Accuracy': accuracies,
    'Precision': precisions,
    'Sensitivity': sensitivities,
    'Specificity': specificities,
    'F1 Score': F1_scores,
    'AUC': auc_values,
}
df_metrics = pd.DataFrame(dict_metrics)

# index=False keeps the row index out of the CSV file.
df_metrics.to_csv(f'{path_csv}{name_metrics_csv}', index=False)

# Last expression of the cell: display the table.
df_metrics

Unnamed: 0,Accuracy,Precision,Sensitivity,Specificity,F1 Score,AUC
0,0.957198,0.938356,0.985612,0.923729,0.961404,0.993781
1,0.957198,0.968,0.945312,0.968992,0.956522,0.994186
2,0.957198,0.954198,0.961538,0.952756,0.957854,0.994367
3,0.953307,0.991525,0.914062,0.992248,0.95122,0.996972
4,0.957198,0.984496,0.933824,0.983471,0.958491,0.996719
5,0.941634,0.913386,0.966667,0.919708,0.939271,0.991119
6,0.980545,1.0,0.962687,1.0,0.980989,0.997998
7,0.957198,0.984,0.931818,0.984,0.957198,0.997576
8,0.968872,1.0,0.935484,1.0,0.966667,0.995331
9,0.957198,0.976562,0.93985,0.975806,0.957854,0.991632


# Statistics of the calculated metrics

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of every metric
# column across all iterations stored in df_metrics.
df_metrics.describe()

Unnamed: 0,Accuracy,Precision,Sensitivity,Specificity,F1 Score,AUC
count,50.0,50.0,50.0,50.0,50.0,50.0
mean,0.960934,0.970852,0.954319,0.967754,0.961844,0.995242
std,0.013059,0.026698,0.029523,0.032003,0.012677,0.002324
min,0.914397,0.863636,0.869231,0.829268,0.923611,0.988594
25%,0.957198,0.954935,0.935655,0.955136,0.957385,0.993765
50%,0.961089,0.976831,0.961972,0.974783,0.964027,0.995315
75%,0.967899,0.991507,0.978062,0.991631,0.967502,0.996945
max,0.980545,1.0,1.0,1.0,0.981413,0.999212
