In [None]:
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
from keras.losses import sparse_categorical_crossentropy
from keras.optimizers import Adam
from sklearn.model_selection import KFold
import numpy as np
import json
import math
import os
import cv2
from PIL import Image
import numpy as np
from keras import layers
from keras.applications import ResNet50,MobileNet, DenseNet201, InceptionV3, NASNetLarge, InceptionResNetV2, NASNetMobile,VGG16,Xception
from keras.callbacks import Callback, ModelCheckpoint, ReduceLROnPlateau, TensorBoard,CSVLogger
from keras.preprocessing.image import ImageDataGenerator
from keras.utils.np_utils import to_categorical
from keras.models import Sequential
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import cohen_kappa_score, accuracy_score
from sklearn.preprocessing import LabelEncoder
import scipy
from tqdm import tqdm
import tensorflow as tf
from keras import backend as K
import gc
from functools import partial
from sklearn import metrics
from collections import Counter
import json
import itertools
from sklearn.metrics import f1_score
#confusion matrix
from sklearn.metrics import confusion_matrix

In [None]:
# Training / cross-validation settings shared by the cells below
num_folds = 10      # number of K-fold splits
no_epochs = 10      # epochs trained per fold
batch_size = 32     # mini-batch size passed to fit()
verbosity = 1       # Keras `verbose` level passed to fit()
# NOTE(review): build_model() constructs its own Adam instance, so this
# optimizer object does not appear to be used by the visible cells.
optimizer = Adam()

In [None]:
# Load the JPEG images of a folder into a list of RGB arrays
def Dataset_loader(DIR, RESIZE, sigmaX=10):
    """Read every JPEG file in ``DIR`` and return the images as a list of arrays.

    Parameters
    ----------
    DIR : str
        Folder to scan (not recursive).
    RESIZE : int
        Target edge length; each image is resized to ``(RESIZE, RESIZE)``.
    sigmaX : int, optional
        Unused; kept only so existing callers keep working.

    Returns
    -------
    list of numpy.ndarray
        One RGB array per image, in sorted file-name order.
    """
    jpeg_suffixes = (".jpg", ".jpeg")
    IMG = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # loaded sample order stable between runs and machines.
    for IMAGE_NAME in tqdm(sorted(os.listdir(DIR))):
        PATH = os.path.join(DIR, IMAGE_NAME)
        _, ftype = os.path.splitext(PATH)
        # Compare case-insensitively so files such as "IMG_01.JPG" are not
        # silently skipped.
        if ftype.lower() not in jpeg_suffixes:
            continue
        # The context manager closes the underlying file once the pixel data
        # has been copied into the array.
        with Image.open(PATH) as handle:
            img = np.asarray(handle.convert("RGB"))
        img = cv2.resize(img, (RESIZE, RESIZE))
        IMG.append(img)
    return IMG

# Root folder of the dataset
data_path = "/kaggle/input/x3data/x3"

# Load every split/class folder as an array of 224x224 RGB images
benign_train = np.array(Dataset_loader(f"{data_path}/train/Healthy", 224))
malign_train = np.array(Dataset_loader(f"{data_path}/train/Infected", 224))
benign_test = np.array(Dataset_loader(f"{data_path}/test/Healthy", 224))
malign_test = np.array(Dataset_loader(f"{data_path}/test/Infected", 224))

In [None]:
# Create labels: 0 for the Healthy folders, 1 for the Infected folders
benign_train_label = np.zeros(len(benign_train))
malign_train_label = np.ones(len(malign_train))
benign_test_label = np.zeros(len(benign_test))
malign_test_label = np.ones(len(malign_test))

# Merge data
X_train = np.concatenate((benign_train, malign_train), axis = 0)
Y_train = np.concatenate((benign_train_label, malign_train_label), axis = 0)
X_test = np.concatenate((benign_test, malign_test), axis = 0)
Y_test = np.concatenate((benign_test_label, malign_test_label), axis = 0)

# Seeded generator so the shuffled order (and everything derived from it)
# is identical on every Restart & Run All.
rng = np.random.default_rng(42)

# Shuffle train data (same permutation for images and labels)
s = rng.permutation(X_train.shape[0])
X_train = X_train[s]
Y_train = Y_train[s]

# Shuffle test data (same permutation for images and labels)
s = rng.permutation(X_test.shape[0])
X_test = X_test[s]
Y_test = Y_test[s]

# To categorical (one-hot, two columns)
Y_train = to_categorical(Y_train, num_classes= 2)
Y_test = to_categorical(Y_test, num_classes= 2)

In [None]:
# Accumulators filled by the K-fold loop: one accuracy and one loss per fold
acc_per_fold, loss_per_fold = [], []

In [None]:
# Pool the train and test splits (along the sample axis) so that K-fold
# can re-partition the whole dataset
inputs = np.concatenate([X_train, X_test])
targets = np.concatenate([Y_train, Y_test])

In [None]:
def build_model(backbone, lr=1e-4):
    """Put a 2-class softmax head on top of ``backbone`` and compile the model.

    Parameters
    ----------
    backbone : Keras model or layer
        Feature extractor added as the first layer of the Sequential model.
    lr : float, optional
        Learning rate for the Adam optimizer.

    Returns
    -------
    keras.models.Sequential
        The compiled model: backbone -> global average pooling -> dropout ->
        batch normalization -> Dense(2, softmax).
    """
    model = Sequential()
    model.add(backbone)
    model.add(layers.GlobalAveragePooling2D())
    model.add(layers.Dropout(0.5))
    model.add(layers.BatchNormalization())
    model.add(layers.Dense(2, activation='softmax'))

    model.compile(
        # The head is a 2-way softmax trained on one-hot targets, which is the
        # categorical cross-entropy formulation.
        loss='categorical_crossentropy',
        # `lr` is the deprecated spelling of this keyword; `learning_rate`
        # is the supported one.
        optimizer=Adam(learning_rate=lr),
        metrics=['accuracy']
    )

    return model

In [None]:
# Learning Rate Reducer
# The patience must be shorter than the number of epochs per fit() call,
# otherwise the scheduler can never fire (the former patience=10 with
# no_epochs=10 made it a no-op).
learn_control = ReduceLROnPlateau(monitor='val_accuracy',
                                  patience=max(1, no_epochs // 3),
                                  verbose=1, factor=0.2, min_lr=1e-7)

# Checkpoint: keep only the weights with the lowest validation loss seen so far
filepath="VGG16_x5.weights.best.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
# NOTE(review): the K-fold loop passes its own callback list to fit();
# this list does not appear to be used by the visible cells.
callbacks_list = [checkpoint]
# Append each epoch's metrics to a CSV log (append=True keeps earlier rows)
csv_logger = CSVLogger("model_history_log.csv", append=True)


In [None]:
# Define the K-fold Cross Validator
# A fixed random_state makes the shuffled fold assignment reproducible.
kfold = KFold(n_splits=num_folds, shuffle=True, random_state=42)

In [None]:
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print a confusion matrix and draw it as a heat map on the current figure.

    Parameters
    ----------
    cm : array-like of shape (n_classes, n_classes)
        Confusion matrix; rows are true labels, columns are predicted labels.
    classes : sequence of str
        Tick labels, one per class.
    normalize : bool, optional
        If True, every row is divided by its sum before printing/plotting.
        Rows that sum to zero stay all-zero instead of becoming NaN.
    title : str, optional
        Plot title.
    cmap : matplotlib colormap, optional
        Colormap used for the heat map.
    """
    cm = np.asarray(cm)
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # Divide only where the row total is non-zero; a class with no true
        # samples would otherwise produce NaN cells and a NaN colour threshold.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=55)
    plt.yticks(tick_marks, classes)

    # 'd' only works for integer counts; fall back to two decimals for any
    # float matrix (normalized or passed in as float).
    fmt = 'd' if np.issubdtype(cm.dtype, np.integer) else '.2f'
    # Cells darker than half the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()

In [None]:
# K-fold Cross Validation model evaluation
fold_no = 1
# Start from empty containers so re-running this cell does not append a
# second set of scores to the ones from a previous run.
acc_per_fold = []
loss_per_fold = []
f1_scores = []
cm_plot_label = ['healthy', 'infected']

# NOTE(review): `checkpoint` is one callback instance shared by all folds;
# confirm whether its best-so-far tracking should be reset per fold.
for train, test in kfold.split(inputs, targets):

    # Drop the previous fold's model/graph before building a fresh one
    K.clear_session()
    gc.collect()

    backbone = ResNet50(
        weights='imagenet',
        include_top=False,
        input_shape=(224,224,3)
    )
    model = build_model(backbone, lr=1e-4)

    # Generate a print
    print('------------------------------------------------------------------------')
    print(f'Training for fold {fold_no} ...')

    # Fit data to model; the held-out fold doubles as validation data
    history = model.fit(
        inputs[train], targets[train],
        validation_data=(inputs[test], targets[test]),
        batch_size=batch_size,
        epochs=no_epochs,
        verbose=verbosity,
        callbacks=[learn_control, checkpoint, csv_logger]
    )

    # Generate generalization metrics on the held-out fold
    scores = model.evaluate(inputs[test], targets[test], verbose=0)
    print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%')
    acc_per_fold.append(scores[1] * 100)
    loss_per_fold.append(scores[0])

    # Predict the held-out fold once and reuse it for F1 and the confusion matrix
    y_true_classes = np.argmax(targets[test], axis=1)
    y_pred = model.predict(inputs[test])
    y_pred_classes = np.argmax(y_pred, axis=1)
    fold_f1_score = f1_score(y_true_classes, y_pred_classes)
    f1_scores.append(fold_f1_score)

    # The confusion matrix must use the held-out fold: X_test was merged into
    # `inputs`, so most of it is part of this fold's training data and would
    # give an optimistically biased matrix.
    # labels=[0, 1] keeps the matrix 2x2 even if a class is missing in a fold.
    cm = confusion_matrix(y_true_classes, y_pred_classes, labels=[0, 1])

    # One figure per fold; otherwise every fold draws (and adds a colorbar)
    # onto the same axes.
    plt.figure()
    plot_confusion_matrix(cm, cm_plot_label, title=f'Confusion matrix - fold {fold_no}')
    plt.show()

    # Increase fold number
    fold_no = fold_no + 1


In [None]:
# Per-fold F1 scores collected by the K-fold loop
print(f1_scores)

# Mean F1 across folds
average_f1_score = np.asarray(f1_scores).mean()

print("Average F1 score:", average_f1_score)

In [None]:
# == Per-fold scores followed by their averages ==
separator = '------------------------------------------------------------------------'

print(separator)
print('Score per fold')
for fold_idx, (fold_loss, fold_acc) in enumerate(zip(loss_per_fold, acc_per_fold), start=1):
    print(separator)
    print(f'> Fold {fold_idx} - Loss: {fold_loss} - Accuracy: {fold_acc}%')
print(separator)
print('Average scores for all folds:')
print(f'> Accuracy: {np.mean(acc_per_fold)} (+- {np.std(acc_per_fold)})')
print(f'> Loss: {np.mean(loss_per_fold)}')
print(separator)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import json

def plot_history(his):
    """
    Save a Keras training history to ``history.json`` and plot its curves.

    Parameters
    ----------
    his : object with a ``history`` dict
        Maps metric names to per-epoch values. The keys 'accuracy',
        'val_accuracy', 'loss' and 'val_loss' must be present.

    Side effects
    ------------
    Writes ``history.json`` in the current working directory and draws two
    stacked subplots (accuracy, loss) on the current matplotlib figure.
    """
    # Convert every value to a plain float so the history is stored as a real
    # JSON object; dumping str(dict) would write a single quoted string that
    # cannot be loaded back as a dictionary.
    serializable_history = {
        metric: [float(value) for value in values]
        for metric, values in his.history.items()
    }
    with open('history.json', 'w') as f:
        json.dump(serializable_history, f)

    # Create a DataFrame from the history object
    history_df = pd.DataFrame(his.history)

    # Plot the training and validation accuracy
    plt.subplot(2, 1, 1)
    plt.plot(history_df['accuracy'])
    plt.plot(history_df['val_accuracy'])
    plt.title('Model accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='lower right')
    plt.ylim([0, 1])

    # Plot the training and validation loss
    plt.subplot(2, 1, 2)
    plt.plot(history_df['loss'])
    plt.plot(history_df['val_loss'])
    plt.title('Model loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper right')
    # Size the axis from both curves so a validation loss above the training
    # loss is not clipped.
    highest_loss = max(history_df['loss'].max(), history_df['val_loss'].max())
    plt.ylim([0, highest_loss + 1])

    # Show the plot
    plt.tight_layout()
    plt.show()

# `history` is reassigned on every iteration of the K-fold loop, so the curves
# drawn here belong to the last fold only.
plot_history(history)

In [None]:
# Restore the weights written by the ModelCheckpoint callback. Reusing
# `filepath` keeps this in sync with the name the callback saves to.
model.load_weights(filepath)

In [None]:
# Predict on X_test.
# NOTE(review): Y_val_pred is not read by any later cell in this notebook; the
# same prediction is stored again as Y_pred in the next cell.
Y_val_pred = model.predict(X_test)

In [None]:
# Softmax outputs for X_test, used by the confusion-matrix cell further down.
# NOTE(review): X_test was merged into `inputs` for the K-fold training, so
# these samples are not an untouched hold-out set — confirm before reporting
# the result as test performance.
Y_pred = model.predict(X_test)

In [None]:
# Average the model output over several prediction passes.
# NOTE(review): no augmentation is applied to X_test, so every pass should
# return the same values and the mean equals a single prediction — add an
# augmenting generator here if real test-time augmentation is intended.
tta_steps = 10
predictions = []
for _ in tqdm(range(tta_steps)):
    # Predict in mini-batches. With an in-memory array and no batch_size,
    # `steps=1` appears to push the whole test set through as one batch
    # (TODO confirm for the installed Keras version), which risks running
    # out of memory.
    preds = model.predict(X_test, batch_size=batch_size)

    predictions.append(preds)
    gc.collect()

Y_pred_tta = np.mean(predictions, axis=0)


In [None]:
# Show the averaged predictions (one row per test sample, one column per class)
print(Y_pred_tta)

In [None]:
# Confusion matrix for the plain (non-averaged) predictions on X_test.
# `confusion_matrix` is imported in the first cell and plot_confusion_matrix()
# is defined earlier in the notebook, so neither is repeated here.
cm = confusion_matrix(np.argmax(Y_test, axis=1), np.argmax(Y_pred, axis=1))

cm_plot_label =['healthy', 'infected']
plot_confusion_matrix(cm, cm_plot_label, title ='Confusion Matrix for ALL')

In [None]:
# Confusion matrix for the averaged predictions (Y_pred_tta) on X_test
cm = confusion_matrix(np.argmax(Y_test, axis=1), np.argmax(Y_pred_tta, axis=1))

# Same class labels as the other confusion-matrix cells
cm_plot_label =['healthy', 'infected']
plot_confusion_matrix(cm, cm_plot_label, title ='Confusion Matrix for ALL')

In [None]:
from sklearn.metrics import classification_report

# Class indices come from argmax over the one-hot / softmax columns. Taking
# the row mean instead yields ~0.5 for every sample, which is not a class
# label and is rejected by classification_report.
print(classification_report(
    np.argmax(Y_test, axis=1),
    np.argmax(Y_pred_tta, axis=1),
    labels=[0, 1],
    target_names=['healthy', 'infected'],
    digits=4,
))

In [None]:
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score, auc
from sklearn.metrics import roc_curve

y_true = np.argmax(Y_test, axis=1)

# A bare expression in the middle of a cell is discarded, so print the report.
print(classification_report(y_true, np.argmax(Y_pred_tta, axis=1)))

# ROC analysis needs a continuous score per sample. Use the predicted
# probability of class 1 ("infected"); hard argmax labels would collapse the
# curve to a single operating point.
y_score = Y_pred_tta[:, 1]
roc_log = roc_auc_score(y_true, y_score)
false_positive_rate, true_positive_rate, threshold = roc_curve(y_true, y_score)
area_under_curve = auc(false_positive_rate, true_positive_rate)

plt.plot([0, 1], [0, 1], 'r--')  # chance diagonal
plt.plot(false_positive_rate, true_positive_rate, label='AUC = {:.3f}'.format(area_under_curve))
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title('ROC curve')
plt.legend(loc='best')
plt.show()
#plt.savefig(ROC_PLOT_FILE, bbox_inches='tight')
plt.close()

In [None]:
# Indices of the test samples to display. Both lists hold every index of
# Y_test (no filtering on prediction correctness is applied).
prop_class = list(range(len(Y_test)))
mis_class = list(range(len(Y_test)))

# Display the selected test images with their predicted and actual class
w=224
h=224
columns = 5
# Keep the 9-row grid for up to 45 images and grow it when more must fit;
# fig.add_subplot() raises once a fixed grid is full.
rows = max(9, math.ceil(len(prop_class) / columns))
fig=plt.figure(figsize=(14, 30 * rows / 9))

def Transfername(namecode):
    """Map a class index to its display name (0 -> Healthy, otherwise Infected)."""
    if namecode==0:
        return "Healthy"
    else:
        return "Infected"

for position, sample_idx in enumerate(prop_class, start=1):
    ax = fig.add_subplot(rows, columns, position)
    ax.set_title("Predicted result:"+ Transfername(np.argmax(Y_pred_tta[sample_idx]))
                       +"\n"+"Actual result: "+ Transfername(np.argmax(Y_test[sample_idx])))

    ax.imshow(X_test[sample_idx], interpolation='nearest')
plt.show()