In [1]:
%matplotlib inline

import os

#uncomment the next line to select the Keras backend explicitly (it must run before keras is imported)
#os.environ["KERAS_BACKEND"] = "tensorflow"

import random
import numpy as np
import keras

import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow

from keras.preprocessing import image
from keras.applications.imagenet_utils import preprocess_input
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Activation
from keras.layers import Conv2D, MaxPooling2D
from keras.models import Model
from sklearn import preprocessing
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from matplotlib.pyplot import imshow
from keras.models import load_model
import matplotlib.pyplot as plt
from keras.callbacks import ModelCheckpoint,EarlyStopping
from sklearn.model_selection import KFold
import seaborn as sns

In [None]:
# Dataset root: one sub-folder per class, each holding that class's images.
root = 'D:/Milon2/final_CNN/Chunk_average_spectro'
train_split, val_split = 0.7, 0.15

# Class folders = immediate sub-directories of `root`, in sorted order.
# - Sorting makes the class-index -> folder mapping deterministic; the order
#   returned by os.walk depends on the filesystem.
# - Only the first level is used: the loader cell walks each class folder
#   recursively, so also listing nested folders here would turn them into
#   extra classes and load their images twice.
# A missing `root` yields an empty list rather than an exception.
_, class_dirs, _ = next(os.walk(root), (root, [], []))
categories = [os.path.join(root, d) for d in sorted(class_dirs)]

print(categories)

In [None]:
# helper: load one image file and build the matching network input
def get_image(path):
    """Load the image stored at ``path``, resized to 224x224.

    Returns a tuple ``(img, x)``:
      * ``img`` - the object returned by ``image.load_img``;
      * ``x``   - that image converted with ``image.img_to_array``, given a
                  new leading axis of length 1 and passed through
                  ``preprocess_input``.
    """
    loaded = image.load_img(path, target_size=(224, 224))
    batch = np.expand_dims(image.img_to_array(loaded), axis=0)
    return loaded, preprocess_input(batch)

In [None]:
# File extensions (lower-case) that are treated as images.
IMAGE_EXTENSIONS = ('.jpg', '.png', '.jpeg')

# Build the in-memory dataset: one {'x': pixel array, 'y': class index} dict
# per image, where the class index is the position of the folder in `categories`.
data = []
for c, category in enumerate(categories):
    # Sorted so that the position of every sample in `data` is reproducible:
    # the fold files written by the cross-validation cell store positions
    # into `data`, and os.walk alone returns files in filesystem order.
    images = sorted(
        os.path.join(dp, f)
        for dp, dn, filenames in os.walk(category)
        for f in filenames
        if os.path.splitext(f)[1].lower() in IMAGE_EXTENSIONS
    )
    for img_path in images:
        # get_image also returns the loaded image object, which is not needed here.
        _, x = get_image(img_path)
        # x carries a leading axis of length 1; keep only the single image.
        data.append({'x': np.array(x[0]), 'y': c})

# count the number of classes
num_classes = len(categories)

In [None]:
num_folds = 5
# K-fold cross validator; the fixed seed makes the shuffled split repeatable.
kfold = KFold(n_splits=num_folds, shuffle=True, random_state=42)

# Persist the sample indices (positions into `data`) of every fold so the
# training cell can reload exactly the same splits.
for fold_no, (train, test) in enumerate(kfold.split(data), start=1):
    train_path = f"D:/Milon2/final_CNN/result/data/train/train{fold_no}.csv"
    test_path = f"D:/Milon2/final_CNN/result/data/test/test{fold_no}.csv"

    # np.savetxt does not create missing directories.
    for out_path in (train_path, test_path):
        os.makedirs(os.path.dirname(out_path), exist_ok=True)

    # Keep a previously saved split only when BOTH files are present;
    # checking the train file alone would leave a missing test file unwritten.
    if os.path.exists(train_path) and os.path.exists(test_path):
        print("exists")
    else:
        np.savetxt(train_path, train, delimiter=",", fmt='%d')
        np.savetxt(test_path, test, delimiter=",", fmt='%d')

In [None]:
# Stack the per-image dicts into one feature array and one label array.
features = [sample["x"] for sample in data]
labels = [sample["y"] for sample in data]
y = np.array(labels)

# Cast the features to float32 and divide by 255.
X = np.array(features).astype('float32') / 255.



In [None]:
# Per-fold score containers; the summary cell reads both lists.
acc_per_fold = []
loss_per_fold = []

import tensorflow as tf
import pandas as pd
from tensorflow.keras import utils as np_utils
from sklearn.metrics import classification_report, confusion_matrix

# ---------------------------------------------------------------- settings
RESULT_DIR = "D:/Milon2/final_CNN/result"
EPOCHS = 5
BATCH_SIZE = 32
# NOTE: early stopping can only trigger once EPOCHS exceeds this patience.
EARLY_STOP_PATIENCE = 150
# Share of each training fold held out for checkpoint selection / early stopping.
VAL_FRACTION = 0.15

# Class ids and display names come from the class folders, so the
# classification report and the confusion-matrix plot always agree with the
# label indices assigned while loading the images.
class_ids = list(range(num_classes))
class_names = [os.path.basename(os.path.normpath(c)) for c in categories]

# ModelCheckpoint does not create the target directory.
os.makedirs(f"{RESULT_DIR}/model", exist_ok=True)


def _read_fold_indices(path):
    """Return the single column of integer sample indices stored in `path`."""
    return pd.read_csv(path, sep=',', header=None)[0].to_numpy()


def _build_cnn(n_outputs):
    """Build and compile the CNN: four Conv2D(32, 3x3)+ReLU+MaxPooling stages,
    a 256-unit dense layer and a softmax layer with `n_outputs` units."""
    net = Sequential()

    net.add(Conv2D(32, (3, 3), input_shape=(224, 224, 3)))
    net.add(Activation('relu'))
    net.add(MaxPooling2D(pool_size=(2, 2)))

    net.add(Conv2D(32, (3, 3)))
    net.add(Activation('relu'))
    net.add(MaxPooling2D(pool_size=(2, 2)))

    net.add(Dropout(0.25))

    net.add(Conv2D(32, (3, 3)))
    net.add(Activation('relu'))
    net.add(MaxPooling2D(pool_size=(2, 2)))

    net.add(Conv2D(32, (3, 3)))
    net.add(Activation('relu'))
    net.add(MaxPooling2D(pool_size=(2, 2)))

    net.add(Dropout(0.25))

    net.add(Flatten())
    net.add(Dense(256))
    net.add(Activation('relu'))

    net.add(Dropout(0.5))

    # One output unit per class (was hard-coded to 2).
    net.add(Dense(n_outputs))
    net.add(Activation('softmax'))

    net.compile(loss='categorical_crossentropy',
                optimizer='adam',
                metrics=['accuracy'])
    return net


# K-fold Cross Validation model evaluation
for fold_no in range(1, num_folds + 1):
    # Drop the previous fold's graph/state so models do not pile up in memory.
    tf.keras.backend.clear_session()

    train_idx = _read_fold_indices(f"{RESULT_DIR}/data/train/train{fold_no}.csv")
    test_idx = _read_fold_indices(f"{RESULT_DIR}/data/test/test{fold_no}.csv")

    # The fold files are cached on disk; make sure they still describe a
    # partition of the dataset that is currently loaded.
    all_idx = np.sort(np.concatenate([train_idx, test_idx]))
    if not np.array_equal(all_idx, np.arange(len(X))):
        raise ValueError(
            f"Fold {fold_no}: saved indices do not partition the {len(X)} loaded "
            "samples; delete the cached fold files and regenerate them.")

    # Hold out part of the training fold for validation. Selecting the best
    # checkpoint on the test fold and then scoring that checkpoint on the same
    # fold would make the reported scores optimistically biased.
    shuffled_idx = np.random.RandomState(42 + fold_no).permutation(train_idx)
    n_val = max(1, int(round(VAL_FRACTION * len(shuffled_idx))))
    val_idx, fit_idx = shuffled_idx[:n_val], shuffled_idx[n_val:]

    x_train, x_val, x_test = X[fit_idx], X[val_idx], X[test_idx]

    # convert labels to one-hot vectors
    y_train = np_utils.to_categorical(y[fit_idx], num_classes)
    y_val = np_utils.to_categorical(y[val_idx], num_classes)
    y_test = np_utils.to_categorical(y[test_idx], num_classes)
    print(y_test.shape)

    # summary
    print("finished loading %d images from %d categories"%(len(data), num_classes))
    print("train / test split: %d, %d"%(len(x_train), len(x_test)))
    print("validation images held out from the training fold: %d"%len(x_val))
    print("training data shape: ", x_train.shape)
    print("training labels shape: ", y_train.shape)

    # build the network
    model = _build_cnn(num_classes)
    model.summary()

    print('------------------------------------------------------------------------')
    print(f'Training for fold {fold_no} ...')

    filepath = f"{RESULT_DIR}/model/best_model{fold_no}.best.keras"

    # define early stopping callback
    earlystop = EarlyStopping(monitor='val_loss', min_delta=0.001,
                              patience=EARLY_STOP_PATIENCE, mode='auto')

    # keep only the checkpoint with the best validation accuracy
    checkpointer = ModelCheckpoint(filepath=filepath, verbose=1,
                                   save_best_only=True, monitor='val_accuracy')

    callbacks_list = [earlystop, checkpointer]

    history = model.fit(x_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE,
                        validation_data=(x_val, y_val), callbacks=callbacks_list)

    # load saved best model
    model = load_model(filepath)

    # Generalization metrics on the untouched test fold
    scores = model.evaluate(x_test, y_test, verbose=0)
    print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%')
    acc_per_fold.append(scores[1] * 100)
    loss_per_fold.append(scores[0])

    Y_pred = model.predict(x_test)
    y_pred = np.argmax(Y_pred, axis=1)
    y_text_new = np.argmax(y_test, axis=1)

    # Confusion matrix and classification report. `labels` pins the layout to
    # every class even when a fold happens to miss one of them.
    print('Confusion Matrix')
    confusion = confusion_matrix(y_text_new, y_pred, labels=class_ids)
    print(confusion)
    print('Classification Report')
    print(classification_report(y_text_new, y_pred, labels=class_ids,
                                target_names=class_names))

    # Validation curves recorded during training
    fig, (ax, ax2) = plt.subplots(1, 2, figsize=(16, 4))
    ax.plot(history.history["val_loss"])
    ax.set_title("validation loss")
    ax.set_xlabel("epochs")

    ax2.plot(history.history["val_accuracy"])
    ax2.set_title("validation accuracy")
    ax2.set_xlabel("epochs")
    ax2.set_ylim(0, 1)
    plt.show()

    # Confusion-matrix heatmap with the real class names on both axes
    fig_cm, ax_cm = plt.subplots()
    sns.heatmap(confusion, annot=True, fmt="d", ax=ax_cm,
                xticklabels=class_names, yticklabels=class_names)
    ax_cm.set_xlabel('Predicted labels')
    ax_cm.set_ylabel('True labels')
    ax_cm.set_title('Confusion Matrix')
    plt.show()

In [None]:
# == Report the score of every fold, then the averages over all folds ==
separator = '------------------------------------------------------------------------'

print(separator)
print('Score per fold')
for fold_index, fold_acc in enumerate(acc_per_fold, start=1):
    fold_loss = loss_per_fold[fold_index - 1]
    print(separator)
    print(f'> Fold {fold_index} - Loss: {fold_loss} - Accuracy: {fold_acc}%')
print(separator)
print('Average scores for all folds:')
mean_acc, std_acc = np.mean(acc_per_fold), np.std(acc_per_fold)
print(f'> Accuracy: {mean_acc} (+- {std_acc})')
mean_loss = np.mean(loss_per_fold)
print(f'> Loss: {mean_loss}')
print(separator)