In [None]:
####### Imports #######
# Directory and file paths processing
# Image processing into 3D arrays
import re
import os
import sys
sys.path.append('/home/pfa/Documents/Code')
import numpy as np
import matplotlib.pyplot as plt
import keras
from keras.preprocessing import image
from keras.utils import to_categorical
from tensorflow.keras.models import load_model
from sklearn.metrics import confusion_matrix
from sort import sort_nicely
from multiprocessing import Process, Manager
import pickle
import json

In [None]:
## Dataset locations and module-level bookkeeping
base_dir = '/home/datasets/GAIT-IT'
metadata_dir = '/home/datasets/metadata'

# Global counter and per-subject store (initialised empty here)
sample_count = 0
subjects_data = dict()

# Module-level placeholders for the three data splits
train_images, train_labels = [], []
validation_images, validation_labels = [], []
test_images, test_labels = [], []

# Pathology name -> class index, and the reverse lookup derived from it
classes = dict(zip(['Diplegic', 'Hemiplegic', 'Neuropathic', 'Normal', 'Parkinson'], range(5)))
classes_inv = {index: name for name, index in classes.items()}

In [None]:
## Train silhouettes directories
def sets(train_subjs,validation_subjs,test_subjs):
    """Load the key-frame silhouettes of every subject and group them by split.

    Walks ``base_dir/<pathology>/<subject>/silhouettes/side_view/<sequence>``
    for every pathology in ``classes``. For each sequence folder, the frames
    selected by the subject's ``key_frames.json`` are loaded at 224x224 and
    appended, together with the pathology's class index, to the split the
    subject belongs to.

    Args:
        train_subjs: subject folder names assigned to the training split.
        validation_subjs: subject folder names assigned to the validation split.
        test_subjs: subject folder names assigned to the test split.

    Returns:
        A tuple ``(all_images, all_labels)`` of dictionaries keyed by
        ``'train'``, ``'validation'`` and ``'test'``. ``all_images`` holds the
        arrays returned by ``image.img_to_array`` and ``all_labels`` the
        matching integer class indices from ``classes``.

    Notes:
        Subject folders that appear in none of the three lists are skipped.
        Previously such a folder silently reused the split of the subject
        processed just before it (or raised ``UnboundLocalError`` when it was
        the first one).
    """
    all_images = {'train': [], 'validation': [], 'test': []}
    all_labels = {'train': [], 'validation': [], 'test': []}

    # Process pathologies in a stable, human-friendly order
    pathologies = list(classes.keys())
    sort_nicely(pathologies)
    for pathology in pathologies:

        pathology_dir = base_dir + '/{}'.format(pathology)
        print(pathology_dir)
        pathology_subj_folders = [name for name in os.listdir(pathology_dir) if os.path.isdir(os.path.join(pathology_dir, name))]
        sort_nicely(pathology_subj_folders)

        # /Pathology/subj{i}/silhouettes/subj_{i}-pat_{j}-lvl_{k}-{l}_{direction}
        for subj_folder in pathology_subj_folders:

            if subj_folder in train_subjs: subj_set = 'train'
            elif subj_folder in validation_subjs: subj_set = 'validation'
            elif subj_folder in test_subjs: subj_set = 'test'
            else:
                # Do not let an unassigned subject leak into another subject's split
                print('Skipping {}: not assigned to any split'.format(os.path.join(pathology_dir, subj_folder)))
                continue

            subj_folder_dir = os.path.join(pathology_dir, subj_folder)
            subj_silhouettes_dir = os.path.join(subj_folder_dir, 'silhouettes', 'side_view')

            subj_silhouettes_folders = [name for name in os.listdir(subj_silhouettes_dir) if os.path.isdir(os.path.join(subj_silhouettes_dir, name))]
            sort_nicely(subj_silhouettes_folders)

            # Only folders with an underscore in their name are treated as sequences
            folders = [f for f in subj_silhouettes_folders if '_' in f]
            if not folders:
                continue

            # The key-frame metadata lives in a mirror of the dataset tree; its
            # path depends only on the subject, so it is read once per subject
            # instead of once per sequence folder.
            subj_silhouettes_metadata_dir = subj_silhouettes_dir.replace('GAIT-IT', 'metadata')
            subj_pat_metadata = os.path.join(subj_silhouettes_metadata_dir,'metadata/key_frames.json')
            with open(subj_pat_metadata) as f:
                key_frames = json.load(f)

            for folder in folders:

                # Directory with the silhouette images of this sequence
                subj_pat_lvl_dir = os.path.join(subj_silhouettes_dir, folder)

                files = os.listdir(subj_pat_lvl_dir)
                sort_nicely(files)
                # key_frames[folder] lists positions in the sorted file list
                file_names = [files[f] for f in key_frames[folder]]

                # Keep only complete groups of 9 key frames; a trailing
                # incomplete group is discarded.
                usable = (len(file_names) // 9) * 9
                for file_name in file_names[:usable]:
                    file_path = os.path.join(subj_pat_lvl_dir, file_name)
                    img = image.load_img(file_path, target_size=(224, 224))
                    img_tensor = image.img_to_array(img)

                    all_images[subj_set].append(img_tensor)
                    all_labels[subj_set].append(classes[pathology])
    print(len(all_images['train'])); print(len(all_images['validation'])); print(len(all_images['test']))
    print(len(all_labels['train'])); print(len(all_labels['validation'])); print(len(all_labels['test']))

    return all_images, all_labels

In [None]:
# Dense Classifier
from tensorflow.keras import models, layers, optimizers
## Import VGG19 convolutional base
from tensorflow.keras.applications import VGG16, VGG19

def buildModel():
    """Create and compile the VGG19-based 5-class classifier.

    The ImageNet-pretrained VGG19 convolutional base (without its top) is
    followed by two 4096-unit ReLU layers with 50% dropout and a 5-way softmax.
    Layers of the base that come before ``block3_conv1`` are frozen; that
    layer and all later ones are trainable.

    Returns:
        The compiled Sequential model (Nesterov SGD, categorical
        cross-entropy loss, ``acc`` metric).
    """
    # Pre-trained VGG19 feature extractor, classifier head removed
    conv_base = VGG19(weights='imagenet',
                      include_top=False,
                      input_shape=(224, 224, 3))

    # Fine-tune from convolutional block 3 onwards, freeze everything before it
    conv_base.trainable = True
    unfrozen = False
    for base_layer in conv_base.layers:
        unfrozen = unfrozen or base_layer.name == 'block3_conv1'
        base_layer.trainable = unfrozen

    # Stack the fully connected classification head on top of the base
    model = models.Sequential()
    model.add(conv_base)
    head = (
        layers.Flatten(),
        layers.Dense(4096, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(4096, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(5, activation='softmax'),
    )
    for head_layer in head:
        model.add(head_layer)

    sgd = optimizers.SGD(learning_rate=0.0002, momentum=0.9, nesterov=True)
    model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['acc'])
    return model

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

def create_train(k, all_history, confusion_matrices):
    """Train the model for cross-validation fold ``k`` and record its results.

    The fold's validation subjects are taken from a fixed table, ``s13`` and
    ``s21`` are held out as test subjects, and every other subject from
    ``s1`` to ``s23`` is used for training. The best checkpoint (by
    ``val_acc``) is reloaded after training and used to compute the
    validation confusion matrix.

    Args:
        k: index of the fold, 0 to 9.
        all_history: list-like object; the Keras ``history.history`` dict of
            this fold is appended to it.
        confusion_matrices: list-like object; the validation confusion matrix
            of this fold is appended to it.

    Returns:
        None. Results are delivered through the two list arguments and the
        checkpoint file ``Train_GAIT_IT_Silhouettes/modelsGEIs/model{k}.hdf5``.
    """
    # Fix the class count explicitly so that one-hot labels and confusion
    # matrices always have one entry per class, even if a class is missing
    # from a split or never predicted.
    num_classes = len(classes)

    # Make sure the checkpoint directory exists before Keras writes to it
    checkpoint_path = 'Train_GAIT_IT_Silhouettes/modelsGEIs/model{}.hdf5'.format(k)
    os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

    # Keep only the best model of the fold and stop once val_acc stalls
    checkpointer = ModelCheckpoint(monitor='val_acc', filepath=checkpoint_path, verbose=1, save_best_only=True)
    early_stopping_criteria = EarlyStopping(monitor='val_acc', min_delta=0, patience=10, verbose=1, mode='auto')

    # NOTE(review): consecutive folds share one validation subject (s3, s5, ...)
    # - confirm this overlap is intended.
    all_validation_subjs = [['s1','s2','s3'],['s3','s4','s5'],['s5','s6','s7'],['s7','s8','s9'],['s9','s10','s11'],['s11','s12','s14'],['s14','s15','s16'],['s16','s17','s18'],['s18','s19','s20'],['s20','s22','s23']]
    validation_subjs = all_validation_subjs[k]
    test_subjs = ['s13','s21']
    train_subjs = ['s{}'.format(i) for i in range(1,24) if 's{}'.format(i) not in validation_subjs and 's{}'.format(i) not in test_subjs]

    print(train_subjs, validation_subjs, test_subjs)

    # Split data into training, validation and test sets
    all_images, all_labels = sets(train_subjs=train_subjs, validation_subjs=validation_subjs, test_subjs=test_subjs)

    # The test split is not used during training, so it is not converted to arrays
    train_images = np.array(all_images['train'])
    train_labels = to_categorical(np.array(all_labels['train']), num_classes=num_classes)
    validation_images = np.array(all_images['validation'])
    validation_labels = to_categorical(np.array(all_labels['validation']), num_classes=num_classes)

    # Call model creator
    model = buildModel()

    # Train model using keras.fit
    history = model.fit(train_images, train_labels,
                        epochs=50,
                        batch_size=15,
                        validation_data=(validation_images, validation_labels), callbacks=[checkpointer, early_stopping_criteria], verbose=1)

    # Record the training curves of this fold
    all_history.append(history.history)

    # Load best model from checkpoint of current fold
    model = load_model(checkpoint_path)

    # Confusion matrix of the best model on the validation subjects
    predictions = model.predict(validation_images)
    conf_mat = confusion_matrix(np.argmax(validation_labels, axis=1),
                                np.argmax(predictions, axis=1),
                                labels=list(range(num_classes)))
    confusion_matrices.append(conf_mat)

In [None]:
# Run the 10 cross-validation folds, each in its own child process so that GPU
# memory is released when the fold finishes.

# Make sure the results directory exists before the first fold is stored
os.makedirs('Train_GAIT_IT_Silhouettes/dataGEIs', exist_ok=True)

# One manager process serves the shared lists of every fold
manager = Manager()

for k in range(0,10):

    # Fresh shared lists that the child process fills for this fold
    all_history = manager.list()
    confusion_matrices = manager.list()

    # Train model with process to free GPU memory after training
    p = Process(target=create_train, args=(k, all_history, confusion_matrices))
    p.start()
    p.join()

    # A crashed child leaves the shared lists empty; report it instead of
    # storing empty results without any notice.
    if p.exitcode != 0:
        print('WARNING: fold {} exited with code {}; its stored results may be empty'.format(k, p.exitcode))

    # Convert manager list back to normal list
    history = list(all_history)

    # Store performance history of each fold
    with open('Train_GAIT_IT_Silhouettes/dataGEIs/all_history{}'.format(k), 'wb') as f:
        pickle.dump(history,f)

    # Convert manager list back to normal list
    conf_matrices = list(confusion_matrices)

    # Store the confusion matrices of this fold
    with open("Train_GAIT_IT_Silhouettes/dataGEIs/confusion_matrices{}".format(k), 'wb') as f:
        pickle.dump(conf_matrices,f)

In [None]:
# Gather the histories stored by each of the 10 folds into one flat list
all_history = []
for fold in range(10):
    fold_path = "Train_GAIT_IT_Silhouettes/dataGEIs/all_history{}".format(fold)
    with open(fold_path, 'rb') as fold_file:
        history = pickle.load(fold_file)
    all_history += history

# Persist the merged list next to the per-fold files
with open("Train_GAIT_IT_Silhouettes/dataGEIs/all_history", 'wb') as merged_file:
    pickle.dump(all_history, merged_file)

In [None]:
# Reload the merged per-fold training histories from disk
with open("Train_GAIT_IT_Silhouettes/dataGEIs/all_history", 'rb') as history_file:
    all_history = pickle.load(history_file)

In [None]:
## Plot results
print(len(all_history))


def _mean_per_epoch(histories, key):
    """Average metric ``key`` epoch by epoch across the given histories.

    Training uses early stopping, so folds can have run for different numbers
    of epochs. Each epoch is averaged over the folds that actually reached it,
    up to the longest run, instead of assuming every fold ran for 50 epochs.
    """
    longest = max((len(h[key]) for h in histories), default=0)
    return [np.mean([h[key][epoch] for h in histories if len(h[key]) > epoch])
            for epoch in range(longest)]


average_acc = _mean_per_epoch(all_history, "acc")
average_loss = _mean_per_epoch(all_history, "loss")
average_val_acc = _mean_per_epoch(all_history, "val_acc")
average_val_loss = _mean_per_epoch(all_history, "val_loss")

# Accuracy curves; the val_* metrics come from the validation split, so they
# are labelled as validation rather than testing.
plt.plot(range(1, len(average_acc) + 1), average_acc, 'bo', label = 'Training accuracy')
plt.plot(range(1, len(average_val_acc) + 1), average_val_acc, 'b', label = 'Validation accuracy')
plt.title('Training and validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.savefig('Train_GAIT_IT_Silhouettes/dataGEIs/acc_history')

plt.figure()

# Loss curves
plt.plot(range(1, len(average_loss) + 1), average_loss, 'bo', label = 'Training loss')
plt.plot(range(1, len(average_val_loss) + 1), average_val_loss, 'b', label = 'Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.savefig('Train_GAIT_IT_Silhouettes/dataGEIs/loss_history')

plt.show()

In [None]:
# Merge the confusion matrices stored by each of the 10 folds into one list
all_confusion_matrices = []
for i in range(0,10):
    with open("Train_GAIT_IT_Silhouettes/dataGEIs/confusion_matrices{}".format(i), 'rb') as f:
        confusion_matrices = pickle.load(f)
        all_confusion_matrices.extend(confusion_matrices)

# Persist the merged list next to the per-fold files
with open("Train_GAIT_IT_Silhouettes/dataGEIs/all_confusion_matrices", 'wb') as f:
    pickle.dump(all_confusion_matrices,f)

In [None]:
def evaluate_model(k, confusion_matrices):
    """Evaluate the stored best model of fold ``k`` on its validation subjects.

    Rebuilds the subject split of the fold, loads the checkpoint
    ``Train_GAIT_IT_Silhouettes/modelsGEIs/model{k}.hdf5``, prints the result
    of ``model.evaluate`` on the validation data and appends the validation
    confusion matrix to ``confusion_matrices``.

    Args:
        k: index of the fold, 0 to 9.
        confusion_matrices: list-like object that receives the confusion
            matrix of this fold.

    Returns:
        None.
    """
    # Fix the class count explicitly so that one-hot labels and the confusion
    # matrix always have one entry per class, even if a class is missing from
    # the validation data or never predicted.
    num_classes = len(classes)

    all_validation_subjs = [['s1','s2','s3'],['s3','s4','s5'],['s5','s6','s7'],['s7','s8','s9'],['s9','s10','s11'],['s11','s12','s14'],['s14','s15','s16'],['s16','s17','s18'],['s18','s19','s20'],['s20','s22','s23']]
    validation_subjs = all_validation_subjs[k]
    test_subjs = ['s13','s21']
    train_subjs = ['s{}'.format(i) for i in range(1,24) if 's{}'.format(i) not in validation_subjs and 's{}'.format(i) not in test_subjs]

    print(train_subjs, validation_subjs, test_subjs)

    # Split data into training, validation and test sets
    all_images, all_labels = sets(train_subjs=train_subjs, validation_subjs=validation_subjs, test_subjs=test_subjs)

    # Only the validation split is evaluated here; the training and test
    # splits are not copied into arrays because nothing below reads them.
    validation_images = np.array(all_images['validation'])
    validation_labels = to_categorical(np.array(all_labels['validation']), num_classes=num_classes)

    # Load best model from checkpoint of current fold
    model = load_model('Train_GAIT_IT_Silhouettes/modelsGEIs/model{}.hdf5'.format(k))

    # Check validation accuracy
    results = model.evaluate(validation_images, validation_labels)
    print(results)

    # Confusion matrix of the model on the validation subjects
    predictions = model.predict(validation_images)
    conf_mat = confusion_matrix(np.argmax(validation_labels, axis=1),
                                np.argmax(predictions, axis=1),
                                labels=list(range(num_classes)))
    confusion_matrices.append(conf_mat)

In [None]:
# Evaluate the checkpointed model of every fold, one child process per fold,
# collecting the confusion matrices in a list shared through a manager.
confusion_matrices = Manager().list()
for k in range(10):
    worker = Process(target=evaluate_model, args=(k, confusion_matrices))
    worker.start()
    worker.join()

# Copy the shared list into an ordinary Python list
conf_matrices = list(confusion_matrices)

# Save the confusion matrices of all folds in a single file
with open("Train_GAIT_IT_Silhouettes/dataGEIs/confusion_matrices", 'wb') as out_file:
    pickle.dump(conf_matrices, out_file)

In [None]:
from sklearn.metrics import ConfusionMatrixDisplay

# Function to display confusion matrices
def DisplayCM(CM, title, save):
    """Draw a confusion matrix with the five pathology names on both axes.

    Args:
        CM: confusion matrix to plot, passed to ``ConfusionMatrixDisplay``.
        title: figure title; also used as the file name when saving.
        save: when truthy, the figure is written to
            ``Train_GAIT_IT_Silhouettes/dataGEIs/Confusion Matrices/<title>``.
    """
    class_names = ['Diplegic', 'Hemiplegic', 'Neuropathic', 'Normal', 'Parkinson']

    _, axis = plt.subplots(figsize=(10, 10))
    matrix_display = ConfusionMatrixDisplay(confusion_matrix=CM, display_labels=class_names)
    matrix_display.plot(include_values=True, cmap='viridis', ax=axis, xticks_rotation='horizontal')
    plt.title(title)
    plt.tight_layout()

    if not save:
        return
    plt.savefig('Train_GAIT_IT_Silhouettes/dataGEIs/Confusion Matrices/' + title)


# Row-normalise the confusion matrix of every fold and average them.
# `confusion_matrices` is whatever an earlier cell left under that name
# (the shared list of the evaluation loop or a list loaded from disk).
cmpercentList = []
for cm in confusion_matrices:
    counts = np.asarray(cm, dtype=float)
    row_totals = counts.sum(axis=1, keepdims=True)

    # Fraction of each true class (row) assigned to each predicted class.
    # A class with no samples in a fold has a zero row total; its row is set
    # to NaN without triggering a divide warning and is ignored below.
    cmpercent = np.divide(counts, row_totals,
                          out=np.full_like(counts, np.nan),
                          where=row_totals != 0)
    cmpercentList.append(cmpercent)

# Element-wise mean over the folds, skipping NaN rows of folds that lacked a class
if cmpercentList:
    cmAVG = np.nanmean(np.stack(cmpercentList), axis=0)
else:
    # Nothing to average: keep the 5x5 shape so plotting and printing still run
    cmAVG = np.full((5, 5), np.nan)

# Plot the average confusion matrix over all folds
DisplayCM(CM=cmAVG, title='AVG', save=True)

print(cmAVG)
# Mean of the diagonal, i.e. the average of the per-class rates of correct predictions
print(np.mean(np.diag(cmAVG)))