In [33]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, LSTM, Dense, Concatenate, Reshape, TimeDistributed
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from sklearn.ensemble import ExtraTreesClassifier
import matplotlib.pyplot as plt
import cv2
from tqdm import tqdm

In [4]:
# Load CSV data
csv_path = r'Dimentia Dataset\dementia_dataset.csv'
data = pd.read_csv(csv_path)
data = data.fillna(0)

print(data.columns)
for column in data.drop(columns=['Group', 'Subject ID', 'MRI ID']).columns:
        data[column] = data[column].astype('category').cat.codes

Index(['Subject ID', 'MRI ID', 'Group', 'Visit', 'MR Delay', 'M/F', 'Hand',
       'Age', 'EDUC', 'SES', 'MMSE', 'CDR', 'eTIV', 'nWBV', 'ASF'],
      dtype='object')


In [5]:
def generate_data(df, extra_trees_classifier):
    X = df[['Age', 'EDUC', 'Visit', 'MR Delay', 'Hand', 'SES', 'MMSE', 'CDR', 'eTIV', 'nWBV', 'ASF']]
    print(f'Number of Features before feature selection {X.shape}')
    y = df['Group'] # set the y to the dependent output variable
    model = extra_trees_classifier()
    model.fit(X,y)
    print(model)
    return X, y, model
    
X, y, model = generate_data(data, ExtraTreesClassifier)

Number of Features before feature selection (372, 11)
ExtraTreesClassifier()


In [6]:
def encode_output(y):
    onehot = pd.get_dummies(y)
    output = onehot.to_numpy()
    
    return output

In [7]:
y_encoded = encode_output(y)
print(y_encoded.shape)
print(y_encoded[:5])

(372, 3)
[[0 0 1]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 0 1]]


In [8]:
def reshape_input(data, alt_time_steps):
    if isinstance(data, pd.DataFrame):
        data = data.values
    
    # Reshape the data into a 3D array
    num_samples = data.shape[0] 
    num_features = data.shape[1]
    input = np.zeros((num_samples, alt_time_steps, num_features))
    for i in range(num_samples):
        if i+alt_time_steps < num_samples:
            input[i, :, :] = data[i:i+alt_time_steps, :]
            
    return input

In [9]:
def load_images(folder_path):
    images_dict = {}
    desired_size = (200, 200, 3)

    for root, dirs, files in tqdm(os.walk(folder_path), desc="Processing"):
#         print(root)
        # Skip the first entry (root is empty)
        if not files:
            continue
        
        # Sort the directories alphabetically
        dirs.sort()

        patient_images = []
        for file in sorted(files):  # Sort files alphabetically
            if file.endswith('.png') and "nifti" in file:
                # get image path
                image_path = os.path.join(root, file)

                # Load the header and image data
                image = keras.preprocessing.image.load_img(image_path, target_size=desired_size[:-1])
                image = keras.preprocessing.image.img_to_array(image)

                # Convert the image to RGB format
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

                # Normalize the image to the range [0, 1]
                image = image.astype('float32') / 255.0

                # Append the preprocessed image to the list
                patient_images.append(image)

        # Store images in a dictionary with the folder as the key
        images_dict[root] = np.array(patient_images)

    # Convert the dictionary values to a NumPy array
    images_list = list(images_dict.values())
    images = np.array(images_list, dtype=object)

    return images

In [15]:
image_path = r'Dimentia Dataset/Output'
images = load_images(image_path)
images_sequences = np.array(images)
print(images_sequences.shape)

Processing: 373it [00:12, 30.30it/s]

(372,)





In [16]:
max_num_images = 4
image_shape = (200, 200, 3)
padded_images_sequence = [np.pad(sequence, ((0, max_num_images - sequence.shape[0]), (0, 0), (0, 0), (0, 0)), mode='constant') for sequence in images_sequences]
image_data = np.stack(padded_images_sequence)
desired_shape = (image_data[0].shape)
print(desired_shape)

(4, 200, 200, 3)


In [17]:
selected_features = ['Age','EDUC','MR Delay','SES','MMSE','CDR','eTIV','nWBV','ASF']

In [18]:
time_step = 4
X = X[selected_features]
X_reshaped = reshape_input(X, time_step)
print(X_reshaped.shape)
print(y_encoded.shape)

(372, 4, 9)
(372, 3)


In [21]:
# Define the MLP network for tabular data
class classifier():
    def __init__(self, num_tabular_features, desired_image_shape, time_step):
        self.num_tabular_features = num_tabular_features
        self.desired_image_shape = desired_image_shape
        self.num_features = num_tabular_features
        self.time_step = time_step
        self.tabular_model = None
        self.image_model = None
        self.multi_modal_model = None
    
    def tabular_classifier(self):
        tabular_model = Sequential(name="tabular_classifier")
        tabular_model.add(LSTM(256, input_shape=(self.time_step, self.num_features), activation='relu', dropout=0.2))
        tabular_model.add(Dense(3, activation='relu'))
        tabular_model.compile(loss='categorical_crossentropy', optimizer='nadam', metrics=['accuracy'])
        
        return tabular_model

    # Define the CNN network for image data
    def image_classifier(self):
#         image_model = Sequential(name="image_classifier")
        image_input = Input(shape=self.desired_image_shape)
        image_conv1 = TimeDistributed(Conv2D(128, 1, activation='tanh', padding='valid'))(image_input)
        image_maxpool1 = TimeDistributed(MaxPooling2D((2, 2), padding='same'))(image_conv1)
        image_conv2 = TimeDistributed(Conv2D(256, 1, activation='tanh', padding='valid'))(image_maxpool1)
        image_maxpool2 = TimeDistributed(MaxPooling2D((2, 2), padding='same'))(image_conv2)
        image_flat = TimeDistributed(Flatten())(image_maxpool2)
        
        # Combine the sequence of flattened images along the time axis
        combined_image_features = Concatenate(axis=-1)([image_flat])
        # Flatten the time-distributed sequence
        flattened_features = Flatten()(combined_image_features)
        image_output = Dense(3, activation='softmax')(flattened_features)
        image_model = Model(inputs=image_input, outputs=image_output, name="image_classifier")
        image_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
        
        return image_model
      
    def build_classifier(self):
        self.tabular_model = self.tabular_classifier()
        self.image_model = self.image_classifier()

        # Combine the feature vectors using concatenation
        combined_features = Concatenate()([self.tabular_model.output, self.image_model.output])

        # Fusion Layer: You can add additional layers here if needed
        fusion_layer = Dense(128, activation='sigmoid')(combined_features)

        # Output Layer for binary classification
        output_layer = Dense(3, activation='softmax')(fusion_layer)

        # Create the siamese network
        self.multi_modal_model = Model(inputs=[self.tabular_model.input, self.image_model.input], outputs=output_layer, name="dimentia_classifier")

        # Compile the model
        self.multi_modal_model.compile(optimizer='nadam', loss='categorical_crossentropy', metrics=['accuracy'])

In [22]:
dimentia_classifier = classifier(len(selected_features), desired_shape, time_step)
dimentia_classifier.build_classifier()
dimentia_classifier.tabular_model.summary()
dimentia_classifier.image_model.summary()

Model: "tabular_classifier"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 256)               272384    
                                                                 
 dense (Dense)               (None, 3)                 771       
                                                                 
Total params: 273,155
Trainable params: 273,155
Non-trainable params: 0
_________________________________________________________________
Model: "image_classifier"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 4, 200, 200, 3)]  0         
                                                                 
 time_distributed (TimeDistr  (None, 4, 200, 200, 128)  512      
 ibuted)                                                         
                

In [23]:
dimentia_classifier.multi_modal_model.summary()

Model: "dimentia_classifier"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 4, 200, 200  0           []                               
                                , 3)]                                                             
                                                                                                  
 time_distributed (TimeDistribu  (None, 4, 200, 200,  512        ['input_1[0][0]']                
 ted)                            128)                                                             
                                                                                                  
 time_distributed_1 (TimeDistri  (None, 4, 100, 100,  0          ['time_distributed[0][0]']       
 buted)                          128)                                           

In [37]:
dimentia_classifier.multi_modal_model.load_weights('dimentia_detector_v1.h5')
dimentia_classifier.image_model.load_weights('image_detector_v1.h5')
dimentia_classifier.tabular_model.load_weights('tabular_detector_v1.h5')

In [30]:
from sklearn.model_selection import train_test_split

# Assuming X_tabular, X_images, and y_reshaped are NumPy arrays

# Split the data into training and testing sets while maintaining the correspondence
X_tabular_train, X_tabular_test, X_images_train, X_images_test, y_train, y_test = train_test_split(
    X_reshaped, image_data, y_encoded, test_size=0.2, random_state=42
)

# Print the shapes of the resulting arrays
print("X_tabular_train shape:", X_tabular_train.shape)
print("X_tabular_test shape:", X_tabular_test.shape)
print("X_images_train shape:", X_images_train.shape)
print("X_images_test shape:", X_images_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

X_tabular_train shape: (297, 4, 9)
X_tabular_test shape: (75, 4, 9)
X_images_train shape: (297, 4, 200, 200, 3)
X_images_test shape: (75, 4, 200, 200, 3)
y_train shape: (297, 3)
y_test shape: (75, 3)


In [38]:
dimentia_train_r3_metric = dimentia_classifier.multi_modal_model.evaluate([X_tabular_train, X_images_train], y_train, use_multiprocessing = True, workers = 32, return_dict=True)
dimentia_test_r3_metric = dimentia_classifier.multi_modal_model.evaluate([X_tabular_test, X_images_test], y_test, use_multiprocessing = True, workers=32, return_dict=True)
print(f'Accuracy during training: {round(dimentia_train_r3_metric["accuracy"]*100, 2)}% and loss: {round(dimentia_train_r3_metric["loss"], 4)}')
print(f'Accuracy during test: {round(dimentia_test_r3_metric["accuracy"]*100, 2)}% and loss: {round(dimentia_test_r3_metric["loss"], 4)}')

Accuracy during training: 38.38% and loss: 3.6887
Accuracy during test: 45.33% and loss: 3.2494


In [None]:
# Make predictions on the test set
y_pred_prob = model.predict([X_tabular_test, X_images_test])

# Calculate ROC curve and AUC
fpr, tpr, _ = roc_curve(y_test, y_pred_prob)
roc_auc = auc(fpr, tpr)

# Plot ROC curve
plt.figure(figsize=(8, 8))
plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (AUC = {:.2f})'.format(roc_auc))
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc='lower right')
plt.show()

In [None]:
# Calculate precision-recall curve and AUC
precision, recall, _ = precision_recall_curve(y_test, y_pred_prob)
pr_auc = average_precision_score(y_test, y_pred_prob)

# Plot Precision-Recall curve
plt.figure(figsize=(8, 8))
plt.step(recall, precision, color='b', alpha=0.2, where='post')
plt.fill_between(recall, precision, step='post', alpha=0.2, color='b')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.ylim([0.0, 1.05])
plt.xlim([0.0, 1.0])
plt.title('Precision-Recall curve (AUC = {:.2f})'.format(pr_auc))
plt.show()

In [None]:
# Calculate and print F1 score, recall, precision
y_pred = (y_pred_prob > 0.5).astype(int)
f1 = f1_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)

print('F1 Score:', f1)
print('Recall:', recall)
print('Precision:', precision)