In [3]:
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.metrics import (
    auc,
    average_precision_score,
    f1_score,
    precision_recall_curve,
    precision_score,
    recall_score,
    roc_curve,
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow import keras
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, LSTM, Dense, Concatenate, Reshape, TimeDistributed
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.utils import to_categorical
from tqdm import tqdm

In [None]:
# Read the tabular records and replace missing values with 0
csv_path = r'Dimentia Dataset\dementia_dataset.csv'
data = pd.read_csv(csv_path).fillna(0)

print(data.columns)
# Every column except 'Group', 'Subject ID' and 'MRI ID' is replaced by its
# integer category codes (one code per distinct value in that column).
for column in data.columns.drop(['Group', 'Subject ID', 'MRI ID']):
    data[column] = data[column].astype('category').cat.codes

In [None]:
def generate_data(df, extra_trees_classifier):
    """Select the tabular features and target from ``df`` and fit an estimator.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the eleven feature columns listed below and 'Group'.
    extra_trees_classifier : callable
        Called with no arguments to build the estimator; the result must
        provide ``fit(X, y)``.

    Returns
    -------
    tuple
        ``(X, y, model)``: the feature frame, the 'Group' column and the
        fitted estimator.
    """
    feature_columns = [
        'Age', 'EDUC', 'Visit', 'MR Delay', 'Hand', 'SES',
        'MMSE', 'CDR', 'eTIV', 'nWBV', 'ASF',
    ]
    features = df[feature_columns]
    print(f'Number of Features before feature selection {features.shape}')

    # 'Group' is the dependent (output) variable
    target = df['Group']

    estimator = extra_trees_classifier()
    estimator.fit(features, target)
    print(estimator)
    return features, target, estimator
    
# Fit an ExtraTreesClassifier (constructed with its default arguments) on the
# tabular features; keep the feature frame, the 'Group' labels and the estimator.
X, y, model = generate_data(data, ExtraTreesClassifier)

In [None]:
def encode_output(y):
    """One-hot encode ``y`` and return the indicator matrix as a NumPy array.

    ``pd.get_dummies`` produces one column per distinct value of ``y``; the
    resulting frame is converted with ``to_numpy()``.
    """
    return pd.get_dummies(y).to_numpy()

In [None]:
# One-hot encode the 'Group' labels, then show the matrix shape and its first five rows
y_encoded = encode_output(y)
print(y_encoded.shape)
print(y_encoded[:5])

In [None]:
def reshape_input(data, alt_time_steps):
    """Build overlapping windows of ``alt_time_steps`` consecutive rows.

    Window ``i`` holds rows ``i`` to ``i + alt_time_steps - 1`` of ``data``.
    The result always contains one window per input row; start positions that
    are too close to the end to supply a full window are left as all zeros.

    Parameters
    ----------
    data : pandas.DataFrame or 2-D array-like
        One row per sample, one column per feature.
    alt_time_steps : int
        Number of consecutive rows in each window.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(num_samples, alt_time_steps, num_features)``.
    """
    if isinstance(data, pd.DataFrame):
        data = data.values
    # Accept plain array-likes (e.g. nested lists) as well as ndarrays
    data = np.asarray(data)

    num_samples = data.shape[0]
    num_features = data.shape[1]
    windows = np.zeros((num_samples, alt_time_steps, num_features))

    # The last start index with a complete window is num_samples - alt_time_steps
    # (inclusive). The previous strict `<` comparison skipped that final window
    # and left it zero-filled even though all of its rows exist.
    full_windows = max(0, min(num_samples, num_samples - alt_time_steps + 1))
    for start in range(full_windows):
        windows[start] = data[start:start + alt_time_steps]

    return windows

In [None]:
def load_images(folder_path):
    """Load the PNG images below ``folder_path``, grouped by containing folder.

    The tree is walked with ``os.walk``. For every folder that contains at
    least one file, each file whose name ends in '.png' and contains 'nifti'
    is loaded at 200x200, converted to a float32 array scaled by 1/255, and
    collected (in alphabetical file order) into one array for that folder.

    Parameters
    ----------
    folder_path : str
        Root directory to scan.

    Returns
    -------
    numpy.ndarray
        ``np.array(..., dtype=object)`` built from the per-folder image
        arrays, in traversal order.
    """
    images_dict = {}
    desired_size = (200, 200, 3)

    for root, dirs, files in tqdm(os.walk(folder_path), desc="Processing"):
        # Sort in place so os.walk descends into sub-folders alphabetically.
        # This must happen before the "no files" skip below: otherwise the
        # children of a folder that holds only sub-folders (such as the top
        # folder) are visited in whatever order the filesystem returns them.
        dirs.sort()

        # Folders without any files contribute no entry
        if not files:
            continue

        patient_images = []
        for file in sorted(files):  # Sort files alphabetically
            if file.endswith('.png') and "nifti" in file:
                image_path = os.path.join(root, file)

                # Load at the target height/width and convert to an array.
                # Uses the load_img / img_to_array names imported at the top of
                # the notebook (a bare `keras` name is not imported there).
                image = load_img(image_path, target_size=desired_size[:-1])
                image = img_to_array(image)

                # NOTE(review): load_img returns a PIL image, which is normally
                # already in RGB channel order, so this BGR->RGB conversion may
                # in fact reverse the channels. Kept unchanged so that existing
                # results are unaffected — confirm whether it is intended.
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

                # Scale pixel values by 1/255
                image = image.astype('float32') / 255.0

                patient_images.append(image)

        # One entry per folder, keyed by the folder path
        images_dict[root] = np.array(patient_images)

    # Object array so that folders with different image counts can coexist
    images_list = list(images_dict.values())
    images = np.array(images_list, dtype=object)

    return images

In [None]:
# Load the per-folder image sequences found under the output directory
image_path = r'Dimentia Dataset/Output'
images = load_images(image_path)
# np.array() on the returned array makes a copy; the shape is printed for inspection
images_sequences = np.array(images)
print(images_sequences.shape)

In [None]:
max_num_images = 4
image_shape = (200, 200, 3)

# Zero-pad every sequence along its first axis up to max_num_images entries;
# the three remaining axes are left unpadded.
padded_images_sequence = [
    np.pad(
        sequence,
        [(0, max_num_images - sequence.shape[0])] + [(0, 0)] * 3,
        mode='constant',
    )
    for sequence in images_sequences
]

# Stack the equally sized sequences into a single array
image_data = np.stack(padded_images_sequence)
desired_shape = image_data[0].shape
print(desired_shape)

In [None]:
# Tabular columns kept as network inputs: the columns used in generate_data
# minus 'Visit' and 'Hand'.
selected_features = ['Age','EDUC','MR Delay','SES','MMSE','CDR','eTIV','nWBV','ASF']

In [None]:
time_step = 4
# `selected_features` is a plain list of column names, so it is handed to the
# DataFrame indexer directly; indexing the list with the string 'features'
# raises TypeError.
X = X[selected_features]
# Turn the 2-D feature table into windows of `time_step` consecutive rows
X_reshaped = reshape_input(X, time_step)
print(X_reshaped.shape)
print(y_encoded.shape)

In [None]:
# Load the classifier from 'dimentia_detector.h5' and print the summaries of
# its tabular and image sub-models.
# NOTE(review): `model_load` is neither defined nor imported in the visible
# cells (only Keras' `load_model` is imported). Presumably it is a project
# helper returning an object that exposes `tabular_model`, `image_model` and
# `multi_modal_model` — confirm where it comes from before a fresh-kernel run.
dimentia_classifier = model_load('dimentia_detector.h5')
dimentia_classifier.tabular_model.summary()
dimentia_classifier.image_model.summary()

In [None]:
# Print the summary of the combined model that takes both the tabular and the image inputs
dimentia_classifier.multi_modal_model.summary()

In [None]:
# NOTE(review): the X_*/y_* splits used here are created by the
# train_test_split cell that appears after this one in the notebook; on a
# fresh kernel that cell has to be run first.

# Metrics on the training split, returned as a dict
dimentia_train_r3_metric = dimentia_classifier.multi_modal_model.evaluate(
    [X_tabular_train, X_images_train],
    y_train,
    use_multiprocessing=True,
    workers=32,
    return_dict=True,
)

# Metrics on the held-out split, same settings
dimentia_test_r3_metric = dimentia_classifier.multi_modal_model.evaluate(
    [X_tabular_test, X_images_test],
    y_test,
    use_multiprocessing=True,
    workers=32,
    return_dict=True,
)

print(f'Accuracy during training: {round(dimentia_train_r3_metric["accuracy"]*100, 2)}% and loss: {round(dimentia_train_r3_metric["loss"], 4)}')
print(f'Accuracy during test: {round(dimentia_test_r3_metric["accuracy"]*100, 2)}% and loss: {round(dimentia_test_r3_metric["loss"], 4)}')

In [None]:
from sklearn.model_selection import train_test_split

# X_reshaped (tabular windows), image_data (padded image sequences) and
# y_encoded (one-hot labels) are split in a single call so that row i of each
# array still refers to the same sample afterwards. 20% is held out; the
# fixed random_state keeps the split repeatable.
(
    X_tabular_train, X_tabular_test,
    X_images_train, X_images_test,
    y_train, y_test,
) = train_test_split(X_reshaped, image_data, y_encoded, test_size=0.2, random_state=42)

# Report the shape of each resulting array, one per line
print(
    *(
        f"{name} shape: {split.shape}"
        for name, split in {
            "X_tabular_train": X_tabular_train,
            "X_tabular_test": X_tabular_test,
            "X_images_train": X_images_train,
            "X_images_test": X_images_test,
            "y_train": y_train,
            "y_test": y_test,
        }.items()
    ),
    sep="\n",
)

In [None]:
# Make predictions on the test set with the multi-modal network, using the same
# two inputs as the evaluate() call. `model` is the ExtraTrees estimator fitted
# on the raw table and no `X_test` exists in this notebook, so
# `model.predict(X_test)` cannot run.
# Assumes the network returns one score per class, in the same column order as
# y_test — TODO confirm.
y_pred_prob = dimentia_classifier.multi_modal_model.predict([X_tabular_test, X_images_test])

# Calculate ROC curve and AUC.
# y_test is one-hot (one column per class), which roc_curve does not accept.
# Flattening labels and scores treats every (sample, class) pair as one binary
# decision, i.e. a micro-averaged ROC curve.
fpr, tpr, _ = roc_curve(
    np.asarray(y_test).astype(int).ravel(),
    np.asarray(y_pred_prob).ravel(),
)
roc_auc = auc(fpr, tpr)

# Plot ROC curve
plt.figure(figsize=(8, 8))
plt.plot(fpr, tpr, color='darkorange', lw=2, label='Micro-averaged ROC curve (AUC = {:.2f})'.format(roc_auc))
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc='lower right')
plt.show()

In [None]:
# Calculate precision-recall curve and AUC.
# precision_recall_curve only accepts 1-D binary labels, while y_test is
# one-hot. Flattening labels and scores gives the micro-averaged curve; the
# average precision is computed on the same flattened arrays so that the value
# in the title matches the plotted curve.
precision, recall, _ = precision_recall_curve(
    np.asarray(y_test).astype(int).ravel(),
    np.asarray(y_pred_prob).ravel(),
)
pr_auc = average_precision_score(
    np.asarray(y_test).astype(int).ravel(),
    np.asarray(y_pred_prob).ravel(),
)

# Plot Precision-Recall curve
plt.figure(figsize=(8, 8))
plt.step(recall, precision, color='b', alpha=0.2, where='post')
plt.fill_between(recall, precision, step='post', alpha=0.2, color='b')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.ylim([0.0, 1.05])
plt.xlim([0.0, 1.0])
plt.title('Precision-Recall curve (AUC = {:.2f})'.format(pr_auc))
plt.show()

In [None]:
# Calculate and print F1 score, recall, precision.
# Scores above 0.5 count as a positive prediction for that class.
y_pred = (y_pred_prob > 0.5).astype(int)

# y_test / y_pred are indicator matrices (one column per class). The default
# average='binary' raises for such targets, so an explicit averaging mode is
# required. 'macro' is the unweighted mean of the per-class scores; switch to
# 'weighted' or 'micro' if class frequencies should be taken into account.
f1 = f1_score(np.asarray(y_test).astype(int), y_pred, average='macro')
recall = recall_score(np.asarray(y_test).astype(int), y_pred, average='macro')
precision = precision_score(np.asarray(y_test).astype(int), y_pred, average='macro')

print('F1 Score:', f1)
print('Recall:', recall)
print('Precision:', precision)