In [None]:
# Imports all necessary libraries for data processing, visualization, model building, training, and evaluation in a multiclass CNN workflow
import os
import cv2
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import keras
from keras.callbacks import EarlyStopping, ModelCheckpoint
import tensorflow as tf
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from tqdm import tqdm
from imblearn.over_sampling import SMOTE

In [None]:
# Walks the two-level dataset directory tree and collects every image path together with its
# class label (the name of the innermost folder that contains the image).
# Listings are sorted because os.listdir returns entries in arbitrary order; a stable order keeps
# the later seeded split and SMOTE steps reproducible from one machine to the next.
DATASET_DIR = '/kaggle/input/alzheimers-mri-images/Processed Dataset'

images = []
labels = []

for subfolder in tqdm(sorted(os.listdir(DATASET_DIR))):
    subfolder_path = os.path.join(DATASET_DIR, subfolder)
    # Skip stray files that sit next to the folders; calling os.listdir on a file would raise
    if not os.path.isdir(subfolder_path):
        continue

    for folder in sorted(os.listdir(subfolder_path)):
        subfolder_path2 = os.path.join(subfolder_path, folder)
        if not os.path.isdir(subfolder_path2):
            continue

        for image_filename in sorted(os.listdir(subfolder_path2)):
            images.append(os.path.join(subfolder_path2, image_filename))
            labels.append(folder)

In [None]:
# Creates a DataFrame with image file paths and their corresponding labels
df = pd.DataFrame({'image': images, 'label': labels})

# Displays the first few rows of the DataFrame to verify its contents.
# Only the last expression of a cell is rendered automatically, so a bare `df` here would show
# nothing; display() (provided by the Jupyter/IPython kernel) renders it explicitly.
display(df.head())

# Visualizes the class distribution in the dataset using a bar plot
plt.figure(figsize = (15, 8))
ax = sns.countplot(x = df.label, palette = 'Set1')
ax.set_xlabel("Class", fontsize = 20)
ax.set_ylabel("Count", fontsize = 20)
plt.title('The Number Of Samples For Each Class', fontsize = 20)
plt.grid(True)
plt.xticks(rotation = 45)
plt.show()

In [None]:
# Draws 50 random row positions from df (independent draws, so repeats are possible) and shows
# the corresponding images in a 10 x 5 grid, each titled with its class label
fig, axes = plt.subplots(10, 5, figsize = (50, 50))
sample_rows = np.random.randint(0, len(df), 50)

for ax, row in zip(axes.flat, sample_rows):
    picture = cv2.imread(df.image[row])
    # Resize for display, then reorder channels from OpenCV's BGR to the RGB order imshow expects
    picture = cv2.cvtColor(cv2.resize(picture, (224, 224)), cv2.COLOR_BGR2RGB)
    ax.imshow(picture)
    ax.axis('off')
    ax.set_title(df.label[row], fontsize = 25)

In [None]:
# Prepares the image data generator and loads all images and labels into memory for model training
Size = (176, 176)

# Pixel values are scaled by 1/255 as the images are read
work_dr = ImageDataGenerator(
    rescale = 1. / 255
)

# batch_size is tied to the DataFrame length so that a single next() call returns the whole dataset.
# A fixed number would silently drop every row past that count (with shuffle = False, the tail of df).
train_data_gen = work_dr.flow_from_dataframe(df, x_col = 'image', y_col = 'label', target_size = Size, batch_size = len(df), shuffle = False)

# Retrieves the image data and labels from the data generator for further processing
train_data, train_labels = next(train_data_gen)

# Guards against a partially loaded dataset: the single batch must hold every image the generator indexed
assert len(train_data) == train_data_gen.samples, 'The generator returned fewer images than it indexed'

# Obtains the list of class names from the data generator
class_num = list(train_data_gen.class_indices.keys())
class_num

In [None]:
# Applies SMOTE to balance the dataset by oversampling minority classes and reshapes the data for model input
sm = SMOTE(random_state = 42)
train_data, train_labels = sm.fit_resample(train_data.reshape(-1, 176 * 176 * 3), train_labels)
train_data = train_data.reshape(-1, 176, 176, 3)
print(train_data.shape, train_labels.shape)

In [None]:
# Visualizes the class distribution after applying SMOTE to ensure balanced classes
labels = [class_num[i] for i in np.argmax(train_labels, axis = 1)]
plt.figure(figsize = (15, 8))
ax = sns.countplot(x = labels, palette = 'Set1')
ax.set_xlabel("Class", fontsize = 20)
ax.set_ylabel("Count", fontsize = 20)
plt.title('The Number Of Samples For Each Class', fontsize = 20)
plt.grid(True)
plt.xticks(rotation = 45)
plt.show()

In [None]:
# Splits the balanced dataset into training, validation, and test sets and prints their shapes
X_train, X_test1, y_train, y_test1 = train_test_split(train_data, train_labels, test_size = 0.3, random_state = 42, shuffle = True, stratify = train_labels)
X_val, X_test, y_val, y_test = train_test_split(X_test1, y_test1, test_size = 0.5, random_state = 42, shuffle = True, stratify = y_test1)

print('X_train shape is ', X_train.shape)
print('X_test shape is ', X_test.shape)
print('X_val shape is ', X_val.shape)
print('y_train shape is ', y_train.shape)
print('y_test shape is ', y_test.shape)
print('y_val shape is ', y_val.shape)

In [None]:
# Defines the architecture of the convolutional neural network for multiclass classification.
# The input shape and the number of output units are read from the training arrays, so the model
# stays consistent with the image size and class list chosen in the earlier cells.
input_shape = X_train.shape[1:]
num_classes = y_train.shape[1]  # y_train is one-hot encoded: one column per class

model = keras.models.Sequential([
    # Three convolution + max-pooling stages; every stage halves the spatial size twice (stride 2 each)
    keras.layers.Conv2D(32, kernel_size = (3, 3), strides = 2, padding = 'same', activation = 'relu', input_shape = input_shape),
    keras.layers.MaxPool2D(pool_size = (2, 2), strides = 2, padding = 'same'),

    keras.layers.Conv2D(64, kernel_size = (3, 3), strides = 2, activation = 'relu', padding = 'same'),
    keras.layers.MaxPool2D((2, 2), 2, padding = 'same'),

    keras.layers.Conv2D(128, kernel_size = (3, 3), strides = 2, activation = 'relu', padding = 'same'),
    keras.layers.MaxPool2D((2, 2), 2, padding = 'same'),

    # Classifier head: flatten, one wide dense layer, dropout, softmax over the classes
    keras.layers.Flatten(),
    keras.layers.Dense(1024, activation = 'relu'),

    keras.layers.Dropout(0.3),
    keras.layers.Dense(num_classes, activation = 'softmax'),
])

In [None]:
# Prints the layer-by-layer summary and writes a diagram of the architecture to model.png
model.summary()

tf.keras.utils.plot_model(
    model,
    to_file = 'model.png',
    show_shapes = True,
    show_layer_names = True,
    show_dtype = True,
    dpi = 120,
)

# Configures the optimizer, loss, and metric used during training
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])

# Callbacks: save the best model seen so far to disk, and stop after 10 epochs without
# improvement while restoring the best weights
checkpoint_cb = ModelCheckpoint("Complex_CNN_model.keras", save_best_only = True)
early_stopping_cb = EarlyStopping(patience = 10, restore_best_weights = True)

# Trains on the training set and evaluates on the validation set after every epoch
hist = model.fit(
    X_train,
    y_train,
    epochs = 50,
    validation_data = (X_val, y_val),
    callbacks = [checkpoint_cb, early_stopping_cb],
)

# Converts the per-epoch training history into a DataFrame for further analysis
hist_ = pd.DataFrame(hist.history)
hist_

In [None]:
# Draws the per-epoch loss (left panel) and accuracy (right panel) for the training and validation sets
fig, (loss_ax, accuracy_ax) = plt.subplots(1, 2, figsize = (15, 10))

loss_ax.plot(hist_['loss'], label = 'Train_Loss')
loss_ax.plot(hist_['val_loss'], label = 'Validation_Loss')
loss_ax.set_title('Train_Loss & Validation_Loss', fontsize = 20)
loss_ax.legend()

accuracy_ax.plot(hist_['accuracy'], label = 'Train_Accuracy')
accuracy_ax.plot(hist_['val_accuracy'], label = 'Validation_Accuracy')
accuracy_ax.set_title('Train_Accuracy & Validation_Accuracy', fontsize = 20)
accuracy_ax.legend()

plt.show()

In [None]:
# Evaluates the trained model on the test set and prints the test loss and accuracy
score, acc = model.evaluate(X_test, y_test)

print('Test Loss =', score)
print('Test Accuracy =', acc)

# Generates predictions on the test set and converts both the predicted probabilities and the
# one-hot ground truth into class indices
predictions = model.predict(X_test)
y_pred = np.argmax(predictions, axis = 1)
y_test_ = np.argmax(y_test, axis = 1)

# Creates a DataFrame comparing actual and predicted class labels.
# It is stored under its own name: reusing `df` would overwrite the image-path DataFrame that the
# earlier cells read, so those cells could no longer be re-run.
results_df = pd.DataFrame({'Actual': y_test_, 'Prediction': y_pred})
results_df

In [None]:
# Displays a random sample of test images with their actual and predicted class labels and prediction confidence
plt.figure(figsize = (30, 70))

for n, i in enumerate(np.random.randint(0, len(X_test), 50)):
    plt.subplot(10, 5, n + 1)
    plt.imshow(X_test[i])
    plt.axis('off')
    # The model outputs softmax probabilities in [0, 1]; scale the winning probability by 100 so
    # the value in front of the '%' sign really is a percentage
    confidence = predictions[i].max() * 100
    plt.title(f"Actual: {class_num[y_test_[i]]}, \n Predicted: {class_num[y_pred[i]]}.\n Confidence: {confidence:.1f}%", fontsize = 20)

plt.show()

In [None]:
# Computes and visualizes the row-normalized confusion matrix for the test set predictions.
# Passing the full list of class ids keeps the matrix square and aligned with class_num even if a
# class is missing from y_test_ or y_pred.
class_ids = np.arange(len(class_num))
CM = confusion_matrix(y_test_, y_pred, labels = class_ids)

# Each row is divided by its number of true samples; rows without any samples stay at 0 instead of becoming NaN
row_totals = CM.sum(axis = 1, keepdims = True)
CM_percent = np.divide(CM, row_totals, out = np.zeros(CM.shape, dtype = float), where = row_totals != 0)

# No `center` argument: it expects the numeric value at which to center a diverging colormap, and
# `True` would be read as 1.0 and skew the color scale
sns.heatmap(CM_percent, fmt = '.2f', cbar = False, annot = True, cmap = 'Blues', xticklabels = class_num, yticklabels = class_num)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

# Prints the raw counts (a bare `CM` in the middle of a cell would not be rendered)
print(CM)

# Prints the classification report for the test set predictions, labelled with the class names
ClassificationReport = classification_report(y_test_, y_pred, labels = class_ids, target_names = class_num)
print('Classification Report is : ', ClassificationReport, sep = '\n')