In [None]:
# Imports all necessary libraries for building, training, evaluating, and visualizing a convolutional neural network for image classification
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Dense, Rescaling, Flatten
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.callbacks import EarlyStopping
from matplotlib.pyplot import imshow
import matplotlib.pyplot as plt
from tensorflow import keras
import tensorflow as tf
import seaborn as sns
from PIL import Image
import pandas as pd
import numpy as np
import os

In [None]:
# Displays an example image from the dataset to determine the input size for the neural network
%matplotlib inline
pil_im = Image.open('/kaggle/input/alzheimer-mri-healthy-vs-sick/Processed Dataset/training/Healthy/26 (100).jpg', 'r')
imshow(np.asarray(pil_im))

In [None]:
# Defines data source and parameters
image_size = (100, 100)  # Every image is resized to 100x100 pixels when it is loaded
batch_size = 32          # Number of images passed to the network per training step (one batch), not per epoch

# Root folder; the paths used by this notebook show separate training/, validation/ and test/ sub-folders
dataset_root = "/kaggle/input/alzheimer-mri-healthy-vs-sick/Processed Dataset"


def load_split(split_name, shuffle):
    """Load one pre-defined dataset split as a batched tf.data.Dataset.

    The dataset is already divided into folders, so no `validation_split` is
    applied here: every image in the requested folder is used. (Splitting again
    would silently drop 20% of the training images and keep only 20% of the
    validation/test images.)

    Args:
        split_name: Sub-folder of `dataset_root` to read
            ("training", "validation" or "test").
        shuffle: Whether samples are shuffled. Evaluation splits should pass
            False so that their order is identical on every iteration.

    Returns:
        A dataset yielding (images, one-hot labels) batches of `batch_size`.
    """
    return tf.keras.preprocessing.image_dataset_from_directory(
        os.path.join(dataset_root, split_name),
        seed = 1337,                # Sets random seed for reproducibility
        shuffle = shuffle,
        image_size = image_size,    # Sets size of dataset images
        batch_size = batch_size,
        label_mode = 'categorical'  # One-hot labels, matching the softmax output and categorical cross-entropy loss
    )


# Training data is shuffled; validation and test data keep a fixed order so that
# predictions and labels gathered from them always line up
train_ds = load_split("training", shuffle = True)
val_ds = load_split("validation", shuffle = False)
test_ds = load_split("test", shuffle = False)

# Prefetches data so that loading overlaps with computation; AUTOTUNE lets tf.data pick the buffer size
train_ds = train_ds.prefetch(buffer_size = tf.data.AUTOTUNE)
test_ds = test_ds.prefetch(buffer_size = tf.data.AUTOTUNE)
val_ds = val_ds.prefetch(buffer_size = tf.data.AUTOTUNE)

In [None]:
# Counts the number of images in each class in the training dataset
! ls -1 '/kaggle/input/alzheimer-mri-healthy-vs-sick/Processed Dataset/training/Healthy' | wc -l
! ls -1 '/kaggle/input/alzheimer-mri-healthy-vs-sick/Processed Dataset/training/Sick' | wc -l

# Training directory path
train_dir = "/kaggle/input/alzheimer-mri-healthy-vs-sick/Processed Dataset/training"

# Counting the number of images in each class
class_counts = {cls: len(os.listdir(os.path.join(train_dir, cls))) for cls in os.listdir(train_dir)}

# Converting to DataFrame to be used for visualization
df = pd.DataFrame(list(class_counts.items()), columns=["Class", "Count"])

# Plotting the number of images per class
plt.figure(figsize = (15, 8))
ax = sns.barplot(x = df["Class"], y = df["Count"], palette = "Set1")
ax.set_xlabel("Class", fontsize = 20)
ax.set_ylabel("Count", fontsize = 20)
plt.title("The Number Of Samples For Each Class", fontsize = 20)
plt.grid(True)
plt.xticks(rotation = 45)
plt.show()

In [None]:
# Builds the CNN as a Sequential stack: input rescaling, five convolution/pooling stages
# (with increasing filter counts and dropout), then a dense classifier head with two softmax outputs
model = keras.Sequential()

# Rescales inputs as x / 127.5 - 1, i.e. 8-bit pixel values in [0, 255] are mapped to [-1, 1]
model.add(Rescaling(scale = (1. / 127.5),
                    offset = -1,
                    input_shape = (100, 100, 3)))

# (number of filters, dropout rate applied after pooling or None for no dropout) for each stage
conv_stages = [
    (32, None),
    (64, 0.25),
    (128, 0.25),
    (256, 0.5),
    (512, 0.7),
]

for filters, drop_rate in conv_stages:
    model.add(Conv2D(filters, kernel_size = (3, 3), activation = 'relu'))
    model.add(MaxPooling2D(pool_size = (2, 2)))
    if drop_rate is not None:
        model.add(Dropout(drop_rate))

# Classifier head: flatten the feature maps, one hidden dense layer, dropout, then class probabilities
model.add(Flatten())
model.add(Dense(128, activation = 'relu'))
model.add(Dropout(0.5))

model.add(Dense(2, activation = 'softmax'))

In [None]:
# Prepares the model for training: categorical cross-entropy loss, Adam optimizer with a 1e-3 learning rate,
# and accuracy as the reported metric
adam_optimizer = tf.keras.optimizers.Adam(1e-3)

model.compile(
    optimizer = adam_optimizer,
    loss = tf.keras.losses.categorical_crossentropy,
    metrics = ['accuracy'],
)

In [None]:
# Prints a layer-by-layer text summary of the model
model.summary()

# Writes the architecture diagram to model.png; plot_model is kept as the last expression of the cell
# so that its return value is shown as the cell output
diagram_options = dict(
    to_file = 'model.png',
    show_shapes = True,
    show_layer_names = True,
    show_dtype = True,
    dpi = 120,
)
tf.keras.utils.plot_model(model, **diagram_options)

In [None]:
# Trains on train_ds and evaluates on val_ds after every epoch; early stopping ends the run before
# the epoch limit when validation accuracy stops improving
epochs = 200  # Upper bound on the number of training epochs

# Early stopping: watches validation accuracy (higher is better), waits 10 epochs without improvement,
# then stops and restores the weights from the best epoch
es = EarlyStopping(
    monitor = 'val_accuracy',
    mode = 'max',
    patience = 10,
    restore_best_weights = True,
    verbose = 1,
)

# Runs training; h.history holds the per-epoch metrics used for plotting later
h = model.fit(train_ds, validation_data = val_ds, epochs = epochs, callbacks = [es])

In [None]:
# Graphical representation of the results obtained during the training phase.
# Accuracy and loss are drawn on separate panels because they have different scales and meanings;
# each panel shows both the training and the validation curve.
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize = (14, 5))

# Left panel: accuracy per epoch
acc_ax.plot(h.history['accuracy'], label = 'training')
acc_ax.plot(h.history['val_accuracy'], label = 'validation')
acc_ax.set_title('Model accuracy')
acc_ax.set_xlabel('epoch')
acc_ax.set_ylabel('accuracy')
acc_ax.legend(loc = 'lower right')

# Right panel: loss per epoch (val_loss is recorded because validation data is passed to fit)
loss_ax.plot(h.history['loss'], label = 'training')
loss_ax.plot(h.history['val_loss'], label = 'validation')
loss_ax.set_title('Model loss')
loss_ax.set_xlabel('epoch')
loss_ax.set_ylabel('loss')
loss_ax.legend(loc = 'upper right')

fig.tight_layout()
plt.show()

In [None]:
# Evaluates the trained model on the validation dataset.
# Labels and model outputs are collected batch by batch in the same loop so that they stay aligned.
val_label_batches = []
val_prob_batches = []
for x_batch, y_batch in val_ds:
    val_label_batches.append(np.asarray(y_batch))                    # One-hot true labels of this batch
    val_prob_batches.append(model.predict(x_batch, verbose = 0))     # Class probabilities; progress bar silenced

# Gets true class indices (argmax over the one-hot labels)
labels = np.argmax(np.concatenate(val_label_batches, axis = 0), axis = 1)

# Gets predicted class indices (argmax over the predicted probabilities)
predictions = np.argmax(np.concatenate(val_prob_batches, axis = 0), axis = 1)

# Calculates confusion matrix: rows are true classes, columns are predicted classes
cf_matrix = confusion_matrix(labels, predictions)

# Plots confusion matrix; entries are integer counts, so they are formatted as integers
sns.heatmap(cf_matrix, annot = True, fmt = "d", cmap = "Blues")
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Validation Confusion Matrix")
plt.show()

# Prints classification report (true labels first, predictions second)
print(classification_report(labels, predictions, digits = 4))

In [None]:
# Defines class names for Alzheimer's MRI dataset (index 0 -> 'Healthy', index 1 -> 'Sick')
class_names = ['Healthy', 'Sick']

# Generates predictions and retrieves the true labels for the test dataset in a single pass.
# image_dataset_from_directory shuffles by default, and a shuffled dataset may yield its samples in a
# different order each time it is iterated; predicting in one pass and reading labels in another
# could therefore pair predictions with the wrong labels.
test_prob_batches = []
test_label_batches = []
for x_batch, y_batch in test_ds:
    test_prob_batches.append(model.predict(x_batch, verbose = 0))
    test_label_batches.append(np.asarray(y_batch))

# Predicted class probabilities for every test sample, one row per sample
predictions = np.concatenate(test_prob_batches, axis = 0)

# Converts predictions to class labels
y_pred = np.argmax(predictions, axis = 1)

# Converts one-hot true labels to class indices
y_real = np.argmax(np.concatenate(test_label_batches, axis = 0), axis = 1)

# Computes confusion matrix; passing the label indices keeps its shape matched to class_names
# even if one class is absent from the labels or the predictions
cm = confusion_matrix(y_real, y_pred, labels = list(range(len(class_names))))

# Converts each row to percentages of that true class; rows without samples stay at 0 instead of dividing by zero
row_totals = cm.sum(axis = 1, keepdims = True)
cm_percent = np.divide(cm * 100.0, row_totals, out = np.zeros(cm.shape, dtype = float), where = row_totals != 0)

# Plots confusion matrix as a heatmap
plt.figure(figsize = (6, 6))
sns.heatmap(cm_percent, annot = True, fmt = ".2f", cmap = "Blues", xticklabels = class_names, yticklabels = class_names)
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix (%)")
plt.show()

In [None]:
# For every test sample, prints the class probabilities, the predicted class, the actual class,
# and whether the prediction matches the actual label
for sample_probs, true_idx in zip(predictions, y_real):
    # Formats each class probability as a percentage with two decimals
    formatted_probs = [f"{prob*100:.2f}%" for prob in sample_probs]

    # The predicted class is the one with the highest probability
    guess_idx = np.argmax(sample_probs)
    guess_name = class_names[guess_idx]
    true_name = class_names[true_idx]

    print(f"Predictions: {formatted_probs} -> Predicted class: {guess_name} (Class {guess_idx}), Actual Label: {true_name}")

    verdict = "Correct ✅\n" if guess_idx == true_idx else "Incorrect ❌\n"
    print(verdict)