In [None]:
# Imports libraries for data processing, visualization, and neural network construction
from PIL import Image
from matplotlib.pyplot import imshow
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Dense, Rescaling, Flatten
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix
import pandas as pd
import os

In [None]:
# Displays an example image to determine input size for the network
%matplotlib inline
pil_im = Image.open('/kaggle/input/alzheimers-dataset-early-stage-vs-health/Processed Dataset/training/Non Demented/26 (100).jpg', 'r')
imshow(np.asarray(pil_im))

In [None]:
# Loads the training, validation, and test datasets, then enables prefetching.
#
# The dataset ships with dedicated training/validation/test folders, so every image
# in each folder is used. Re-splitting each folder with validation_split would keep
# only 80% of the training images and only 20% of the validation and test images.
image_size = (100, 100)  # Target (height, width) every image is resized to
batch_size = 32          # Number of images per batch

data_root = "/kaggle/input/alzheimers-dataset-early-stage-vs-health/Processed Dataset"


def _load_split(split_name, shuffle):
    """Builds a batched dataset of (images, one-hot labels) from one split folder.

    Args:
        split_name: Sub-folder of ``data_root`` to read ("training", "validation", "test").
        shuffle: Whether the loader shuffles the files. Evaluation splits are loaded
            with ``shuffle = False`` so that every pass yields the samples in the same
            order and predictions can be matched with labels.

    Returns:
        The dataset produced by ``image_dataset_from_directory``.
    """
    return tf.keras.preprocessing.image_dataset_from_directory(
        os.path.join(data_root, split_name),
        shuffle = shuffle,
        seed = 1337,                # Fixes the shuffle order for reproducibility
        image_size = image_size,
        batch_size = batch_size,
        label_mode = 'categorical'  # One-hot labels, matching the softmax output and categorical cross-entropy
    )


train_ds = _load_split("training", shuffle = True)
val_ds = _load_split("validation", shuffle = False)
test_ds = _load_split("test", shuffle = False)

train_ds = train_ds.prefetch(buffer_size = 32)  # Prefetches training data
val_ds = val_ds.prefetch(buffer_size = 32)      # Prefetches validation data
test_ds = test_ds.prefetch(buffer_size = 32)    # Prefetches test data

In [None]:
# Counts and visualizes the number of images per class in the training set
! ls -1 '/kaggle/input/alzheimers-dataset-early-stage-vs-health/Processed Dataset/training/Non Demented' | wc -l
! ls -1 '/kaggle/input/alzheimers-dataset-early-stage-vs-health/Processed Dataset/training/Very Mild Demented' | wc -l

# Defines training directory path
train_dir = "/kaggle/input/alzheimers-dataset-early-stage-vs-health/Processed Dataset/training"

# Counts the number of images in each class
class_counts = {cls: len(os.listdir(os.path.join(train_dir, cls))) for cls in os.listdir(train_dir)}

# Converts class counts to DataFrame
df = pd.DataFrame(list(class_counts.items()), columns = ["Class", "Count"])

# Plots the number of images per class
plt.figure(figsize = (15, 8))
ax = sns.barplot(x = df["Class"], y = df["Count"], palette = "Set1")
ax.set_xlabel("Class", fontsize = 20)
ax.set_ylabel("Count", fontsize = 20)
plt.title("The Number Of Samples For Each Class", fontsize = 20)
plt.grid(True)
plt.xticks(rotation = 45)
plt.show()

In [None]:
# Creates an empty Sequential model; it has no layers and is not compiled yet
model = keras.models.Sequential()

In [None]:
# Defines the CNN in one expression so the cell is idempotent: re-running it rebuilds
# the same network instead of appending a second copy of every layer to an existing model.
model = keras.Sequential([
    # Input follows the loader's image_size so the two cannot drift apart; 3 colour channels
    keras.Input(shape = (*image_size, 3)),

    # Maps pixel values from [0, 255] to [-1, 1]
    Rescaling(scale = (1. / 127.5), offset = -1),

    # Five convolution + max-pooling stages with doubling filter counts (32 -> 512).
    # With a 100x100 input and unpadded 3x3 kernels the feature map shrinks to 1x1
    # after the last pooling layer, so smaller inputs would not fit this stack.
    Conv2D(32, kernel_size = (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),

    Conv2D(64, kernel_size = (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),
    Dropout(0.25),

    Conv2D(128, kernel_size = (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),
    Dropout(0.25),

    Conv2D(256, kernel_size = (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),
    Dropout(0.5),

    Conv2D(512, kernel_size = (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),
    Dropout(0.7),

    # Classification head
    Flatten(),
    Dense(128, activation = 'relu'),
    Dropout(0.5),

    # Two-way softmax: one probability per class
    Dense(2, activation = 'softmax'),
])

In [None]:
# Attaches the training configuration: Adam at a 1e-3 learning rate, categorical
# cross-entropy as the loss, and accuracy as the reported metric
model.compile(
    optimizer = tf.keras.optimizers.Adam(learning_rate = 1e-3),
    loss = tf.keras.losses.categorical_crossentropy,
    metrics = ['accuracy'],
)

# Prints the layer-by-layer summary
model.summary()

# Writes a diagram of the architecture to model.png; as the cell's last expression
# the returned object is also displayed by the notebook
plot_options = dict(
    to_file = 'model.png',
    show_shapes = True,
    show_layer_names = True,
    show_dtype = True,
    dpi = 120,
)
tf.keras.utils.plot_model(model, **plot_options)

In [None]:
# Fits the network, stopping early once validation accuracy stops improving
epochs = 200  # Upper bound on epochs; early stopping may end training sooner

# Stops after 10 epochs without a new best val_accuracy and rolls the model back
# to the weights of the best epoch
es = EarlyStopping(
    monitor = 'val_accuracy',
    mode = 'max',
    patience = 10,
    restore_best_weights = True,
    verbose = 1,
)

# h.history holds the per-epoch metrics of the run
h = model.fit(train_ds, validation_data = val_ds, epochs = epochs, callbacks = [es])

In [None]:
# Draws the training curves: accuracy on the left panel, loss on the right
fig, axs = plt.subplots(1, 2, figsize = (14, 5))

# One row per panel: (training key, validation key, display name, legend position)
curve_specs = (
    ('accuracy', 'val_accuracy', 'Accuracy', 'lower right'),
    ('loss', 'val_loss', 'Loss', 'upper right'),
)

for panel, (train_key, val_key, pretty, legend_loc) in zip(axs, curve_specs):
    panel.plot(h.history[train_key], label = f'Training {pretty}')
    panel.plot(h.history[val_key], label = f'Validation {pretty}')
    panel.set_title(f'Model {pretty}')
    panel.set_ylabel(pretty)
    panel.set_xlabel('Epoch')
    panel.legend(loc = legend_loc)
    panel.grid(True)

plt.tight_layout()
plt.show()

In [None]:
# Computes and visualizes the confusion matrix for validation data with class names
# NOTE(review): assumes class index 0 is the "Non Demented" folder and index 1 the
# "Very Mild Demented" folder — confirm against the loader's class order
class_names = ['Healthy', 'Sick']
class_ids = list(range(len(class_names)))

# Walks the validation set once, keeping each batch's labels next to the
# predictions made for that same batch
true_batches, prob_batches = [], []
for images, onehot in val_ds:
    true_batches.append(onehot.numpy())
    prob_batches.append(model.predict(images, verbose = 0))  # verbose = 0: no progress bar per batch

# results[0] holds the one-hot true labels, results[1] the predicted probabilities.
# The previous version read these two rows the other way round, which transposed the
# confusion matrix and swapped precision with recall in the report.
results = np.stack([np.concatenate(true_batches, axis = 0),
                    np.concatenate(prob_batches, axis = 0)])

# Gets true class indices
labels = np.argmax(results[0], axis = 1)

# Gets predicted class indices
predictions = np.argmax(results[1], axis = 1)

# Computes confusion matrix (rows = true class, columns = predicted class);
# passing the class ids keeps it 2x2 even if one class never occurs
cf_matrix = confusion_matrix(labels, predictions, labels = class_ids)

# Plots confusion matrix
sns.heatmap(cf_matrix, annot = True, fmt = "d", cmap = "Blues", xticklabels = class_names, yticklabels = class_names)
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix (Validation Set)")
plt.show()

# Prints classification report
print(classification_report(labels, predictions, labels = class_ids, target_names = class_names, digits = 4))

In [None]:
# Generates predictions for the test set together with the matching true labels.
# Both are collected in ONE pass over test_ds: a dataset that shuffles yields a
# different order on every iteration, so predicting in one pass and reading the
# labels in another would pair predictions with the wrong samples.
true_batches, prob_batches = [], []
for images, onehot in test_ds:
    true_batches.append(onehot.numpy())
    prob_batches.append(model.predict(images, verbose = 0))  # verbose = 0: no progress bar per batch

y_real = np.argmax(np.concatenate(true_batches, axis = 0), axis = 1)  # True class indices
y_pred = np.argmax(np.concatenate(prob_batches, axis = 0), axis = 1)  # Predicted class indices

# Computes the confusion matrix (rows = true class, columns = predicted class);
# passing the class ids keeps it square even if one class never occurs
cm = confusion_matrix(y_real, y_pred, labels = list(range(len(class_names))))

# Converts each row to percentages of that true class; rows without any sample stay at 0
# instead of producing a division by zero
row_totals = cm.sum(axis = 1, keepdims = True)
cm_percent = np.divide(cm * 100.0, row_totals, out = np.zeros(cm.shape, dtype = float), where = row_totals != 0)

# Plots confusion matrix as heatmap
plt.figure(figsize = (6, 6))
sns.heatmap(cm_percent, annot = True, fmt = ".2f", cmap = "Blues", xticklabels = class_names, yticklabels = class_names)
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix (%)")
plt.show()

In [None]:
# Makes predictions on the test set and compares them to true labels, one line per image
# NOTE(review): assumes class index 0 is the "Non Demented" folder and index 1 the
# "Very Mild Demented" folder — confirm against the loader's class order
class_names = ['Healthy', 'Sick']

# Collects probabilities and labels in ONE pass over test_ds. A dataset that shuffles
# yields a different order on every iteration, so predicting in one pass and reading
# the labels in another would compare each prediction with another image's label.
prob_batches, true_batches = [], []
for images, onehot in test_ds:
    prob_batches.append(model.predict(images, verbose = 0))  # verbose = 0: no progress bar per batch
    true_batches.append(onehot.numpy())

# Per-image class probabilities
predictions = np.concatenate(prob_batches, axis = 0)

# Converts predictions to class labels
y_pred = np.argmax(predictions, axis = 1)

# Converts one-hot labels to class indices
y_real = np.argmax(np.concatenate(true_batches, axis = 0), axis = 1)

# Displays predictions and compares with true labels
for p, predicted_class_idx, l in zip(predictions, y_pred, y_real):
    probs_percent = [f"{prob*100:.2f}%" for prob in p]       # Converts probabilities to percentage
    predicted_class_name = class_names[predicted_class_idx]  # Gets predicted class name

    print(f"Predictions: {probs_percent} -> Predicted class: {predicted_class_name} (Class {predicted_class_idx}), Actual Label: {class_names[l]}")

    if predicted_class_idx == l:
        print("Correct ✅\n")
    else:
        print("Incorrect ❌\n")