In [None]:
import tensorflow as tf

Define and initialize key constants and configuration parameters to efficiently handle a dataset of MRI images aimed at detecting Alzheimer’s disease. 

In [None]:
# Dataset locations: the Kaggle input folder and the local working copy.
ORIGINAL_DS = "/kaggle/input/alzheimers-dataset-4-class-of-images/Alzheimer_s Dataset/"
PROCESSED_DS = "dataset/"

# Image dimensions handed to the directory iterator and the model input shape.
WIDTH, HEIGHT = 176, 208

# Number of images the directory iterator yields per batch.
BATCH_SIZE = 6000

# Fraction of samples held out by each train_test_split call.
VALIDATION_SPLIT = 0.2

Since *SMOTE* will be applied and the original dataset must remain unchanged, a working copy is created in which the original *train* and *test* folders are merged; all subsequent processing is applied to this copy.

In [None]:
import os
import shutil

# Rebuild the working copy from scratch so that re-running this cell never
# mixes files left over from a previous run with the fresh copy.
if os.path.exists(PROCESSED_DS):
    shutil.rmtree(PROCESSED_DS)

os.makedirs(PROCESSED_DS)

# Merge the original train/ and test/ folders into the single working
# directory. shutil is used instead of distutils.dir_util, which was removed
# from the standard library in Python 3.12; dirs_exist_ok=True lets each copy
# merge into directories that already exist.
for split_dir in ("train/", "test/"):
    shutil.copytree(ORIGINAL_DS + split_dir, PROCESSED_DS, dirs_exist_ok=True)

Here, an image generator for training is created with data augmentation: pixel values are scaled by 1/255, and each image receives a random brightness adjustment (factor between 0.8 and 1.2) and a slight random zoom (factor between 0.99 and 1.01). <br>
For this purpose, *fill_mode="nearest"* is used, which fills pixels beyond the image borders with the value of the nearest pixel.

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Settings for the training image generator: pixel values are multiplied by
# 1/255, and brightness_range / zoom_range bound the random brightness and
# zoom applied during augmentation.
augmentation_options = dict(
    rescale=1./255,
    brightness_range=[0.8, 1.2],
    zoom_range=[0.99, 1.01],
    fill_mode="nearest",
)
datagen = ImageDataGenerator(**augmentation_options)

In [None]:
# Directory iterator over the working copy; it is configured to yield
# shuffled batches of BATCH_SIZE images with categorical labels.
# NOTE(review): Keras documents target_size as (height, width) - confirm that
# passing (WIDTH, HEIGHT) matches the intended image orientation.
# NOTE(review): datagen is built without validation_split, so
# subset='training' presumably selects every image found - confirm.
image_size = (WIDTH, HEIGHT)
train_generator = datagen.flow_from_directory(
    directory=PROCESSED_DS,
    subset='training',
    class_mode='categorical',
    target_size=image_size,
    batch_size=BATCH_SIZE,
    shuffle=True,
)

In [None]:
# Pull a single batch of images and labels from the generator; the rest of
# the notebook works only on this in-memory batch.
# NOTE(review): if the directory holds more than BATCH_SIZE images, the
# remainder is never loaded - confirm this is intended.
first_batch = next(train_generator)
train_data, train_labels = first_batch

The dataset is extremely imbalanced: specifically, **NonDemented** and **VeryMildDemented** are the majority classes, while the other two are minority classes. <br>
To balance the dataset, *SMOTE* is applied to the training data. <br>
Because *SMOTE* operates on feature vectors, each image is flattened before resampling, and the balanced data is then reshaped back to its original image dimensions. <br> 
A seed is used for *SMOTE* to ensure reproducible results.

In [None]:
from imblearn.over_sampling import SMOTE

# SMOTE works on 2-D feature matrices, so every image is flattened to one row
# before resampling and restored to its image shape afterwards. The shapes
# are read from train_data itself rather than rebuilt from WIDTH/HEIGHT, so
# the round trip stays correct whatever size the generator produced.
image_shape = train_data.shape[1:]
flat_train_data = train_data.reshape(len(train_data), -1)

smote = SMOTE(random_state=42)  # fixed seed for reproducible resampling
flat_resampled, train_labels_resampled = smote.fit_resample(flat_train_data, train_labels)
train_data_resampled = flat_resampled.reshape(-1, *image_shape)

The balanced data and corresponding labels are divided into two sets: one for training and one for testing, using *VALIDATION_SPLIT* as the fraction held out for the test set. <br>
The training set is then split again, with the same fraction, to create a validation set that is used to monitor performance during training.

In [None]:
from sklearn.model_selection import train_test_split

# Two-stage split of the resampled data into train, validation and test sets:
# first hold out VALIDATION_SPLIT of the samples as the test set, then hold
# out the same fraction of the remaining samples as the validation set.
# NOTE(review): this split runs on the SMOTE output, so synthetic samples
# interpolated from images that end up in the validation/test sets may also
# appear in the training set - consider splitting before resampling.
split_options = dict(test_size=VALIDATION_SPLIT, random_state=42)
train_data, test_data, train_labels, test_labels = train_test_split(
    train_data_resampled, train_labels_resampled, **split_options
)
train_data, val_data, train_labels, val_labels = train_test_split(
    train_data, train_labels, **split_options
)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Class names taken from the generator's class_indices mapping.
# NOTE(review): assumes the keys are ordered by class index - confirm.
class_names = list(train_generator.class_indices.keys())

# Per-class sample counts after SMOTE. bincount with minlength keeps exactly
# one entry per class (zero when a class is absent), so counts always lines
# up with class_names; np.unique would drop absent classes and misalign bars.
resampled_class_ids = np.argmax(train_labels_resampled, axis=1)
counts = np.bincount(resampled_class_ids, minlength=len(class_names))

# Bar chart of the class distribution after SMOTE.
plt.figure(figsize=(10, 6))
plt.bar(class_names, counts)
plt.xlabel('Classe')
plt.ylabel('Numero di Esempi')
plt.title('Distribuzione delle Classi dopo SMOTE')
plt.xticks(rotation=45)  # rotate the x-axis labels to improve readability
plt.show()


A neural network model is built from scratch, where each convolutional layer is followed by two additional layers: **BatchNormalization**, which helps the network train faster and more stably, and **MaxPooling**, which reduces the image size while retaining only the most important features. <br>
These layers are then flattened into a one-dimensional vector, and two fully connected layers are added. <br>
One of these is the output layer, which has as many neurons as there are classes, while the other is a hidden layer of 256 neurons followed by Dropout with a rate of 50% to prevent overfitting. <br>
The activation function of the output layer is **softmax**, allowing the neuron with the highest activation to indicate the most probable class.

In [None]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout, Conv2D, Flatten, MaxPooling2D, BatchNormalization

def _conv_stage(filters, **conv_options):
    """Return one Conv2D -> BatchNormalization -> MaxPooling2D stage as a list of layers."""
    return [
        Conv2D(filters, (3, 3), activation='relu', **conv_options),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
    ]


# Three convolutional stages with 32, 64 and 128 filters, followed by a
# 256-unit dense layer with 50% dropout and a 4-unit softmax output layer.
model = Sequential([
    *_conv_stage(32, input_shape=(WIDTH, HEIGHT, 3)),
    *_conv_stage(64),
    *_conv_stage(128),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(4, activation='softmax'),
])

The model is then compiled, using a lower *learning_rate* to prevent the weights from being updated too quickly. The **Adam** optimizer and the **categorical_crossentropy** loss function are used, both of which are commonly applied to multi-class classification problems. <br>
The chosen metric is *accuracy*, which measures the percentage of correct predictions made by the model.

In [None]:
from tensorflow.keras.optimizers import Adam

# Adam optimizer with a deliberately small step size.
# NOTE(review): 1e-6 is far below the Keras Adam default of 1e-3 - confirm
# the model can converge within the configured number of epochs.
learning_rate = 1e-6
optimizer = Adam(learning_rate=learning_rate)

# Categorical cross-entropy loss, with accuracy reported as the metric.
compile_options = {
    'loss': 'categorical_crossentropy',
    'metrics': ['accuracy'],
}
model.compile(optimizer=optimizer, **compile_options)

model.summary()

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

EPOCHS = 30

# Early stopping watches validation accuracy with a patience of 5 epochs and
# is configured to restore the best weights seen.
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    patience=5,
    restore_best_weights=True,
)

# Train on the training split and track the validation split every epoch.
validation_pair = (val_data, val_labels)
train_info = model.fit(
    train_data,
    train_labels,
    epochs=EPOCHS,
    validation_data=validation_pair,
    callbacks=[early_stopping],
)

In [None]:
import matplotlib.pyplot as plt

# Learning curves: one figure per tracked metric, comparing the training
# history with the validation history.
for history_key, metric_name in (('loss', 'Loss'), ('accuracy', 'Accuracy')):
    plt.figure(figsize=(12, 6))
    plt.plot(train_info.history[history_key], label=f'Training {metric_name}')
    plt.plot(train_info.history[f'val_{history_key}'], label=f'Validation {metric_name}')
    plt.xlabel('Epochs')
    plt.ylabel(metric_name)
    plt.title(f'Learning Curve - {metric_name}')
    plt.legend()
    plt.show()

Model evaluation using the test set which contains the test images and labels.

In [None]:
# Evaluate on the held-out split; the element at index 1 of the returned
# scores is reported as the accuracy.
test_scores = model.evaluate(test_data, test_labels)
test_accuracy = test_scores[1]
print(f"Testing Accuracy: {test_accuracy * 100:.2f}%")

Predictions are made on the test data, returning an array of class probabilities for each image. Then the index of the most probable class is extracted for each image (and likewise from the one-hot true labels), and these class indices are used to generate a confusion matrix and a classification report.

In [None]:
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix

# Model outputs for the test split, reduced to the argmax index along axis 1
# for both the predictions and the true labels.
pred_labels = model.predict(test_data)
pred_ls = np.argmax(pred_labels, axis=1)
test_ls = np.argmax(test_labels, axis=1)
conf_arr = confusion_matrix(test_ls, pred_ls)

# Class names reused for the heatmap ticks and the report rows.
class_keys = train_generator.class_indices.keys()

# Confusion-matrix heatmap.
plt.figure(figsize=(10, 8))
sns.heatmap(
    conf_arr,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_keys,
    yticklabels=class_keys,
)
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.title('Confusion Matrix Heatmap')
plt.show()

report_text = classification_report(test_ls, pred_ls, target_names=class_keys)
print(report_text)