In [None]:
#import important libraries
import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.image as img

import cv2
import itertools
import pathlib
import warnings
from PIL import Image
from random import randint
warnings.filterwarnings('ignore')

from sklearn.metrics import matthews_corrcoef as MCC
from sklearn.metrics import balanced_accuracy_score as BAS
from sklearn.metrics import classification_report, confusion_matrix


import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow.keras.preprocessing import image_dataset_from_directory
from keras.utils.vis_utils import plot_model
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.preprocessing.image import ImageDataGenerator as IDG
from tensorflow.keras.layers import SeparableConv2D, BatchNormalization, GlobalAveragePooling2D


import os
# Show what the raw dataset directory contains
dataset_entries = os.listdir("../input/alzheimerdata/Dataset")
print(dataset_entries)

# Report the TensorFlow release this notebook is running against
print("TensorFlow Version:", tf.__version__)

# Generate Split data folders

In [None]:
!pip install split-folders[full]

In [None]:
# Load the raw images and split them on disk into train / val / test folders
import splitfolders

WORK_DIR = "../input/alzheimerdata/Dataset"  # source directory, one sub-folder per class
OUTPUT = "dataset"  # destination root for the generated split folders

# The ratio tuple is ordered (train, val, test)
splitfolders.ratio(
    WORK_DIR,
    output=OUTPUT,
    seed=1337,
    ratio=(.8, .1, .1),
)

# Visualize Sample Class Sizes

In [None]:
# Count how many images each class folder holds and plot the class balance.
# The dict is pre-populated so the bars keep this display order.
DATA_SIZE = {'Non_Demented': 0,
             'Mild_Demented': 0,
             'Moderate_Demented': 0,
             'Very_Mild_Demented': 0}

# Count the directory entries directly. The previous per-file loop used a
# variable named `img`, which overwrote the `matplotlib.image` alias imported
# as `img` at the top of the notebook.
for cls in os.listdir(WORK_DIR):
    DATA_SIZE[cls] = len(os.listdir(os.path.join(WORK_DIR, cls)))

keys = list(DATA_SIZE.keys())
values = list(DATA_SIZE.values())

fig = plt.figure(figsize=(10, 5))

plt.bar(keys, values, color=(0.2, 0.4, 0.6, 0.6), width=0.4)
plt.title("Images per class")
plt.xlabel("Class")
plt.ylabel("Number of images")
plt.show()

# Data Preparation

In [None]:
# Define some parameters for the loaders and the model.
# Keras derives label indices from the alphanumerically sorted class folder
# names (Mild_Demented, Moderate_Demented, Non_Demented, Very_Mild_Demented),
# so the display names are listed in that same order to keep index -> name
# lookups aligned with the labels the loaders produce.
CLASSES = ['Mild',
           'Moderate',
           'Non',
           'VeryMild',
           ]

# For replicable results
SEED = 123
tf.random.set_seed(SEED)
np.random.seed(SEED)

IMG_SIZE = 128
# Size of the images is (128, 128)
IMAGE_SIZE = [128, 128]
DIM = (IMG_SIZE, IMG_SIZE)
# Number of classes
NUM_CLASSES = len(CLASSES)

# Map label index -> class display name
labels = dict(zip([0, 1, 2, 3], CLASSES))
# NOTE(review): presumably chosen to be larger than any split so that a single
# batch returns the whole split -- confirm against the dataset size.
BATCH_SIZE = 6500
print("Constant variables settings complete")

# Before Applying Augmentation

In [None]:
# Options shared by the three un-augmented loaders
loader_options = dict(seed=123, image_size=DIM, batch_size=BATCH_SIZE)

# Build one tf.data dataset per split directory (train, then test, then val)
train_data = tf.keras.preprocessing.image_dataset_from_directory(
    "./dataset/train", **loader_options
)
test_data = tf.keras.preprocessing.image_dataset_from_directory(
    "./dataset/test", **loader_options
)
val_data = tf.keras.preprocessing.image_dataset_from_directory(
    "./dataset/val", **loader_options
)

# Visualize Images before Augmentation

In [None]:
# Preview a few raw training images with their class names.
# Titles come from the dataset's own `class_names`, which follow the same
# order as the integer labels it yields, so each caption matches the folder
# the image was read from. The loop variables are named so that they do not
# overwrite the notebook-level `labels` mapping.
class_names = train_data.class_names
plt.figure(figsize=(10, 10))
for batch_images, batch_labels in train_data.take(1):
    # Never index past the end of the batch
    for i in range(min(12, batch_images.shape[0])):
        ax = plt.subplot(4, 4, i + 1)
        plt.imshow(batch_images[i].numpy().astype("uint8"))
        plt.title(class_names[int(batch_labels[i])])
        plt.axis("off")

 # Data Augmentation

In [None]:
# Augmentation settings for the training generator.
BRIGHT_RANGE = [0.8, 1.2]
DATA_FORMAT = "channels_last"
FILL_MODE = "constant"
# Plain booleans: a trailing comma after the value would turn each of these
# into a 1-tuple, and a non-empty tuple such as (False,) is truthy.
HORZ_FLIP = True
VERTICAL_FLIP = False
ZOOM = [.99, 1.01]

# Generate training and test data with Image Generator
# Training generator: rescale pixels by 1/255 and apply light augmentation
train_datagen = IDG(rescale=1./255,
                    brightness_range=BRIGHT_RANGE,
                    zoom_range=ZOOM,
                    data_format=DATA_FORMAT,
                    fill_mode=FILL_MODE,
                    horizontal_flip=HORZ_FLIP,
                    vertical_flip=VERTICAL_FLIP,
                    )


# Validation generator: rescaling only, no augmentation
val_datagen = IDG(rescale=1./255)

# Test generator: rescaling only, no augmentation
test_datagen = IDG(rescale=1./255)


# Loading the images.
# Flow images in batches of BATCH_SIZE using the generators above.
# flow_from_directory lets the classifier identify the labels directly from
# the names of the directories the images lie in.

train_ds = train_datagen.flow_from_directory(
    directory="./dataset/train",
    seed=SEED,
    target_size=DIM,
    batch_size=BATCH_SIZE,
)

test_ds = test_datagen.flow_from_directory(
    directory="./dataset/test",
    seed=SEED,
    target_size=DIM,
    shuffle=False,  # keep file order so predictions can be compared with labels by index
    batch_size=BATCH_SIZE,
)
val_ds = val_datagen.flow_from_directory(
    directory="./dataset/val",
    seed=SEED,
    target_size=DIM,
    shuffle=False,  # keep file order so predictions can be compared with labels by index
    batch_size=BATCH_SIZE,
)



# Visualize images after augmentation

In [None]:
# Take a look at some of the training images produced by the augmenting
# generator.
# Map label index -> class display name
labels = dict(zip([0, 1, 2, 3], CLASSES))

# Get a batch of images and labels (same `next()` call style as the cell that
# later fetches the arrays)
x, y = next(train_ds)

# Display a grid of 12 randomly chosen images from the batch
plt.figure(figsize=(10, 10))
for i in range(12):
    ax = plt.subplot(4, 4, i + 1)
    # Draw the index from the actual batch size instead of a hard-coded bound,
    # so the cell does not fail when the batch holds fewer images
    idx = randint(0, len(x) - 1)
    plt.imshow(x[idx])
    plt.axis("off")
    plt.title("Class:{}".format(labels[np.argmax(y[idx])]))

# Generate Data and Label

In [None]:
# Fetch the data and the labels: pull one batch from each generator
train_data, train_labels = next(train_ds)
val_data, val_labels = next(val_ds)
test_data, test_labels = next(test_ds)

# Print dimensions of each split (each line is labelled with its own split)
print(" Training data and shape and label dimension: ", train_data.shape, train_labels.shape)
print(" Validation data and shape and label dimension: ", val_data.shape, val_labels.shape)
print(" Testing data and shape and label dimension: ", test_data.shape, test_labels.shape)

In [None]:
from imblearn.over_sampling import SMOTE
# Since the classes are imbalanced, over-sample the training data

counter = len(train_data)
print('Before smote train shape & label: ', counter, train_data.shape, train_labels.shape)

# Oversample the train dataset using SMOTE. The sampler is seeded so the
# synthetic samples are the same on every run.
sm = SMOTE(random_state=SEED)
# SMOTE works on 2-D feature matrices, so flatten each image to one row
train_data_sm, train_labels_sm = sm.fit_resample(train_data.reshape(-1, IMG_SIZE * IMG_SIZE * 3), train_labels)

# Restore the image shape expected by the network
train_data_sm = train_data_sm.reshape(-1, IMG_SIZE, IMG_SIZE, 3)


print('After smote train shape & label: ', train_data_sm.shape, train_labels_sm.shape)

# Checkout the data shape

In [None]:
# Inspect the array shapes and look at one sample from the training and test data
from tensorflow.keras.utils import to_categorical
print('Training data shape : ', train_data_sm.shape, train_labels_sm.shape)
print('Testing data shape : ', test_data.shape, test_labels.shape)

# Map label index -> class display name
classes = dict(zip([0, 1, 2, 3], CLASSES))
classes_num = len(classes)
print('Total number of outputs : ', classes_num)
print('Output classes : ', classes)
plt.figure(figsize=[10, 5])

# Display the first image in training data
plt.subplot(121)
plt.imshow(train_data[0, :, :], cmap='gray')
plt.title("Train Ground Truth : {}".format(train_labels[0]))


# Display the first image in testing data, captioned with the test label
# (the caption previously showed the first training label)
plt.subplot(122)
plt.imshow(test_data[0, :, :], cmap='gray')
plt.title("Test Ground Truth : {}".format(test_labels[0]))

# Convolutional Neural Network Design

# Train the model

In [None]:
# Custom callback that stops training once validation accuracy goes above 99%

class MyCallback(tf.keras.callbacks.Callback):
    """Stop training as soon as the logged `val_acc` exceeds 0.99."""

    def on_epoch_end(self, epoch, logs=None):
        """Check the epoch's logs and request a stop when the threshold is passed.

        Args:
            epoch: index of the epoch that just finished (unused).
            logs: dict of metric values for the epoch; may be None.
        """
        # `logs` can be None and 'val_acc' can be missing (e.g. no validation
        # data); comparing None with a float would raise a TypeError.
        val_acc = (logs or {}).get('val_acc')
        if val_acc is not None and val_acc > 0.99:
            print("\nReached accuracy threshold! Terminating training.")
            self.model.stop_training = True

callback = MyCallback()

# EarlyStopping callback: stops when val_loss has not improved for 2 epochs.
# NOTE(review): it is created here but not passed to the fit() call visible
# in this notebook.
from tensorflow.keras.callbacks import EarlyStopping
early_stopping = EarlyStopping(monitor='val_loss', patience=2)



# Model-building imports. Everything comes from `tensorflow.keras`, matching
# the imports at the top of the notebook, instead of mixing in the standalone
# `keras` package.
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D, BatchNormalization

cnn4 = Sequential()
cnn4.add(keras.layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3)))

# Convolution block 1
cnn4.add(Conv2D(filters=16, kernel_size=(3, 3), padding='same', activation='relu'))
cnn4.add(MaxPooling2D(pool_size=(2, 2)))
cnn4.add(Dropout(0.2))

# Convolution block 2
cnn4.add(Conv2D(filters=32, kernel_size=(3, 3), padding='same', activation='relu'))
cnn4.add(BatchNormalization())
cnn4.add(MaxPooling2D(pool_size=(2, 2)))
cnn4.add(Dropout(0.15))

# Convolution block 3 (no pooling)
cnn4.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
cnn4.add(BatchNormalization())
cnn4.add(Dropout(0.15))

cnn4.add(Flatten())

# Fully connected head
cnn4.add(Dense(128, activation='relu', kernel_initializer="he_normal"))
cnn4.add(BatchNormalization())
cnn4.add(Dropout(0.5))

cnn4.add(Dense(64, activation='relu'))
cnn4.add(Dropout(0.5))

# Output layer: one softmax unit per class
cnn4.add(Dense(NUM_CLASSES, activation='softmax'))


# Metrics tracked during training. The accuracy metric is named 'acc', which
# is why the logs contain 'acc' / 'val_acc'.
Metrics = [tf.keras.metrics.CategoricalAccuracy(name='acc'),
           tf.keras.metrics.AUC(name='auc'),
           tf.keras.metrics.Precision(name='precision'),
           tfa.metrics.F1Score(NUM_CLASSES)]

# Keras expects callback instances, so store the `callback` object created
# earlier rather than the MyCallback class itself.
callbacks = [callback]

cnn4.compile(loss="categorical_crossentropy",
             optimizer="Adam", metrics=Metrics)

cnn4.summary()

In [None]:
# Train on the SMOTE-balanced arrays, validating on the held-out validation
# arrays, and report the wall-clock duration afterwards
from time import time

EPOCHS = 100

StartTime = time()
history = cnn4.fit(
    train_data_sm,
    train_labels_sm,
    validation_data=(val_data, val_labels),
    callbacks=[callback],
    epochs=EPOCHS,
)
EndTime = time()

# Elapsed seconds are rounded first and then converted to minutes
elapsed_minutes = round(EndTime - StartTime, 2) / 60
print("{:.2f}".format(elapsed_minutes), "Minutes ****")

# Check for Overfitting

In [None]:
# Draw train vs. validation curves for accuracy, AUC and loss, one panel each
import matplotlib.pyplot as plt

fig, ax = plt.subplots(1, 3, figsize=(30, 5))
ax = ax.ravel()

for panel, metric in zip(ax, ["acc", "auc", "loss"]):
    train_curve = history.history[metric]
    val_curve = history.history["val_" + metric]
    panel.plot(train_curve)
    panel.plot(val_curve)
    panel.set_title("Model {}".format(metric))
    panel.set_xlabel("Epochs")
    panel.set_ylabel(metric)
    panel.legend(["train", "val"])

# Model Evaluation

In [None]:
# Score the trained model on the training, validation and test arrays

train_scores = cnn4.evaluate(x=train_data, y=train_labels)
val_scores = cnn4.evaluate(x=val_data, y=val_labels)
test_scores = cnn4.evaluate(x=test_data, y=test_labels)

# evaluate() returns the loss first, followed by the compiled metrics; the
# first compiled metric is the categorical accuracy
ACC_POS = 1
print("Training Accuracy: %.2f%%" % (train_scores[ACC_POS] * 100))
print("Validation Accuracy: %.2f%%" % (val_scores[ACC_POS] * 100))
print("Testing Accuracy: %.2f%%" % (test_scores[ACC_POS] * 100))

# Make predictions with the trained model

In [None]:
# Run the trained network over the whole test array; each output row holds
# the softmax scores for one image

pred_labels = cnn4.predict(x=test_data)

In [None]:
#Print the classification report of the tested data

# The predictions are softmax score vectors; convert each one to a 0/1
# one-hot vector so it has the same form as test_labels
def roundoff(arr):
    """One-hot encode a 1-D score vector in place and return it.

    The position of the largest score is set to 1 and every other position
    to 0. When several positions tie for the maximum, only the first one is
    set, so the result always contains exactly one 1 (marking every tied
    position would produce a row that is not one-hot).

    Args:
        arr: 1-D NumPy array of scores; it is modified in place.

    Returns:
        The same array object, now one-hot encoded.
    """
    winner = int(np.argmax(arr))
    arr[:] = 0
    arr[winner] = 1
    return arr

# One-hot encode every prediction row. roundoff() modifies the row it is
# given, and each row is a view into pred_labels, so pred_labels itself is
# updated. The loop variable is deliberately not called `labels`, so the
# notebook-level index -> name mapping of that name is left untouched.
for score_row in pred_labels:
    roundoff(score_row)

print(classification_report(test_labels, pred_labels, target_names=CLASSES))

# Confusion matrix 

In [None]:
# Plot the confusion matrix to understand the classification in detail

# Collapse one-hot rows to class indices
pred_ls = np.argmax(pred_labels, axis=1)
test_ls = np.argmax(test_labels, axis=1)

conf_arr = confusion_matrix(test_ls, pred_ls)

plt.figure(figsize=(8, 6), dpi=80, facecolor='w', edgecolor='k')

ax = sns.heatmap(conf_arr, cmap='Blues', annot=True, fmt='d', xticklabels=CLASSES, yticklabels=CLASSES)

plt.title('Alzheimer\'s Disease Diagnosis')
plt.xlabel('Prediction')
plt.ylabel('Truth')
# plt.show() takes no Axes argument; a positional value would be interpreted
# as its `block` option
plt.show()

In [None]:
# Report two further summary scores computed from the class-index vectors

balanced_acc_pct = round(BAS(test_ls, pred_ls) * 100, 2)
mcc_pct = round(MCC(test_ls, pred_ls) * 100, 2)

print("Balanced Accuracy Score: {} %".format(balanced_acc_pct))
print("Matthew's Correlation Coefficient: {} %".format(mcc_pct))

# Visualize prediction on the test data

In [None]:
# Visualize some of the predictions made by the model
# Map label index -> class display name
labels = dict(zip([0, 1, 2, 3], CLASSES))
mapping = dict(zip([0, 1, 2, 3], CLASSES))
rng = np.random.RandomState(42)

# Get a batch of test images and labels (same `next()` call style as the cell
# that fetches the arrays)
x, y = next(test_ds)
plt.figure(figsize=(20, 20))
for i in range(16):
    ax = plt.subplot(4, 4, i + 1)
    # Sample from the batch that is indexed below, not from another array
    idx = rng.choice(range(len(x)))
    plt.imshow(x[idx])
    actual_name = mapping[np.argmax(y[idx])]
    pred = cnn4.predict(x[idx:idx+1]).argmax(axis=1)[0]
    predicted_name = mapping[pred]
    if actual_name == predicted_name:
        # Correct prediction: green title, blue predicted label
        plt.title("Actual Target Value : {}".format(actual_name),
                  fontdict={'color':'green'})
        plt.ylabel("Predicted Target Values : {}".format(predicted_name), fontdict={'color':'blue'})
    else:
        # Wrong prediction: predicted label in red
        plt.title("Actual: {}".format(actual_name))
        plt.ylabel("Predicted: {}".format(predicted_name), fontdict={'color':'red'})
    # Hide tick labels on every panel (previously only misclassified panels
    # had them removed)
    plt.gca().axes.yaxis.set_ticklabels([])
    plt.gca().axes.xaxis.set_ticklabels([])

# Explaining our model using the LIME image explainer

In [None]:
import lime
from lime import lime_image

# Build the LIME image explainer with a fixed random state
explainer = lime_image.LimeImageExplainer(
    random_state=123,
)

explainer

# Check Prediction with Explainer

In [None]:
# Randomly select one sample from the test data, print its actual and
# predicted labels, then build a LIME explanation for it.
from skimage.segmentation import felzenszwalb, flood_fill, flood

mapping = dict(zip([0, 1, 2, 3], CLASSES))


rng = np.random.RandomState(123)
idx = rng.choice(range(len(test_data)))


print("Actual Target Value     : {}".format(mapping[np.argmax(test_labels[idx])]))
pred = cnn4.predict(test_data[idx:idx+1]).argmax(axis=1)[0]
print("Predicted Target Values : {}".format(mapping[pred]))

# Explain every class rather than only the two highest-scoring ones: the
# following cells request the mask for the TRUE label, and that lookup fails
# if the true class was not among the explained labels.
explanation = explainer.explain_instance(test_data[idx].astype('double'), cnn4.predict,
                                         top_labels=len(mapping), hide_color=0, num_samples=123)

# Generate image and mask

In [None]:
# Generate an image and mask in which only the pixels that contribute
# positively to the prediction are kept
true_label = np.argmax(test_labels[idx])
img, mask = explanation.get_image_and_mask(
    true_label,
    positive_only=True,
    hide_rest=True,
)

img.shape, mask.shape

# Visuals of pixels contributing positively

In [None]:
# Pixels contributing positively to the prediction
from skimage.segmentation import mark_boundaries
import matplotlib.pyplot as plt

def plot_comparison(main_image, img, mask):
    """Show the original image, the explanation image, the mask, and the
    explanation image with the mask boundaries drawn on it, side by side.

    Args:
        main_image: the sample being explained (drawn with a gray colormap).
        img: image returned by the explanation.
        mask: mask returned by the explanation.
    """
    fig = plt.figure(figsize=(15, 5))

    # (subplot position, title, picture, extra imshow options) for each panel
    panels = [
        (141, "Original Image", main_image, {"cmap": "gray"}),
        (142, "Image", img, {}),
        (143, "Mask", mask, {}),
        (144, "Image+Mask Combined", mark_boundaries(img, mask, color=(0, 1, 0)), {}),
    ]
    for position, title, picture, imshow_options in panels:
        ax = fig.add_subplot(position)
        ax.imshow(picture, **imshow_options)
        ax.set_title(title)

plot_comparison(test_data[idx], img, mask)

# Visual of pixels contributing negatively

In [None]:
# Keep only the pixels that contribute negatively to the true class, then
# plot them next to the original sample
true_label = np.argmax(test_labels[idx])
img, mask = explanation.get_image_and_mask(
    true_label,
    positive_only=False,
    negative_only=True,
    hide_rest=True,
)

img.shape, mask.shape

plot_comparison(test_data[idx], img, mask)

# Explanation using segmentation

In [None]:
# Explain a prediction using the felzenszwalb segmentation function
from skimage.segmentation import felzenszwalb

# Seeded draw of one test sample
rng = np.random.RandomState(123)
idx = rng.choice(range(len(test_data)))

actual_name = mapping[np.argmax(test_labels[idx])]
print("Actual Target Value     : {}".format(actual_name))
pred = cnn4.predict(test_data[idx:idx+1]).argmax(axis=1)[0]
print("Predicted Target Values : {}".format(mapping[pred]))

sample_image = test_data[idx].squeeze()
explanation = explainer.explain_instance(
    sample_image,
    cnn4.predict,
    segmentation_fn=felzenszwalb,
    random_seed=123,
)
explanation

# Segmentation positive pixels 

In [None]:
# Segmentation-based explanation: pixels supporting the true class
true_label = np.argmax(test_labels[idx])
img, mask = explanation.get_image_and_mask(
    true_label,
    positive_only=True,
    hide_rest=True,
)

plot_comparison(test_data[idx], img, mask)

# Segmentation negative pixels

In [None]:
# Segmentation-based explanation: pixels working against the true class
true_label = np.argmax(test_labels[idx])
img, mask = explanation.get_image_and_mask(
    true_label,
    positive_only=False,
    negative_only=True,
    hide_rest=True,
)

plot_comparison(test_data[idx], img, mask)

# Explaining wrong prediction

In [None]:
# Explain a wrong prediction
from skimage.segmentation import felzenszwalb
rng = np.random.RandomState(123)

# Compare class indices per SAMPLE. Comparing the 2-D label arrays element by
# element and flattening the resulting (row, column) pairs would mix sample
# indices with class-column indices.
true_classes = np.argmax(test_labels, axis=1)
predicted_classes = np.argmax(pred_labels, axis=1)
wrong_idx = np.argwhere(true_classes != predicted_classes).flatten()
if wrong_idx.size == 0:
    raise ValueError("No misclassified test samples to explain.")
idx = rng.choice(wrong_idx)

print("Actual Target Value     : {}".format(mapping[np.argmax(test_labels[idx])]))
pred = cnn4.predict(test_data[idx:idx+1]).argmax(axis=1)[0]
print("Predicted Target Values : {}".format(mapping[pred]))

explanation = explainer.explain_instance(test_data[idx].squeeze(), cnn4.predict,
                                         segmentation_fn=felzenszwalb, random_seed=123)

explanation

In [None]:
# Wrong prediction: pixels contributing positively to the true class
true_label = np.argmax(test_labels[idx])
img, mask = explanation.get_image_and_mask(
    true_label,
    positive_only=True,
    hide_rest=True,
)

plot_comparison(test_data[idx], img, mask)


In [None]:
# Wrong prediction: pixels contributing negatively to the true class
true_label = np.argmax(test_labels[idx])
img, mask = explanation.get_image_and_mask(
    true_label,
    positive_only=False,
    negative_only=True,
    hide_rest=True,
)
plot_comparison(test_data[idx], img, mask)