<h1>Preprocessing </h1>

In [2]:
import tensorflow as tf
import cv2
import numpy as np
import os
import sys # for debugging 

In [3]:

# # This function preprocesses the image by reading in the image apply grayscale make all the sizes the same and 
# def preprocess_image(file_path, img_size):
#     img = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE) # Grayscale will even the playing field if we start getting different types of images. If the images color is a factor we can take out grayscale
#     img = cv2.resize(img, img_size)
#     img = img.astype('float')/255.0 # Make the pixels become float and normalize to 0-1 for normalization
#     return img







# This function preprocesses the image by reading in the image apply grayscale make all the sizes the same and 
def preprocess_image(file_path, img_size):
    img = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE) # Grayscale will even the playing field if we start getting different types of images. If the images color is a factor we can take out grayscale
    
    # Thresholding to remove black background
    _, binary_image = cv2.threshold(img, 10, 255, cv2.THRESH_BINARY)
    num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(binary_image, connectivity=8)
    largest_component_label = np.argmax(stats[1:, cv2.CC_STAT_AREA]) + 1
    brain_mask = (labels == largest_component_label).astype(np.uint8) * 255
    x, y, w, h = cv2.boundingRect(brain_mask)
    img = img[y:y+h, x:x+w]
    
    img = cv2.resize(img, img_size)
    img = img.astype('float')/255.0 # Make the pixels become float and normalize to 0-1 for normalization
    return img


target_size =(224, 224)

# This function will pull from the directory and all subdirectory for the image and give it a label to the directory it is in
def load_images_from_directory(directory):
    images = []
    labels = []
    # Iterates through all subdirectories
    for subdir in os.listdir(directory):
        label = subdir #Make the subdirectory name be a label
        subdir_path = os.path.join(directory, subdir)

        # Checks if the object it is looking at is a directory and if it is go into the directory and get all the files and preprocess them
        if os.path.isdir(subdir_path):
            for image in os.listdir(subdir_path):
                file_path = os.path.join(subdir_path, image)

                image = preprocess_image(file_path, target_size)

                # Append to the arrays after preprocessing
                images.append(image)
                labels.append(label)

    return np.array(images), np.array(labels)




In [4]:


# Define the directory paths for the training and test datasets
train_dir = "./Alzheimer_s Dataset/train"
test_dir = "./Alzheimer_s Dataset/test"
# single_test_dir = "./Alzheimer_s Dataset/single_test"

# Load images and labels from the training directory
alz_images_train, alz_labels_train = load_images_from_directory(train_dir)

# Load images and labels from the test directory
alz_images_test, alz_labels_test = load_images_from_directory(test_dir)

# alz_single_images_test, alz_single_labels_test = load_images_from_directory(single_test_dir)

# Print information about the training dataset
print("Train")
print('Image shape:', alz_images_train.shape)
print('Labels shape:', alz_labels_train.shape)

# Print information about the test dataset
print("\nTest")
print('Image shape:', alz_images_test.shape)
print('Labels shape:', alz_labels_test.shape)


# print("\nSingle Test")
# print('Image shape:', alz_single_images_test.shape)
# print('Labels shape:', alz_single_labels_test.shape)


# np.set_printoptions(threshold=sys.maxsize) # for debugging

# print('Image train:', alz_single_images_test) # for debugging

# The output of the shape follows this
#  (X, X1, X2)
# X is the number of pictures in the array   
# X1 is the number of rows for a single picture (should be 224 since that is the scale)
# X2 is the number of columns in each picture  (should be 224 since that is the scale)
#  *Scale can be change to 207 since that is how the data is processed. 
# 
# When pull out the full array, you see alot of 0 at the start and end and that is because of the black around the brain
# 


Train
Image shape: (5121, 224, 224)
Labels shape: (5121,)

Test
Image shape: (1279, 224, 224)
Labels shape: (1279,)


<h1> Aaron's Algorithm </h1>
CNN GCNN or similar neural networks that can be adjusted in between each other

In [5]:
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()

alz_labels_train_encoded = label_encoder.fit_transform(alz_labels_train)
alz_labels_test_encoded = label_encoder.fit_transform(alz_labels_test)

num_classes = len(label_encoder.classes_)

alz_labels_train_onehot = tf.keras.utils.to_categorical(alz_labels_train_encoded, num_classes)
alz_labels_test_onehot = tf.keras.utils.to_categorical(alz_labels_test_encoded, num_classes)

#np.set_printoptions(threshold=sys.maxsize) # for debugging
#print(alz_labels_train_onehot)

print("Training labels shape (one-hot encoded):", alz_labels_train_onehot.shape)
print("Testing labels shape (one-hot encoded):", alz_labels_test_onehot.shape)

# print('Image train:', alz_images_train) # for debugging


# 0 = MildDemented
# 1 = ModerateDemented
# 2 = NonDemented
# 3 = VeryMildDemented

Training labels shape (one-hot encoded): (5121, 4)
Testing labels shape (one-hot encoded): (1279, 4)


In [6]:
img_height = target_size[1]
img_width = target_size[0]
num_channels = 1

<h1> CNN </h1>

In [39]:
#3RD BEST MODEL
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import ReduceLROnPlateau

# Define model architecture with batch normalization
model = Sequential([
    Conv2D(32, (3,3), activation='relu', input_shape=(img_height, img_width, num_channels), 
           kernel_regularizer=regularizers.l2(0.1)),
    BatchNormalization(),
    MaxPooling2D((2,2)),

    Conv2D(64, (3,3), activation='relu', kernel_regularizer=regularizers.l2(0.1)),
    BatchNormalization(),
    MaxPooling2D((2,2)),

    Conv2D(128, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.1)),
    BatchNormalization(),
    MaxPooling2D((2, 2)),

    Flatten(),

    Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.1)),
    BatchNormalization(),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])

# Compile the model with Adam optimizer and categorical crossentropy loss
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model with early stopping and learning rate scheduler
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
learning_rate_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, min_lr=0.0001)

history = model.fit(alz_images_train, alz_labels_train_onehot, epochs=30, batch_size=32,
                     validation_data=(alz_images_test, alz_labels_test_onehot), 
                     callbacks=[early_stopping, learning_rate_scheduler])


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30


In [50]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.callbacks import EarlyStopping

# THE 2ND BEST
# Learning rate scheduler - Exponential Decay
def exponential_decay(epoch, initial_lr=0.001, decay_rate=0.9):
    return initial_lr * np.power(decay_rate, epoch)

#THE BEST
# Learning rate scheduler - Cyclic Learning Rate
def cyclic_lr(epoch, lr_max=0.001, lr_min=0.0001, step_size=8):
    cycle = np.floor(1 + epoch / (2 * step_size))
    x = np.abs(epoch / step_size - 2 * cycle + 1)
    lr = lr_min + (lr_max - lr_min) * np.maximum(0, (1 - x))
    return lr

# Define model architecture with batch normalization
model = Sequential([
    Conv2D(32, (3,3), activation='relu', input_shape=(img_height, img_width, num_channels), 
           kernel_regularizer=regularizers.l2(0.1)),
    BatchNormalization(),
    MaxPooling2D((2,2)),

    Conv2D(64, (3,3), activation='relu', kernel_regularizer=regularizers.l2(0.1)),
    BatchNormalization(),
    MaxPooling2D((2,2)),

    Conv2D(128, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.1)),
    BatchNormalization(),
    MaxPooling2D((2, 2)),

    Flatten(),

    Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.1), kernel_initializer=he_normal()),
    BatchNormalization(),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])

# Compile the model with Adam optimizer and categorical crossentropy loss
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model with early stopping and learning rate scheduler
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
# learning_rate_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, min_lr=0.0001)

lr_scheduler_exp_decay = LearningRateScheduler(exponential_decay)
lr_scheduler_cyclic_lr = LearningRateScheduler(cyclic_lr)

# history = model.fit(alz_images_train, alz_labels_train_onehot, epochs=30, batch_size=32,
#                      validation_data=(alz_images_test, alz_labels_test_onehot), 
#                      callbacks=[early_stopping, learning_rate_scheduler])

history_exp_decay = model.fit(alz_images_train, alz_labels_train_onehot, epochs=30, batch_size=32,
                              validation_data=(alz_images_test, alz_labels_test_onehot), 
                              callbacks=[lr_scheduler_exp_decay, early_stopping])

history_cyclic_lr = model.fit(alz_images_train, alz_labels_train_onehot, epochs=30, batch_size=32,
                              validation_data=(alz_images_test, alz_labels_test_onehot), 
                              callbacks=[lr_scheduler_cyclic_lr, early_stopping])

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30


<h1> DNN </h1>

In [26]:
# Reshape the input data to have rank 4
alz_images_train = alz_images_train.reshape(-1, 224, 224, 1)
alz_images_test = alz_images_test.reshape(-1, 224, 224, 1)

# Verify the shapes
print("Training data shape:", alz_images_train.shape)
print("Testing data shape:", alz_images_test.shape)


Training data shape: (5121, 224, 224, 1)
Testing data shape: (1279, 224, 224, 1)


In [30]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import LearningRateScheduler, EarlyStopping
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator


# Define model architecture with batch normalization
model = Sequential([
    Flatten(input_shape=(img_height, img_width, num_channels)),

    Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
    BatchNormalization(),

    Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
    BatchNormalization(),
    
    Dropout(0.2),

    Dense(num_classes, activation='softmax')
])

# Compile the model with Adam optimizer and categorical crossentropy loss
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# Define callbacks for early stopping and learning rate scheduler
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

def exponential_decay(epoch, initial_lr=0.001, decay_rate=0.9):
    return initial_lr * np.power(decay_rate, epoch)

lr_scheduler_exp_decay = LearningRateScheduler(exponential_decay)


# Create an instance of ImageDataGenerator with desired augmentation parameters
datagen = ImageDataGenerator(
    rotation_range=20,      # Randomly rotate images by up to 20 degrees
    width_shift_range=0.1,  # Randomly shift images horizontally by up to 10% of the width
    height_shift_range=0.1, # Randomly shift images vertically by up to 10% of the height
    shear_range=0.2,        # Randomly apply shear transformations
    zoom_range=0.2,         # Randomly zoom in by up to 20%
    horizontal_flip=True,   # Randomly flip images horizontally
    fill_mode='nearest'     # Fill in newly created pixels (due to augmentation) using the nearest existing pixel
)


# Define batch size and number of epochs
batch_size = 32
epochs = 30

# Create augmented training data generator
train_generator = datagen.flow(alz_images_train, alz_labels_train_onehot, batch_size=batch_size)

# Train the model using the augmented data generator
history = model.fit(train_generator,
                    steps_per_epoch=len(alz_images_train) // batch_size,
                    epochs=epochs,
                    validation_data=(alz_images_test, alz_labels_test_onehot),
                    callbacks=[early_stopping, lr_scheduler_exp_decay])



Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30


<h1>FNN</h1>

In [7]:
from sklearn.decomposition import PCA

# Generator function to yield batches of preprocessed images
def image_generator(images, batch_size=32):
    num_images = len(images)
    num_batches = (num_images + batch_size - 1) // batch_size
    
    for i in range(num_batches):
        start_index = i * batch_size
        end_index = min((i + 1) * batch_size, num_images)
        
        # Load and preprocess images for the current batch
        batch_images = np.stack((images[start_index:end_index],) * 3, axis=-1)
        preprocessed_images = tf.keras.applications.mobilenet_v2.preprocess_input(batch_images)
        
        yield preprocessed_images

# Function to extract features from images using a pre-trained CNN and perform PCA
def extract_features(images, batch_size=32, n_components=64):
    feature_extractor = tf.keras.applications.MobileNetV2(include_top=False, weights='imagenet', input_shape=(img_height, img_width, 3))
    feature_extractor.trainable = False
    
    features = []
    for batch_images in image_generator(images, batch_size=batch_size):
        batch_features = feature_extractor.predict(batch_images)
        batch_features_flat = batch_features.reshape(batch_features.shape[0], -1)
        features.append(batch_features_flat)
    
    all_features = np.concatenate(features, axis=0)
    
    pca = PCA(n_components=n_components)
    reduced_features = pca.fit_transform(all_features)
    
    return reduced_features

# Extract features from training and test images
train_features = extract_features(alz_images_train)
test_features = extract_features(alz_images_test)



In [9]:
import tensorflow as tf
from tensorflow.keras import layers, models

# Define the GCN model architecture
def create_gcn_model(input_dim, output_dim):
    inputs = tf.keras.Input(shape=(input_dim,))
    x = layers.Dense(64, activation='relu')(inputs)
    x = layers.Dropout(0.5)(x)
    x = layers.Dense(output_dim, activation='softmax')(x)  # Adjust activation based on your task
    model = tf.keras.Model(inputs=inputs, outputs=x)
    return model

# Define model parameters
input_dim = train_features.shape[1]  # Input dimension is the number of features after PCA
output_dim = num_classes  # Output dimension is the number of classes
learning_rate = 0.001
batch_size = 32
epochs = 10

# Create and compile the model
model = create_gcn_model(input_dim, output_dim)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train the model
history = model.fit(train_features, alz_labels_train_onehot, 
                    batch_size=batch_size, 
                    epochs=epochs, 
                    validation_data=(test_features, alz_labels_test_onehot))

# Evaluate the model
test_loss, test_accuracy = model.evaluate(test_features, alz_labels_test_onehot)
print(f'Test Loss: {test_loss}, Test Accuracy: {test_accuracy}')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Loss: 1.4620929956436157, Test Accuracy: 0.4362783432006836


<h1>CNN with Graph based features</h1>


In [10]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Compute adjacency matrix based on feature similarity
def compute_feature_similarity(features):
    num_images = features.shape[0]
    similarities = np.zeros((num_images, num_images))
    for i in range(num_images):
        for j in range(num_images):
            # Compute cosine similarity between feature vectors
            similarities[i, j] = cosine_similarity(features[i].reshape(1, -1), features[j].reshape(1, -1))[0, 0]
    return similarities

In [15]:
# Compute feature similarities for training and test images
train_feature_similarity = compute_feature_similarity(train_features)

In [11]:
test_feature_similarity = compute_feature_similarity(test_features)

In [12]:
similarity_threshold = 0.8

# Construct adjacency matrix based on feature similarity
def construct_adjacency_matrix(feature_similarity, threshold):
    num_images = feature_similarity.shape[0]
    adjacency_matrix = np.zeros((num_images, num_images))
    for i in range(num_images):
        for j in range(num_images):
            # Set adjacency matrix value based on whether feature similarity is above threshold
            if feature_similarity[i, j] >= threshold:
                adjacency_matrix[i, j] = 1
                adjacency_matrix[j, i] = 1  # Symmetric adjacency matrix
    return adjacency_matrix


In [None]:
# Compute adjacency matrix for training and test images
train_adj_matrix = construct_adjacency_matrix(train_feature_similarity, similarity_threshold)
print("Shape of adjacency matrix for training images:", train_adj_matrix.shape)

In [13]:
test_adj_matrix = construct_adjacency_matrix(test_feature_similarity, similarity_threshold)
print("Shape of adjacency matrix for test images:", test_adj_matrix.shape)

Shape of adjacency matrix for test images: (1279, 1279)


In [None]:
print("Shape of train_features:", train_features.shape)
print("Shape of test_features:", test_features.shape)

print("\nShape of train_features similarity:", train_feature_similarity.shape)
print("Shape of test_features similarity:", test_feature_similarity.shape)

print("\nShape of train_adj_matrix:", train_adj_matrix.shape)
print("Shape of test_adj_matrix:", test_adj_matrix.shape)


In [None]:
# Images with grayscale index
alz_images_train_with_grayscale_index = alz_images_train[..., np.newaxis]  # Add channel dimension for grayscale images
alz_images_test_with_grayscale_index = alz_images_test[..., np.newaxis]

# Integrate graph-based features
train_features_with_graph = np.concatenate([train_features, train_adj_matrix], axis=1)
test_features_with_graph = np.concatenate([test_features, test_adj_matrix], axis=1)

print("Shape of training images with grayscale index:", alz_images_train_with_grayscale_index.shape)
print("Shape of testing images with grayscale index:", alz_images_test_with_grayscale_index.shape)
print("Shape of training features with graph:", train_features_with_graph.shape)
print("Shape of testing features with graph:", test_features_with_graph.shape)

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Concatenate, Flatten, Dense

# Define input layers for image data and graph data
image_input = Input(shape=(img_height, img_width, 1), name='image_input')
graph_input = Input(shape=(train_features.shape[1],), name='graph_input')
adj_input = Input(shape=(train_adj_matrix.shape[1],), name='adj_input')
print(adj_input)

# Flatten the image data
flatten_image = Flatten()(image_input)

# Concatenate flattened image data with graph data
concatenated_input = Concatenate()([flatten_image, graph_input, adj_input])

# Define the dense layers
x = Dense(128, activation='relu')(concatenated_input)
x = Dense(64, activation='relu')(x)

# Output layer
output = Dense(4, activation='softmax', name='output')(x)

# Create the model
model2 = Model(inputs=[image_input, graph_input, adj_input], outputs=output)

# Compile the model
model2.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Print model summary
model.summary()

In [None]:
# Define the number of epochs and batch size
epochs = 10
batch_size = 32

history = model.fit(
    {'image_input': alz_images_train_with_grayscale_index, 'graph_input': train_features, 'adj_input': train_adj_matrix},
    {'output': alz_labels_train_onehot},
    epochs=10,
    batch_size=32,
    validation_data=({'image_input': alz_images_test_with_grayscale_index, 'graph_input': test_features, 'adj_input': test_adj_matrix}, {'output': alz_labels_test_onehot})
)

<h1>Jay's Algorthm</h1>
SVM and KNN (K-Nearest Neighbors)

<h1>Geoffrey's Algorithm</h1>
Random Forest and RNN