In [2]:
import tensorflow as tf
import cv2
import numpy as np
import os
import sys # for debugging 

In [3]:

# # This function preprocesses the image by reading in the image apply grayscale make all the sizes the same and 
# def preprocess_image(file_path, img_size):
#     img = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE) # Grayscale will even the playing field if we start getting different types of images. If the images color is a factor we can take out grayscale
#     img = cv2.resize(img, img_size)
#     img = img.astype('float')/255.0 # Make the pixels become float and normalize to 0-1 for normalization
#     return img







# This function preprocesses the image by reading in the image apply grayscale make all the sizes the same and 
def preprocess_image(file_path, img_size):
    img = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE) # Grayscale will even the playing field if we start getting different types of images. If the images color is a factor we can take out grayscale
    
    # Thresholding to remove black background
    _, binary_image = cv2.threshold(img, 10, 255, cv2.THRESH_BINARY)
    num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(binary_image, connectivity=8)
    largest_component_label = np.argmax(stats[1:, cv2.CC_STAT_AREA]) + 1
    brain_mask = (labels == largest_component_label).astype(np.uint8) * 255
    x, y, w, h = cv2.boundingRect(brain_mask)
    img = img[y:y+h, x:x+w]
    
    img = cv2.resize(img, img_size)
    img = img.astype('float')/255.0 # Make the pixels become float and normalize to 0-1 for normalization
    return img


target_size =(224, 224)

# This function will pull from the directory and all subdirectory for the image and give it a label to the directory it is in
def load_images_from_directory(directory):
    images = []
    labels = []
    # Iterates through all subdirectories
    for subdir in os.listdir(directory):
        label = subdir #Make the subdirectory name be a label
        subdir_path = os.path.join(directory, subdir)

        # Checks if the object it is looking at is a directory and if it is go into the directory and get all the files and preprocess them
        if os.path.isdir(subdir_path):
            for image in os.listdir(subdir_path):
                file_path = os.path.join(subdir_path, image)

                image = preprocess_image(file_path, target_size)

                # Append to the arrays after preprocessing
                images.append(image)
                labels.append(label)

    return np.array(images), np.array(labels)

In [4]:


# Define the directory paths for the training and test datasets
train_dir = "./Alzheimer_s Dataset/train"
test_dir = "./Alzheimer_s Dataset/test"
# single_test_dir = "./Alzheimer_s Dataset/single_test"

# Load images and labels from the training directory
alz_images_train, alz_labels_train = load_images_from_directory(train_dir)

# Load images and labels from the test directory
alz_images_test, alz_labels_test = load_images_from_directory(test_dir)

# alz_single_images_test, alz_single_labels_test = load_images_from_directory(single_test_dir)

# Print information about the training dataset
print("Train")
print('Image shape:', alz_images_train.shape)
print('Labels shape:', alz_labels_train.shape)

# Print information about the test dataset
print("\nTest")
print('Image shape:', alz_images_test.shape)
print('Labels shape:', alz_labels_test.shape)


# print("\nSingle Test")
# print('Image shape:', alz_single_images_test.shape)
# print('Labels shape:', alz_single_labels_test.shape)


# np.set_printoptions(threshold=sys.maxsize) # for debugging

# print('Image train:', alz_single_images_test) # for debugging

# The output of the shape follows this
#  (X, X1, X2)
# X is the number of pictures in the array   
# X1 is the number of rows for a single picture (should be 224 since that is the scale)
# X2 is the number of columns in each picture  (should be 224 since that is the scale)
#  *Scale can be change to 207 since that is how the data is processed. 
# 
# When pull out the full array, you see alot of 0 at the start and end and that is because of the black around the brain

Train
Image shape: (5121, 224, 224)
Labels shape: (5121,)

Test
Image shape: (1279, 224, 224)
Labels shape: (1279,)


In [5]:
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()

alz_labels_train_encoded = label_encoder.fit_transform(alz_labels_train)
alz_labels_test_encoded = label_encoder.fit_transform(alz_labels_test)

num_classes = len(label_encoder.classes_)

alz_labels_train_onehot = tf.keras.utils.to_categorical(alz_labels_train_encoded, num_classes)
alz_labels_test_onehot = tf.keras.utils.to_categorical(alz_labels_test_encoded, num_classes)

#np.set_printoptions(threshold=sys.maxsize) # for debugging
#print(alz_labels_train_onehot)

print("Training labels shape (one-hot encoded):", alz_labels_train_onehot.shape)
print("Testing labels shape (one-hot encoded):", alz_labels_test_onehot.shape)

# print('Image train:', alz_images_train) # for debugging


# 0 = MildDemented
# 1 = ModerateDemented
# 2 = NonDemented
# 3 = VeryMildDemented

Training labels shape (one-hot encoded): (5121, 4)
Testing labels shape (one-hot encoded): (1279, 4)


In [6]:
img_height = target_size[1]
img_width = target_size[0]
num_channels = 1

<h1>GCN</h1>

In [14]:
from sklearn.decomposition import IncrementalPCA

# Function to extract features from images using a pre-trained CNN
def extract_features(images, batch_size=32):
    num_images = len(images)
    num_batches = (num_images + batch_size - 1) // batch_size
    features = []
    
    # Replace this with your preferred pre-trained CNN model
    pretrained_model = tf.keras.applications.MobileNetV2(include_top=False, weights='imagenet', input_shape=(img_height, img_width, 3))
    pretrained_model.trainable = False
    feature_extractor = tf.keras.Model(inputs=pretrained_model.input, outputs=pretrained_model.layers[-1].output)
    
    for i in range(num_batches):
        start_index = i * batch_size
        end_index = min((i + 1) * batch_size, num_images)
        
        # Convert grayscale images to RGB by stacking them along the third axis
        batch_images = np.stack((images[start_index:end_index],) * 3, axis=-1)
        
        # Preprocess images and extract features
        preprocessed_images = tf.keras.applications.mobilenet_v2.preprocess_input(batch_images)
        batch_features = feature_extractor(preprocessed_images)
        features.append(batch_features)
    
    features = np.concatenate(features, axis=0)
    
    # Reshape features to 2D array
    num_samples = features.shape[0]
    features_flat = features.reshape(num_samples, -1)
    
    # Perform incremental PCA to reduce memory usage
    ipca = IncrementalPCA(n_components=64, batch_size=batch_size)
    reduced_features = ipca.fit_transform(features_flat)
    return reduced_features

# Extract features from training and test images
train_features = extract_features(alz_images_train)
test_features = extract_features(alz_images_test)



MemoryError: Unable to allocate 36.8 MiB for an array with shape (32, 224, 224, 3) and data type float64

In [8]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Function to compute image similarity based on cosine similarity
def compute_image_similarity(images):
    num_images = len(images)
    similarities = np.zeros((num_images, num_images))
    for i in range(num_images):
        for j in range(num_images):
            # Compute cosine similarity between image vectors
            similarities[i, j] = cosine_similarity(images[i].reshape(1, -1), images[j].reshape(1, -1))[0, 0]
    return similarities

# Compute image similarities for training and test images
train_feature_similarity = compute_feature_similarity(train_features)
test_feature_similarity = compute_feature_similarity(test_features)

# Threshold to determine if two images are similar enough to be connected
similarity_threshold = 0.8

# Create graph for training set based on image similarity
train_G = nx.Graph()
num_train_images = len(alz_images_train)
for i in range(num_train_images):
    train_G.add_node(i)
    for j in range(i + 1, num_train_images):
        if train_image_similarity[i, j] >= similarity_threshold:
            train_G.add_edge(i, j)

# Create graph for test set based on image similarity
test_G = nx.Graph()
num_test_images = len(alz_images_test)
for i in range(num_test_images):
    test_G.add_node(i)
    for j in range(i + 1, num_test_images):
        if test_image_similarity[i, j] >= similarity_threshold:
            test_G.add_edge(i, j)


KeyboardInterrupt: 

In [None]:
import numpy as np

# Define a function to compute similarity between images
def compute_similarity(image1, image2):
    # Example: compute similarity based on pixel-wise comparison
    return np.sum(np.abs(image1 - image2))

# Compute adjacency matrix based on image similarities
def compute_adjacency_matrix(images):
    num_images = len(images)
    adjacency_matrix = np.zeros((num_images, num_images))
    for i in range(num_images):
        for j in range(num_images):
            similarity = compute_similarity(images[i], images[j])
            adjacency_matrix[i, j] = similarity
    return adjacency_matrix






In [None]:
# Compute adjacency matrix for training images and test images
train_adj_matrix = compute_adjacency_matrix(alz_images_train)
print("Shape of adjacency matrix:", train_adj_matrix.shape)

test_adj_matrix = compute_adjacency_matrix(alz_images_test)
print("Shape of adjacency matrix:", test_adj_matrix.shape)