<h1>Preprocessing </h1>

In [1]:
import tensorflow as tf
import cv2
import numpy as np
import os

In [2]:

def preprocess_image(file_path, img_size):
    """Read one image as grayscale, resize it, and scale its pixels to 0-1.

    Grayscale evens the playing field if different types of images are added
    later. If image colour turns out to be a factor, the IMREAD_GRAYSCALE flag
    can be dropped.

    Args:
        file_path: Location of the image file (str or os.PathLike).
        img_size: Target size passed straight to cv2.resize, which expects
            (width, height).

    Returns:
        The resized image as a float array with pixel values divided by 255.

    Raises:
        ValueError: If OpenCV cannot read or decode the file.
    """
    # cv2.imread wants a plain string path; os.fspath also lets callers pass pathlib.Path
    path = os.fspath(file_path)
    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)

    # imread signals failure by returning None instead of raising, which would
    # otherwise surface later as an opaque error inside cv2.resize.
    if img is None:
        raise ValueError(f"Could not read image (missing file or unsupported format): {path}")

    img = cv2.resize(img, img_size)
    # Convert the pixels to float and normalize them to the 0-1 range
    return img.astype('float') / 255.0

def load_images_from_directory(directory, target_size=(224, 224)):
    """Load every image found one level below `directory`, labelled by folder name.

    Each immediate subdirectory of `directory` is treated as one class: the
    subdirectory name becomes the label of every file directly inside it.
    Top-level files and directories nested inside a class folder are ignored.

    Args:
        directory: Root folder whose subdirectories hold the images.
        target_size: Size forwarded to preprocess_image for every file.

    Returns:
        A pair (images, labels) of NumPy arrays with one entry per loaded file,
        ordered by sorted class name and then sorted file name.

    Any error raised by preprocess_image for an individual file propagates to
    the caller.
    """
    images = []
    labels = []

    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order reproducible from run to run and machine to machine.
    for subdir in sorted(os.listdir(directory)):
        subdir_path = os.path.join(directory, subdir)

        # Only directories define a class; stray files next to them are skipped
        if not os.path.isdir(subdir_path):
            continue

        for file_name in sorted(os.listdir(subdir_path)):
            file_path = os.path.join(subdir_path, file_name)

            # A nested directory is not an image and cannot be read as one
            if not os.path.isfile(file_path):
                continue

            # Append to the lists after preprocessing; the subdirectory name is the label
            images.append(preprocess_image(file_path, target_size))
            labels.append(subdir)

    return np.array(images), np.array(labels)




In [9]:
# import sys # for debugging 

# Locations of the two dataset splits on disk
train_dir = "./Alzheimer_s Dataset/train"
test_dir = "./Alzheimer_s Dataset/test"

# Read each split into an (images, labels) pair of arrays: training first, then test
alz_images_train, alz_labels_train = load_images_from_directory(train_dir)
alz_images_test, alz_labels_test = load_images_from_directory(test_dir)

# Report the array shapes of each split under its own heading
for _heading, _images, _labels in (
    ("Train", alz_images_train, alz_labels_train),
    ("\nTest", alz_images_test, alz_labels_test),
):
    print(_heading)
    print('Image shape:', _images.shape)
    print('Labels shape:', _labels.shape)


# np.set_printoptions(threshold=sys.maxsize) # for debugging

# print('Image train:', alz_images_train) # for debugging

# The printed image shape has the form (X, X1, X2):
#   X  is the number of pictures in the array
#   X1 is the number of rows in a single picture (should be 224, since that is the scale)
#   X2 is the number of columns in a single picture (should be 224, since that is the scale)
#   * The scale can be changed to 207, since that is how the data is processed.
#
# When you print the full array, you see a lot of zeros at the start and end;
# that is because of the black background around the brain.
#


Train
Image shape: (1, 224, 224)
Labels shape: (1,)

Test
Image shape: (1279, 224, 224)
Labels shape: (1279,)
Image train: [[[0.           0.           0.           0.           0.
   0.           0.           0.           0.           0.
   0.           0.           0.           0.           0.
   0.           0.           0.           0.           0.
   0.           0.           0.           0.           0.
   0.           0.           0.           0.           0.
   0.           0.           0.           0.           0.
   0.           0.           0.           0.           0.
   0.           0.           0.           0.           0.
   0.           0.           0.           0.           0.
   0.           0.           0.           0.           0.
   0.           0.           0.           0.           0.
   0.           0.           0.           0.           0.
   0.           0.           0.           0.           0.
   0.           0.           0.           0.           0.
   0.  

<h1> Aaron's Algorithm </h1>
CNN, GCNN, or similar neural networks that can be adjusted between one another

<h1>Jay's Algorithm</h1>
SVM and KNN (K-Nearest Neighbors)

In [None]:
import tensorflow as tf
import cv2
import numpy as np
import os
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

def flatten_images(images):
    """Collapse each image in a batch into a single feature row.

    Args:
        images: Array-like whose first axis indexes the images, e.g. a stack
            of 2-D images.

    Returns:
        A 2-D array with one row per image and one column per remaining
        element. An empty batch yields zero rows instead of raising.
    """
    # Accept lists/tuples as well as ndarrays (no copy is made for an ndarray)
    images = np.asarray(images)

    # Spell out the row width instead of using -1: NumPy cannot infer -1 when
    # the batch is empty, so reshape(0, -1) would raise on a zero-image input.
    features_per_image = int(np.prod(images.shape[1:], dtype=int))
    return images.reshape(images.shape[0], features_per_image)

# Reporting helper for the per-class summary printed at the end of this step
from sklearn.metrics import classification_report

# Dataset split locations
train_dir = "./Alzheimer_s Dataset/train"
test_dir = "./Alzheimer_s Dataset/test"

# Read and preprocess both splits (training first, then test)
alz_images_train, alz_labels_train = load_images_from_directory(train_dir)
alz_images_test, alz_labels_test = load_images_from_directory(test_dir)

# KNN expects one feature row per sample, so flatten every image
alz_images_train_flat, alz_images_test_flat = map(
    flatten_images, (alz_images_train, alz_images_test)
)

# Build the KNN classifier and fit it on the training split (fit returns the estimator itself)
knn = KNeighborsClassifier(n_neighbors=3).fit(  # You can tune the number of neighbors here
    alz_images_train_flat, alz_labels_train
)

# Predict a label for every test image
alz_labels_pred = knn.predict(alz_images_test_flat)

# Overall accuracy on the test split
accuracy = accuracy_score(alz_labels_test, alz_labels_pred)
print(f"Test Accuracy: {accuracy:.2f}")

# Per-class precision / recall / F1 summary
print(classification_report(alz_labels_test, alz_labels_pred))


# Sweep the number of neighbours and record the test accuracy obtained for each k.
# NOTE(review): these accuracies are measured on the test split, so picking k
# from this curve tunes the model on test data - use a validation split or
# cross-validation before reporting a final test score.
k_values = list(range(1, 15))  # Evaluating k from 1 to 14
accuracies = []

for k in k_values:
    # Sweep-local names: rebinding `knn`, `alz_labels_pred` and `accuracy` here
    # would silently replace the k=3 baseline fitted earlier in this cell with
    # the last (k=14) model once the loop finishes.
    knn_k = KNeighborsClassifier(n_neighbors=k)
    knn_k.fit(alz_images_train_flat, alz_labels_train)
    labels_pred_k = knn_k.predict(alz_images_test_flat)
    accuracy_k = accuracy_score(alz_labels_test, labels_pred_k)
    accuracies.append(accuracy_k)
    print(f"Accuracy for k={k}: {accuracy_k:.2f}")

# Plot accuracy against k using an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(k_values, accuracies, marker='o')
ax.set_title('KNN Accuracy vs Number of Neighbors')
ax.set_xlabel('Number of Neighbors (k)')
ax.set_ylabel('Accuracy')
ax.set_xticks(k_values)  # k is an integer, so tick every evaluated value
ax.grid(True)
plt.show()

<h1>Geoffrey's Algorithm</h1>
Random Forest and RNN