# Colab Code

from google.colab import drive
drive.mount('/content/drive')

import shutil
import os
 
FILE_NAME = "Data.zip"
 
def copy_zip_file(src_path, dest_dir):
 
    zip_filename = os.path.basename(src_path)
    dest_path = os.path.join(dest_dir, zip_filename)
 
    if not os.path.exists(src_path):
        print(f"Error: The file '{src_path}' does not exist.")
        return
 
    if not os.path.exists(dest_dir):
        os.makedirs(dest_dir)
 
    shutil.copy2(src_path, dest_path)
    print(f"'{zip_filename}' has been copied to '{dest_dir}'.")
 
source_path = "/content/drive/MyDrive/" + FILE_NAME
destination_directory = "/content"
 
copy_zip_file(source_path, destination_directory)

!unzip Data.zip

tf.config.list_physical_devices('GPU')

if tf.test.gpu_device_name(): 

    print('Default GPU Device:{}'.format(tf.test.gpu_device_name()))

else:

   print("Please install GPU version of TF")

print(f"-> {tf.config.list_physical_devices('GPU')}")

In [1]:
import tensorflow as tf

print(tf.__version__)

2.11.0


In [3]:
# imports 
import os
import glob
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from PIL import Image
import numpy as np
import tensorflow as tf
from collections import Counter
import random
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import LabelBinarizer
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from imblearn.under_sampling import RandomUnderSampler
import shutil

# Read data

In [4]:
# Define Paths
dataset_dir = "Data"  # Root folder containing class folders
# Adjust class names to match actual folder names
classes = ["Mild Dementia", "Moderate Dementia", "Non Demented", "Very mild Dementia"]

# Load Data with Correct Folder Names
image_paths, labels = [], []
for class_label, class_name in enumerate(classes):
    class_dir = os.path.join(dataset_dir, class_name)
    if not os.path.exists(class_dir):
        print(f"Error: Folder {class_dir} does not exist.")
        continue
    files = glob.glob(f"{class_dir}/*.jpg")  # Adjust extension if needed
    print(f"Class: {class_name}, Files Found: {len(files)}")  # Debug: Count files
    for file_path in files:
        image_paths.append(file_path)
        labels.append(class_label)

# Proceed with the pipeline if files are found
if len(image_paths) == 0:
    raise ValueError("No images found. Check dataset folder names or file paths.")

Class: Mild Dementia, Files Found: 5002
Class: Moderate Dementia, Files Found: 488
Class: Non Demented, Files Found: 67222
Class: Very mild Dementia, Files Found: 13725


In [5]:
# Split data into training, testing and validation
train_paths, test_paths, train_labels, test_labels = train_test_split(image_paths, labels, test_size=0.3, random_state=42,stratify=labels) # suffle by default and straity labels 
test_paths, val_paths, test_labels, val_labels = train_test_split(test_paths, test_labels, test_size=0.5, random_state=42,stratify=test_labels) # suffle by default and straity labels
# to keep the same class distribution

In [6]:
# Count classes in each split
train_class_counts = Counter(train_labels)
test_class_counts = Counter(test_labels)
val_class_counts = Counter(val_labels)

print("Training set class distribution:", train_class_counts)
print("Testing set class distribution:", test_class_counts)
print("Validation set class distribution:", val_class_counts)

Training set class distribution: Counter({2: 47055, 3: 9607, 0: 3501, 1: 342})
Testing set class distribution: Counter({2: 10084, 3: 2059, 0: 750, 1: 73})
Validation set class distribution: Counter({2: 10083, 3: 2059, 0: 751, 1: 73})


In [5]:
# Balance data using tomeklinks undersampling
import cv2
def load_image(path, image_size=(224, 224)):
    img = cv2.imread(path)  # Read image
    img_resized = cv2.resize(img, image_size)  # Resize to a fixed size
    img_flattened = img_resized.flatten()  # Flatten image to 1D array
    return img_flattened

def load_images(image_paths, image_size=(224, 224)):
    images = list(map(lambda path: load_image(path, image_size), image_paths))
    return np.array(images)

X = load_images(train_paths)
# Initialize TomekLinks
RandomUnderSampler = RandomUnderSampler()

# Apply TomekLinks to the dataset
train_paths_under, train_labels_under = RandomUnderSampler.fit_resample(X, train_labels)

In [6]:
# After undersampling, get the indices of the selected paths
selected_indices = np.where(np.isin(train_paths_under, train_labels_under))[0]

# Retrieve the corresponding paths
train_paths_under = [train_paths[i] for i in selected_indices]

In [7]:
X_val = load_images(val_paths)
val_paths_under, val_labels_under = RandomUnderSampler.fit_resample(X_val, val_labels)

In [8]:
# After undersampling, get the indices of the selected paths
selected_indices = np.where(np.isin(val_paths_under, val_labels_under))[0]

# Retrieve the corresponding paths
val_paths_under = [val_paths[i] for i in selected_indices]

In [9]:
train_class_counts = Counter(train_labels_under)
print("Training set class distribution:", train_class_counts)

Training set class distribution: Counter({0: 342, 1: 342, 2: 342, 3: 342})


In [10]:
val_class_counts = Counter(val_labels_under)
print("Training set class distribution:", val_class_counts)

Training set class distribution: Counter({0: 73, 1: 73, 2: 73, 3: 73})


In [11]:
# # Mover as imagens para as pastas correspondentes
# for image_path, label in zip(train_paths, train_labels):
#     label_dir = os.path.join('train', str(label))
#     os.makedirs(label_dir, exist_ok=True)
#     shutil.copy(image_path, os.path.join(label_dir, os.path.basename(image_path)))

# for image_path, label in zip(val_paths, val_labels):
#     label_dir = os.path.join('val', str(label))
#     os.makedirs(label_dir, exist_ok=True)
#     shutil.copy(image_path, os.path.join(label_dir, os.path.basename(image_path)))

# for image_path, label in zip(test_paths, test_labels):
#     label_dir = os.path.join('test', str(label))
#     os.makedirs(label_dir, exist_ok=True)
#     shutil.copy(image_path, os.path.join(label_dir, os.path.basename(image_path)))

In [12]:
# path_treino = "train"
# path_validacao = "val"
# path_teste = "test"
# #Rescale data and create data generator instances
# train_datagenerator = ImageDataGenerator(rescale=1/255.)
# val_datagenerator = ImageDataGenerator(rescale=1/255.)
# test_datagenerator = ImageDataGenerator(rescale=1/255.)
# datagenerator_augmentation = ImageDataGenerator(rescale = 1/255.,
#                                                       rotation_range=20, #rotate the image
#                                                       zoom_range = 0.2,#zoom the image
#                                                       width_shift_range=0.2, #shift the image horizontally
#                                                       height_shift_range=0.2, #shift the image vertically
#                                                       horizontal_flip=True, #flip the image on horizontal axis
#                                                       vertical_flip=True, #flip the image on vertical axis
#                                                       shear_range = 0.2) #Shear the image


In [13]:
# #Load data in from images and turn into batches
# train_data = train_datagenerator.flow_from_directory(path_treino,
#                                                      target_size=(224,224),
#                                                      batch_size=32,
#                                                      class_mode='categorical'
#                                                     )
# val_data = val_datagenerator.flow_from_directory(path_validacao,
#                                                      target_size=(224,224),
#                                                      batch_size=32,
#                                                      class_mode='categorical'
#                                                     )
# test_data = test_datagenerator.flow_from_directory(path_teste,
#                                                      target_size=(224,224),
#                                                      batch_size=32,
#                                                      class_mode='categorical'
#                                                     )
# train_data_augmented = datagenerator_augmentation.flow_from_directory(path_treino,
#                                                                             target_size=(224,224),
#                                                                             batch_size=32,
#                                                                             class_mode='categorical',
#                                                                             shuffle=True)

In [14]:
from tensorflow.keras.preprocessing.image import load_img, img_to_array, ImageDataGenerator
import numpy as np
from sklearn.preprocessing import LabelBinarizer


train_paths_under = np.array(train_paths_under)
test_paths = np.array(test_paths)
val_paths_under = np.array(val_paths_under)

# One-hot encode the labels
label_binarizer = LabelBinarizer()
train_labels_under = label_binarizer.fit_transform(train_labels_under)
test_labels = label_binarizer.transform(test_labels)
val_labels_under = label_binarizer.transform(val_labels_under)


# Define the datagenerator
datagenerator_augmentation = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    zoom_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    shear_range=0.2
)

# Preprocess and load an image from path
def preprocess_image_with_array(image_path):
    img = load_img(image_path, target_size=(224, 224))  # Load image and resize
    img_array = img_to_array(img)
    return img_array

# Apply augmentation multiple times using ImageDataGenerator
def augment_multiple_times_keras(image_path, label, num_times):
    image = preprocess_image_with_array(image_path)
    image = np.expand_dims(image, axis=0)  # Add batch dimension
    augmented_images = []

    # Generate num_times augmentations
    for _ in range(num_times):
        augmented_img = next(datagenerator_augmentation.flow(image, batch_size=1))[0]
        augmented_images.append((augmented_img, label))

    return augmented_images

# Load dataset and preprocess using ImageDataGenerator-compatible approach
def load_dataset_keras(image_paths, labels):
    dataset = []
    for path, label in zip(image_paths, labels):
        image = preprocess_image_with_array(path)
        dataset.append((image, label, path))  # Include path here
    return dataset

# Augment dataset by class
def filter_by_class(dataset, class_index):
    return [(img, lbl, path) for img, lbl, path in dataset if np.argmax(lbl) == class_index]

def augment_class(dataset, class_index, num_augments):
    class_subset = filter_by_class(dataset, class_index)
    augmented = []
    for img, lbl, path in class_subset:  # Unpack the path here as well
        augmented += augment_multiple_times_keras(path, lbl, num_augments)
    return augmented

# Preprocess entire dataset
train_dataset_raw = load_dataset_keras(train_paths_under, train_labels_under)
val_dataset_raw = load_dataset_keras(val_paths_under, val_labels_under)
test_dataset_raw = load_dataset_keras(test_paths, test_labels)

# Organize data
class_2_train_augmented = augment_class(train_dataset_raw, 2, 2)
class_3_train_augmented = augment_class(train_dataset_raw, 3, 2)
class_0_train_augmented = augment_class(train_dataset_raw, 0, 2)
class_1_train_augmented = augment_class(train_dataset_raw, 1, 2)

final_train_dataset = (
    class_2_train_augmented +
    class_3_train_augmented +
    class_0_train_augmented +
    class_1_train_augmented
)

class_2_val_augmented = augment_class(val_dataset_raw, 2, 2)
class_3_val_augmented = augment_class(val_dataset_raw, 3, 2)
class_0_val_augmented = augment_class(val_dataset_raw, 0, 2)
class_1_val_augmented = augment_class(val_dataset_raw, 1, 2)

final_val_dataset = (
    class_2_val_augmented +
    class_3_val_augmented +
    class_0_val_augmented +
    class_1_val_augmented
)

final_test_dataset = test_dataset_raw

In [15]:
from collections import Counter
import numpy as np

label_counts = Counter()

# Loop through dataset
for _, lbl in final_train_dataset:
    class_index = np.argmax(lbl)  # Directly apply np.argmax to the NumPy array
    label_counts[class_index] += 1

# Print class distribution
print("Final dataset class distribution:")
for label, count in sorted(label_counts.items()):
    print(f"Class {label}: {count} images")


Final dataset class distribution:
Class 0: 684 images
Class 1: 684 images
Class 2: 684 images
Class 3: 684 images


In [16]:
label_val_counts = Counter()

# Loop through dataset
for _, lbl in final_val_dataset:
    class_index = np.argmax(lbl.numpy())  # Convert one-hot to class index
    label_val_counts[class_index] += 1

# Print class distribution
print("Final val dataset class distribution:")
for label, count in sorted(label_val_counts.items()):
    print(f"Class {label}: {count} images")


AttributeError: 'numpy.ndarray' object has no attribute 'numpy'

In [17]:
# Define the CNN model
def create_cnn(num_classes=4):
    model = keras.Sequential([
        # Convolutional Block 1
        layers.Conv2D(32, (3,3), activation='relu', input_shape=(224, 224, 3)),
        layers.MaxPooling2D((2,2)),
        layers.BatchNormalization(),
        
        # Convolutional Block 2
        layers.Conv2D(64, (3,3), activation='relu'),
        layers.MaxPooling2D((2,2)),
        layers.BatchNormalization(),
        
        # Convolutional Block 3
        layers.Conv2D(128, (3,3), activation='relu'),
        layers.MaxPooling2D((2,2)),
        layers.BatchNormalization(),
        
        # Flatten & Dense Layers
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.5),  # Reduce overfitting
        layers.Dense(num_classes, activation='softmax')  # Output layer
    ])

    # Compile the model
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    return model



In [18]:
# Create the model
cnn_model = create_cnn()

# Print model summary
cnn_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 222, 222, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 111, 111, 32)     0         
 )                                                               
                                                                 
 batch_normalization (BatchN  (None, 111, 111, 32)     128       
 ormalization)                                                   
                                                                 
 conv2d_1 (Conv2D)           (None, 109, 109, 64)      18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 54, 54, 64)       0         
 2D)                                                             
                                                        

In [None]:
print("Test dataset shape:", final_test_dataset)
print("Final dataset shape:", final_train_dataset)
print("Test dataset shape:", final_val_dataset)

In [19]:
train_images, train_labels = zip(*final_train_dataset)
val_images, val_labels = zip(*final_val_dataset)

train_images = tf.convert_to_tensor(np.array(train_images))
train_labels = tf.convert_to_tensor(np.array(train_labels))

val_images = tf.convert_to_tensor(np.array(val_images))
val_labels = tf.convert_to_tensor(np.array(val_labels))

final_train_dataset = tf.data.Dataset.from_tensor_slices((train_images, train_labels))
final_val_dataset = tf.data.Dataset.from_tensor_slices((val_images, val_labels))


BATCH_SIZE = 32

# Batch the dataset
train_dataset = final_train_dataset.shuffle(buffer_size=1000).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
val_dataset = final_val_dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

cnn_model.fit(train_dataset, validation_data=val_dataset, epochs=10)
#cnn_model.fit(final_dataset, epochs=10, steps_per_epoch=len(final_dataset))

: 

In [None]:
for img, label in final_dataset.take(1):
    print(img.shape, label.shape)


In [None]:
for img, label in preprocessed_dataset_val.take(1):
    print(img.shape, label.shape)


In [None]:
print(np.array(final_dataset))  # Shape of images
print(np.array(preprocessed_dataset_val))  # Shape of labels

In [None]:

print(np.array(final_dataset))  # Shape of images