# Colab Code

# Mount Google Drive inside the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

import shutil
import os
 
# File name of the dataset archive; the copy step below looks it up under MyDrive.
FILE_NAME = "Data.zip"
 
def copy_zip_file(src_path, dest_dir):
    """Copy the file at ``src_path`` into ``dest_dir``.

    Args:
        src_path: Path of the file to copy.
        dest_dir: Directory that receives the copy; it is created (including
            parents) when it does not exist yet.

    Returns:
        The path of the copied file, or ``None`` when ``src_path`` is not an
        existing regular file (an error message is printed in that case).
    """
    # isfile (rather than exists) also rejects directories, which copy2 cannot copy.
    if not os.path.isfile(src_path):
        print(f"Error: The file '{src_path}' does not exist.")
        return None

    zip_filename = os.path.basename(src_path)
    dest_path = os.path.join(dest_dir, zip_filename)

    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(dest_dir, exist_ok=True)

    # copy2 also preserves file metadata such as modification times.
    shutil.copy2(src_path, dest_path)
    print(f"'{zip_filename}' has been copied to '{dest_dir}'.")
    return dest_path
 
source_path = "/content/drive/MyDrive/" + FILE_NAME
destination_directory = "/content"
 
copy_zip_file(source_path, destination_directory)

!unzip Data.zip

# Report whether TensorFlow can see a GPU in this runtime.
import tensorflow as tf  # imported here because no earlier cell brings `tf` into scope

# gpu_device_name() returns an empty string when no GPU device is available.
gpu_device_name = tf.test.gpu_device_name()

if gpu_device_name:
    print('Default GPU Device:{}'.format(gpu_device_name))
else:
    print("Please install GPU version of TF")

print(f"-> {tf.config.list_physical_devices('GPU')}")

In [None]:
import tensorflow as tf

# Show which TensorFlow version this runtime provides.
print(tf.__version__)

In [None]:
# imports 
import os
import glob
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from PIL import Image
import numpy as np
import tensorflow as tf
from collections import Counter
import random
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import LabelBinarizer
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from imblearn.under_sampling import RandomUnderSampler
import shutil

# Read data

In [None]:
# Define Paths
dataset_dir = "Data"  # Root folder containing class folders
# Class names must match the folder names; a name's position in this list is its integer label
classes = ["Mild Dementia", "Moderate Dementia", "Non Demented", "Very mild Dementia"]

# Collect every image path together with the integer label of its class folder
image_paths, labels = [], []
for class_label, class_name in enumerate(classes):
    class_dir = os.path.join(dataset_dir, class_name)
    if not os.path.exists(class_dir):
        # A missing class folder is reported but does not stop the scan of the other classes
        print(f"Error: Folder {class_dir} does not exist.")
        continue
    # glob returns files in arbitrary order; sorting makes every later step that depends
    # on the order of these lists reproducible between runs
    files = sorted(glob.glob(f"{class_dir}/*.jpg"))  # Adjust extension if needed
    print(f"Class: {class_name}, Files Found: {len(files)}")  # Debug: Count files
    image_paths.extend(files)
    labels.extend([class_label] * len(files))

# Proceed with the pipeline only if files were found
if not image_paths:
    raise ValueError("No images found. Check dataset folder names or file paths.")

In [None]:

def preprocess_image(image_path):
    """Read a JPEG file and return it as a 128x128, 3-channel float32 tensor divided by 255."""
    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, [128, 128])
    # Normalize to [0, 1]
    return tf.cast(resized, tf.float32) / 255.0

# Load Dataset
def load_dataset(image_paths, labels):
    """Build an unbatched ``tf.data.Dataset`` of ``(image, label)`` pairs.

    Args:
        image_paths: Sequence of JPEG file paths.
        labels: Sequence of labels, aligned with ``image_paths``.

    Returns:
        A dataset whose elements are ``(preprocess_image(path), label)``, in
        the same order as the inputs.
    """
    dataset = tf.data.Dataset.from_tensor_slices((image_paths, labels))
    # Decode images in parallel; map() still yields elements in input order by default.
    return dataset.map(
        lambda path, label: (preprocess_image(path), label),
        num_parallel_calls=tf.data.AUTOTUNE,
    )



In [None]:
# Build the dataset used downstream: at most 15,000 shuffled samples of label 2 plus every
# sample of labels 3, 0 and 1.

# Apply preprocessing so all datasets contain image tensors (not paths)
# (kept for interactive inspection; the per-class datasets below no longer derive from it)
preprocessed_dataset = load_dataset(image_paths, labels)

# Select by label on the cheap (path, label) pairs and decode only afterwards: images of
# other classes are never read, and the shuffle buffer holds paths instead of decoded images.
labeled_paths = tf.data.Dataset.from_tensor_slices((image_paths, labels))


def _class_subset(class_index, limit=None):
    """Return the (image, label) dataset of one class, optionally capped at `limit` shuffled samples."""
    subset = labeled_paths.filter(lambda path, label: tf.equal(label, class_index))
    if limit is not None:
        subset = subset.shuffle(buffer_size=1000).take(limit)
    return subset.map(lambda path, label: (preprocess_image(path), label))


# Split by class
class_2_dataset = _class_subset(2, limit=15_000)
class_3_dataset = _class_subset(3)
class_0_dataset = _class_subset(0)
class_1_dataset = _class_subset(1)

# Combine the datasets
final_dataset = (class_2_dataset
                 .concatenate(class_3_dataset)
                 .concatenate(class_0_dataset)
                 .concatenate(class_1_dataset))

In [None]:

# Tally the samples per class in final_dataset.
# Each label is converted with int(lbl.numpy()), i.e. it is used as a scalar class index.
label_counts = Counter(int(lbl.numpy()) for _, lbl in final_dataset)

# Print class distribution
print("Final dataset class distribution:")
for class_index in sorted(label_counts):
    print(f"Class {class_index}: {label_counts[class_index]} images")


In [None]:
import tensorflow as tf
import os
from PIL import Image
import numpy as np

def save_dataset_to_folder(dataset, folder_path, class_names):
    """Write every image of an unbatched dataset to ``folder_path/<class name>/<n>.jpg``.

    Args:
        dataset: Iterable of ``(image, label)`` tensor pairs, one image per
            element. ``label`` is either a scalar class index or a one-hot
            vector.
        folder_path: Root output directory; it and one sub-directory per class
            are created when missing.
        class_names: Class names indexed by label; they are used as
            sub-directory names.

    Files are numbered per class starting at 0, so an earlier export in the
    same folder is overwritten file by file.
    """
    os.makedirs(folder_path, exist_ok=True)

    for class_name in class_names:
        os.makedirs(os.path.join(folder_path, class_name), exist_ok=True)

    counter = [0] * len(class_names)  # To count how many images per class

    for image, label in dataset:
        # If label is one-hot encoded, convert to integer
        if tf.rank(label) > 0:
            label = tf.argmax(label, axis=-1)

        label = int(label.numpy())
        class_name = class_names[label]

        # Convert tensor to a uint8 array for PIL
        image_np = image.numpy()
        if image_np.dtype != np.uint8:
            # Round instead of truncating (199.99998 must become 200, not 199) and clip
            # so out-of-range values cannot wrap around in the uint8 cast.
            image_np = np.clip(np.rint(image_np * 255.0), 0, 255).astype(np.uint8)

        img = Image.fromarray(image_np)
        img_path = os.path.join(folder_path, class_name, f'{counter[label]}.jpg')
        img.save(img_path)

        counter[label] += 1



In [None]:
# Export the combined dataset to disk as final_dataset/<class name>/<n>.jpg
save_dataset_to_folder(final_dataset, "final_dataset", classes)


In [None]:
# Define Paths
dataset_dir = "final_dataset"  # Root folder containing class folders
# Class names must match the folder names; a name's position in this list is its integer label
classes = ["Mild Dementia", "Moderate Dementia", "Non Demented", "Very mild Dementia"]

# Collect every image path together with the integer label of its class folder
image_paths, labels = [], []
for class_label, class_name in enumerate(classes):
    class_dir = os.path.join(dataset_dir, class_name)
    if not os.path.exists(class_dir):
        # A missing class folder is reported but does not stop the scan of the other classes
        print(f"Error: Folder {class_dir} does not exist.")
        continue
    # glob returns files in arbitrary order; sorting keeps the seeded train/test/validation
    # split that consumes these lists reproducible between runs
    files = sorted(glob.glob(f"{class_dir}/*.jpg"))  # Adjust extension if needed
    print(f"Class: {class_name}, Files Found: {len(files)}")  # Debug: Count files
    image_paths.extend(files)
    labels.extend([class_label] * len(files))

# Proceed with the pipeline only if files were found
if not image_paths:
    raise ValueError("No images found. Check dataset folder names or file paths.")

In [None]:
# Split data into training (70%), testing (15%) and validation (15%) sets
train_paths, test_paths, train_labels, test_labels = train_test_split(image_paths, labels, test_size=0.3, random_state=42,stratify=labels) # shuffles by default; stratified on the labels
test_paths, val_paths, test_labels, val_labels = train_test_split(test_paths, test_labels, test_size=0.5, random_state=42,stratify=test_labels) # shuffles by default; stratified on the labels
# Stratifying keeps the same class distribution in every subset

In [None]:
# Build one (image, label) tf.data pipeline per split; load_dataset() does not batch them
train_dataset = load_dataset(train_paths, train_labels)
val_dataset = load_dataset(val_paths, val_labels)
test_dataset = load_dataset(test_paths, test_labels)

In [None]:
# Export each split to its own directory tree: <split>_dataset/<class name>/<n>.jpg
for split_dataset, split_dir in (
    (train_dataset, "train_dataset"),
    (val_dataset, "val_dataset"),
    (test_dataset, "test_dataset"),
):
    save_dataset_to_folder(split_dataset, split_dir, classes)

# Data Augmentation

In [None]:
# Define Paths
dataset_dir = "train_dataset"  # Root folder containing class folders
# Class names must match the folder names; a name's position in this list is its integer label
classes = ["Mild Dementia", "Moderate Dementia", "Non Demented", "Very mild Dementia"]

# Collect every image path together with the integer label of its class folder
image_paths, labels = [], []
for class_label, class_name in enumerate(classes):
    class_dir = os.path.join(dataset_dir, class_name)
    if not os.path.exists(class_dir):
        # A missing class folder is reported but does not stop the scan of the other classes
        print(f"Error: Folder {class_dir} does not exist.")
        continue
    # glob returns files in arbitrary order; sort so the lists are identical between runs
    files = sorted(glob.glob(f"{class_dir}/*.jpg"))  # Adjust extension if needed
    print(f"Class: {class_name}, Files Found: {len(files)}")  # Debug: Count files
    image_paths.extend(files)
    labels.extend([class_label] * len(files))

# Proceed with the pipeline only if files were found
if not image_paths:
    raise ValueError("No images found. Check dataset folder names or file paths.")

In [None]:
path_train = "train_dataset"
path_val = "val_dataset"

# Rescale data and create data generator instances
# train_datagenerator = ImageDataGenerator(rescale=1/255.)

# Validation images are only rescaled; no random transformation is applied to them.
val_datagenerator = ImageDataGenerator(rescale=1/255.)

# Random transformations applied to training images, in addition to the rescaling.
augmentation_settings = dict(
    rotation_range=20,       # rotate the image
    zoom_range=0.2,          # zoom the image
    width_shift_range=0.2,   # shift the image horizontally
    height_shift_range=0.2,  # shift the image vertically
    horizontal_flip=True,    # flip the image on horizontal axis
    vertical_flip=True,      # flip the image on vertical axis
    shear_range=0.2,         # shear the image
)
train_datagenerator_augmentation = ImageDataGenerator(rescale=1/255., **augmentation_settings)



In [None]:
# Load data in from images and turn into batches
# Settings shared by both directory iterators: 128x128 images, batches of 32, one-hot labels.
flow_settings = {
    "target_size": (128, 128),
    "batch_size": 32,
    "class_mode": 'categorical',
}

# train_data = train_datagenerator.flow_from_directory(path_treino,
#                                                      target_size=(128,128),
#                                                      batch_size=32,
#                                                      class_mode='categorical'
#                                                     )
val_data = val_datagenerator.flow_from_directory(path_val, **flow_settings)
train_data_augmented = train_datagenerator_augmentation.flow_from_directory(
    path_train,
    shuffle=True,
    **flow_settings,
)

In [None]:
def save_dataset_to_folder_augmented(dataset, folder_path, class_names):
    """Write one pass of a batched dataset to ``folder_path/<class name>/<n>.jpg``.

    Args:
        dataset: Iterable of ``(images, labels)`` batches, e.g. the iterator
            returned by ``ImageDataGenerator.flow_from_directory``. ``labels``
            is either one-hot (2-D) or a 1-D batch of class indices.
        folder_path: Root output directory; it and one sub-directory per class
            are created when missing.
        class_names: Class names indexed by label; they are used as
            sub-directory names.

    When ``len(dataset)`` is defined, exactly that many batches are written.
    This matters for Keras directory iterators, which never stop yielding
    batches on their own. Files are numbered per class starting at 0.
    """
    import itertools  # local import: only this function needs it

    os.makedirs(folder_path, exist_ok=True)

    for class_name in class_names:
        os.makedirs(os.path.join(folder_path, class_name), exist_ok=True)

    # Limit the export to a single pass when the number of batches is known.
    try:
        num_batches = len(dataset)
    except TypeError:
        num_batches = None  # unknown length: iterate until the dataset is exhausted

    counter = [0] * len(class_names)  # To count how many images per class

    for image, label in itertools.islice(dataset, num_batches):
        # Work on plain arrays so NumPy batches and tensor batches behave the same.
        batch_images = np.asarray(image)
        batch_labels = np.asarray(label)

        # One-hot labels are 2-D; a 1-D batch already holds class indices.
        if batch_labels.ndim > 1:
            batch_labels = batch_labels.argmax(axis=-1)

        for image_np, single_label in zip(batch_images, batch_labels):
            single_label = int(single_label)
            class_name = class_names[single_label]

            if image_np.dtype != np.uint8:
                # Round instead of truncating and clip so out-of-range values
                # cannot wrap around in the uint8 cast.
                image_np = np.clip(np.rint(image_np * 255.0), 0, 255).astype(np.uint8)

            img = Image.fromarray(image_np)
            img_path = os.path.join(folder_path, class_name, f'{counter[single_label]}.jpg')
            img.save(img_path)

            counter[single_label] += 1


In [None]:
# save_dataset_to_folder_augmented(train_data_augmented, "train_dataset_augmentation", classes)

In [None]:
# label_counts = Counter()

# # Loop through dataset (limited by steps per epoch unless infinite loop)
# for _, lbl in train_data_augmented:
#     class_indices = np.argmax(lbl, axis=1) 
#     label_counts.update(class_indices)

#     # Optional: break if you've gone through the entire dataset once
#     if train_data_augmented.batch_index == 0:
#         break

# # Print class distribution
# print("Final dataset class distribution:")
# for label, count in sorted(label_counts.items()):
#     print(f"Class {label}: {count} images")


In [None]:

# train_paths_under = np.array(train_paths)
# test_paths = np.array(test_paths)
# val_paths_under = np.array(val_paths)

# # One-hot encode the labels
# label_binarizer = LabelBinarizer()
# train_labels = label_binarizer.fit_transform(train_labels)
# test_labels = label_binarizer.transform(test_labels)
# val_labels = label_binarizer.transform(val_labels)




In [None]:
# Define the CNN model
def create_cnn(num_classes=4, input_shape=(128, 128, 3), loss='sparse_categorical_crossentropy'):
    """Build and compile a small three-block convolutional classifier.

    Args:
        num_classes: Number of units in the softmax output layer.
        input_shape: Shape of one input image, without the batch dimension.
        loss: Loss passed to ``model.compile``. The default expects integer
            class labels; pass ``'categorical_crossentropy'`` when the labels
            are one-hot encoded.

    Returns:
        The compiled ``keras.Sequential`` model (Adam optimizer, accuracy metric).
    """
    # Convolutional blocks 1-3: Conv -> MaxPool -> BatchNorm with 32, 64 and 128 filters
    conv_blocks = []
    for filters in (32, 64, 128):
        conv_blocks += [
            layers.Conv2D(filters, (3, 3), activation='relu'),
            layers.MaxPooling2D((2, 2)),
            layers.BatchNormalization(),
        ]

    model = keras.Sequential([
        # An explicit Input object replaces the deprecated `input_shape=` layer argument.
        keras.Input(shape=input_shape),
        *conv_blocks,

        # Flatten & Dense Layers
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.5),  # Reduce overfitting
        layers.Dense(num_classes, activation='softmax'),  # Output layer
    ])

    # Compile the model
    model.compile(
        optimizer='adam',
        loss=loss,
        metrics=['accuracy'],
    )

    return model



In [None]:
# Create the model with create_cnn()'s default arguments
cnn_model = create_cnn()

# Print model summary (layers, output shapes and parameter counts)
cnn_model.summary()

In [None]:
# Train the model
# Keras treats every element of a tf.data.Dataset as one batch, while load_dataset() yields
# single (image, label) pairs, so both datasets must be batched before fitting.
batch_size = 32  # same batch size as the directory iterators
train_batches = train_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
val_batches = val_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

cnn_model.fit(train_batches, validation_data=val_batches, epochs=10)

