#### Count the number of pictures in each category

In [1]:
import cv2
import numpy as np
import os
import random
import shutil

# Use the current working directory as the data root and collect the
# full path of every directory that sits directly inside it.
folder_path = os.getcwd()
print(folder_path)
with os.scandir(folder_path) as dir_entries:
    subfolders = [entry.path for entry in dir_entries if entry.is_dir()]
print(subfolders)

c:\Users\z7567\Documents\GitHub\DSAN_6600_FINAL\Data
['c:\\Users\\z7567\\Documents\\GitHub\\DSAN_6600_FINAL\\Data\\Mild_Dementia', 'c:\\Users\\z7567\\Documents\\GitHub\\DSAN_6600_FINAL\\Data\\Mild_Dementia_Choice', 'c:\\Users\\z7567\\Documents\\GitHub\\DSAN_6600_FINAL\\Data\\Mild_Dementia_Test', 'c:\\Users\\z7567\\Documents\\GitHub\\DSAN_6600_FINAL\\Data\\Moderate_Dementia', 'c:\\Users\\z7567\\Documents\\GitHub\\DSAN_6600_FINAL\\Data\\Moderate_Dementia_Choice', 'c:\\Users\\z7567\\Documents\\GitHub\\DSAN_6600_FINAL\\Data\\Non_Dementia', 'c:\\Users\\z7567\\Documents\\GitHub\\DSAN_6600_FINAL\\Data\\Non_Dementia_Choice', 'c:\\Users\\z7567\\Documents\\GitHub\\DSAN_6600_FINAL\\Data\\Non_Dementia_Test', 'c:\\Users\\z7567\\Documents\\GitHub\\DSAN_6600_FINAL\\Data\\Very_Mild_Dementia', 'c:\\Users\\z7567\\Documents\\GitHub\\DSAN_6600_FINAL\\Data\\Very_Mild_Dementia_Choice', 'c:\\Users\\z7567\\Documents\\GitHub\\DSAN_6600_FINAL\\Data\\Very_Mild_Dementia_Test']


In [2]:
# Report how many regular files (sub-directories excluded) each category folder holds.
for subfolder in subfolders:
    count = sum(
        1
        for name in os.listdir(subfolder)
        if os.path.isfile(os.path.join(subfolder, name))
    )
    print( f'{os.path.basename(subfolder)}', count)

Mild_Dementia 4122
Mild_Dementia_Choice 880
Mild_Dementia_Test 0
Moderate_Dementia 48
Moderate_Dementia_Choice 880
Non_Dementia 66342
Non_Dementia_Choice 880
Non_Dementia_Test 0
Very_Mild_Dementia 12845
Very_Mild_Dementia_Choice 880
Very_Mild_Dementia_Test 0


Because of the small sample size of the Moderate Dementia category and the limited performance of our GPU, we select 10% (48 images) for Moderate Dementia. For the other categories, the size of the training set is limited to 440 * 2 = 880 images so that it is balanced against the training set of the Moderate category. The test set can be chosen to be very large. We first select images for training and validation, and then select a separate set of images for testing.

In [3]:
def choose_imgs(folder_path, output_path, num, remove=False):
    """Copy a random sample of files from one folder into another.

    Args:
        folder_path: Directory to sample from. Only regular files are
            considered; sub-directories are ignored.
        output_path: Directory the sampled files are copied into. It is
            created if it does not exist yet.
        num: Number of files to sample (without replacement).
        remove: If True, delete each sampled file from ``folder_path`` after
            it has been copied, so the files left behind can be used as
            testing data.

    Raises:
        ValueError: If ``num`` is negative or larger than the number of
            files available in ``folder_path``.
    """
    # Keep regular files only (shutil.copyfile cannot copy a directory) and
    # sort them so a seeded `random` gives the same sample on every platform,
    # independent of the order os.listdir happens to return.
    all_files = sorted(
        name for name in os.listdir(folder_path)
        if os.path.isfile(os.path.join(folder_path, name))
    )
    if not 0 <= num <= len(all_files):
        raise ValueError(
            f"Cannot sample {num} files from '{folder_path}': "
            f"only {len(all_files)} files available"
        )
    files = random.sample(all_files, num)

    os.makedirs(output_path, exist_ok=True)
    for file_name in files:
        source_file_path = os.path.join(folder_path, file_name)
        destination_file_path = os.path.join(output_path, file_name)
        shutil.copyfile(source_file_path, destination_file_path)
        # Remove the images that have been chosen as training data, then the rest will be used as testing data
        if remove:
            os.remove(source_file_path)

In [None]:
# Create one output folder per category for the sampled images.
# exist_ok=True keeps this cell re-runnable once the folders already exist
# (a plain os.makedirs would raise FileExistsError on the second run).
for choice_dir in (
    'Non_Dementia_Choice',
    'Mild_Dementia_Choice',
    'Moderate_Dementia_Choice',
    'Very_Mild_Dementia_Choice',
):
    os.makedirs(choice_dir, exist_ok=True)

In [6]:
# (source folder, destination folder, number of images to move) per category.
# remove=True deletes each picked image from its source folder after copying.
choice_plan = (
    ('Non_Dementia', 'Non_Dementia_Choice', 880),
    ('Mild_Dementia', 'Mild_Dementia_Choice', 880),
    ('Very_Mild_Dementia', 'Very_Mild_Dementia_Choice', 880),
    ('Moderate_Dementia', 'Moderate_Dementia_Choice', 440),
)
for source_dir, choice_dir, n_images in choice_plan:
    choose_imgs(source_dir, choice_dir, n_images, remove=True)

In [None]:
# Create the output folders for the test images.
# exist_ok=True keeps this cell re-runnable once the folders already exist
# (a plain os.makedirs would raise FileExistsError on the second run).
for test_dir in (
    'Non_Dementia_Test',
    'Mild_Dementia_Test',
    'Very_Mild_Dementia_Test',
):
    os.makedirs(test_dir, exist_ok=True)

In [4]:
# Copy 400 randomly chosen images per category into its test folder;
# the source folders are left untouched (remove defaults to False).
test_plan = (
    ('Non_Dementia', 'Non_Dementia_Test'),
    ('Mild_Dementia', 'Mild_Dementia_Test'),
    ('Very_Mild_Dementia', 'Very_Mild_Dementia_Test'),
)
for source_dir, test_dir in test_plan:
    choose_imgs(source_dir, test_dir, 400)

We can see that the number of pictures in each category is not balanced. We can solve this problem by using data augmentation.

#### Data Augmentation

In [7]:
from PIL import Image, ImageEnhance, ImageOps

In [8]:
def augment_image(folder_path, aug_path, aug_num): # Augment image
    """Write augmented copies of every image found in a folder.

    ``aug_num`` of the five available augmentations (rotate, crop,
    brightness, mirror, color) are chosen at random once per call, and that
    same selection is applied to every image. Each result is saved in
    ``aug_path`` as ``<original name>_<augmentation><original extension>``.

    Args:
        folder_path: Directory containing the source images. Only regular
            files with a png/jpg/jpeg/gif extension (any letter case) are
            processed; everything else is skipped.
        aug_path: Directory the augmented images are written to; created if
            it does not exist. May be the same directory as ``folder_path``.
        aug_num: How many of the five augmentations to apply (0 to 5).

    Raises:
        ValueError: If ``aug_num`` is negative or greater than 5
            (raised by ``random.sample``).
    """
    image_exts = {'.png', '.jpg', '.jpeg', '.gif'}

    # Randomly decide which augmentations to apply
    decide_list = [False] * 5
    for i in random.sample(range(5), aug_num):
        decide_list[i] = True

    os.makedirs(aug_path, exist_ok=True)

    # The listing is taken once, before anything is written, so files saved
    # below are not picked up again in this run even when aug_path is the
    # same directory as folder_path.
    files = os.listdir(folder_path)
    for file_name in files:
        original_file_name, ext = os.path.splitext(file_name)
        file_path = os.path.join(folder_path, file_name)
        # Skip anything that is not an image file. Without this guard a
        # non-image entry would either fail on an unbound `img` or silently
        # re-augment the previous image under a new name.
        if ext.lower() not in image_exts or not os.path.isfile(file_path):
            continue

        # The context manager closes the underlying file handle per image.
        with Image.open(file_path) as img:
            original_size = img.size
            width, height = original_size

            if decide_list[0]:
                # Rotate
                angle = random.randint(-45, 45)
                augmented_img = img.rotate(angle, expand=True)
                # Keep the original size
                augmented_img = augmented_img.resize(original_size)
                augmented_file_path = os.path.join(aug_path, f"{original_file_name}_rotate{ext}")
                augmented_img.save(augmented_file_path)
            if decide_list[1]:
                # Crop
                left = random.randint(0, width // 2)
                top = random.randint(0, height // 2)
                right = random.randint(width // 2, width)
                bottom = random.randint(height // 2, height)
                # Both edges can land exactly on the midpoint; widen by one
                # pixel so the crop box is never zero-width or zero-height.
                right = max(right, left + 1)
                bottom = max(bottom, top + 1)
                augmented_img = img.crop((left, top, right, bottom))
                # Keep the original size
                augmented_img = augmented_img.resize(original_size)
                augmented_file_path = os.path.join(aug_path, f"{original_file_name}_crop{ext}")
                augmented_img.save(augmented_file_path)
            if decide_list[2]:
                # Brightness
                factor = random.uniform(0.5, 1.5)
                enhancer = ImageEnhance.Brightness(img)
                augmented_img = enhancer.enhance(factor)
                augmented_file_path = os.path.join(aug_path, f"{original_file_name}_bright{ext}")
                augmented_img.save(augmented_file_path)
            if decide_list[3]:
                # Mirror
                augmented_img = ImageOps.mirror(img)
                augmented_file_path = os.path.join(aug_path, f"{original_file_name}_mirror{ext}")
                augmented_img.save(augmented_file_path)
            if decide_list[4]:
                # Color
                factor = random.uniform(0.5, 1.5)
                enhancer = ImageEnhance.Color(img)
                augmented_img = enhancer.enhance(factor)
                augmented_file_path = os.path.join(aug_path, f"{original_file_name}_color{ext}")
                augmented_img.save(augmented_file_path)


In [14]:
# Input and output are the same folder, so the augmented copies are written
# next to the originals; one randomly chosen augmentation is applied per image.
moderate_choice_dir = 'Moderate_Dementia_Choice'
augment_image(moderate_choice_dir, moderate_choice_dir, 1)

Now we can see the number of pictures in each category.

In [15]:
# Re-scan the working directory and report how many regular files each
# sub-folder holds now.
folder_path = os.getcwd()
subfolders = [entry.path for entry in os.scandir(folder_path) if entry.is_dir()]
print(subfolders)
for subfolder in subfolders:
    entry_paths = (os.path.join(subfolder, name) for name in os.listdir(subfolder))
    # os.path.isfile yields booleans; summing them counts the regular files.
    count = sum(map(os.path.isfile, entry_paths))
    print( f'{os.path.basename(subfolder)}', count)

['c:\\Users\\z7567\\Documents\\GitHub\\DSAN_6600_FINAL\\Data\\Mild_Dementia', 'c:\\Users\\z7567\\Documents\\GitHub\\DSAN_6600_FINAL\\Data\\Mild_Dementia_Choice', 'c:\\Users\\z7567\\Documents\\GitHub\\DSAN_6600_FINAL\\Data\\Moderate_Dementia', 'c:\\Users\\z7567\\Documents\\GitHub\\DSAN_6600_FINAL\\Data\\Moderate_Dementia_Choice', 'c:\\Users\\z7567\\Documents\\GitHub\\DSAN_6600_FINAL\\Data\\Non_Dementia', 'c:\\Users\\z7567\\Documents\\GitHub\\DSAN_6600_FINAL\\Data\\Non_Dementia_Choice', 'c:\\Users\\z7567\\Documents\\GitHub\\DSAN_6600_FINAL\\Data\\Very_Mild_Dementia', 'c:\\Users\\z7567\\Documents\\GitHub\\DSAN_6600_FINAL\\Data\\Very_Mild_Dementia_Choice']
Mild_Dementia 4122
Mild_Dementia_Choice 880
Moderate_Dementia 48
Moderate_Dementia_Choice 880
Non_Dementia 66342
Non_Dementia_Choice 880
Very_Mild_Dementia 12845
Very_Mild_Dementia_Choice 880
