In [17]:
import os
import shutil
from sklearn.model_selection import train_test_split

In [18]:
def get_files(path):
    dir_name  = os.path.join(os.getcwd(), path)
    files     = os.listdir(dir_name)
    paths    = []

    for file in files:
        if '.jpg' in file:
            paths.append(os.path.join(path,file))
    
    return paths

In [19]:
def print_path_info(paths):
    g_paths = [path for path in paths if '_g_' in path]
    non_g_paths = [path for path in paths if '_g_' not in path]

    print("Total ->", len(paths))
    print("Glaucoma ->", len(g_paths))
    print("No Glaucoma", len(non_g_paths)) 
    print("Glaucoma % ->", len(g_paths)/len(paths))
    print("No Glaucoma % ->", len(non_g_paths)/len(paths))
    print("")

In [20]:
DATASET_NAME = 'ACRIMA'
path = './datasets/' + DATASET_NAME
paths = get_files(path)
print("Imagenes ->", len(paths))

Imagenes -> 455


In [21]:
# First split: 90% for training, 10% for validation
train_paths, val_paths = train_test_split(paths, test_size=0.10, random_state=42, shuffle=True)

# Second split: 90% of the first training set for training, 10% for testing
train_paths, test_paths = train_test_split(train_paths, test_size=0.10, random_state=30, shuffle=True)

print_path_info(train_paths)
print("--------------------")
print_path_info(val_paths)
print("--------------------")
print_path_info(test_paths)

Total -> 368
Glaucoma -> 148
No Glaucoma 220
Glaucoma % -> 0.40217391304347827
No Glaucoma % -> 0.5978260869565217

--------------------
Total -> 46
Glaucoma -> 24
No Glaucoma 22
Glaucoma % -> 0.5217391304347826
No Glaucoma % -> 0.4782608695652174

--------------------
Total -> 41
Glaucoma -> 22
No Glaucoma 19
Glaucoma % -> 0.5365853658536586
No Glaucoma % -> 0.4634146341463415



In [22]:
def copy_images(paths, destination_folder):
    if not os.path.exists(destination_folder):
        os.makedirs(destination_folder)
    for path in paths:
        shutil.copy(path, destination_folder)
    
    print("Images copied to", destination_folder)

# Copy images
copy_images(train_paths, 'datasets_split/'+DATASET_NAME+'/train')
copy_images(val_paths, 'datasets_split/'+DATASET_NAME+'/val')
copy_images(test_paths, 'datasets_split/'+DATASET_NAME+'/test')

Images copied to datasets_split/RIM/train
Images copied to datasets_split/RIM/val
Images copied to datasets_split/RIM/test


In [23]:
def copy_images(paths, destination_folder):
    glaucoma_folder = os.path.join(destination_folder, 'glaucoma')
    normal_folder = os.path.join(destination_folder, 'normal')

    if not os.path.exists(glaucoma_folder):
        os.makedirs(glaucoma_folder)
    if not os.path.exists(normal_folder):
        os.makedirs(normal_folder)

    for path in paths:
        if '_g_' in path:
            shutil.copy(path, glaucoma_folder)
        else:
            shutil.copy(path, normal_folder)

    print("Images copied to", destination_folder)

# Copy images
copy_images(train_paths, 'datasets_split_folder/'+DATASET_NAME+'/Fold0/train')
copy_images(val_paths, 'datasets_split_folder/'+DATASET_NAME+'/Fold0/valid')
copy_images(test_paths, 'datasets_split_folder/'+DATASET_NAME+'/Fold0/test')

Images copied to datasets_split_folder/RIM/Fold0/train
Images copied to datasets_split_folder/RIM/Fold0/valid
Images copied to datasets_split_folder/RIM/Fold0/test
