# Imports

In [9]:
import os 
import pandas as pd
import numpy as np
from glob import glob
from tqdm.notebook import tqdm
import shutil

<br>

<br>

# Common Functions

In [2]:
def detect_image_extension(image_folder):
    """Return the file extension shared by the image files in ``image_folder``.

    Every regular file in the folder is inspected, not just the first entry
    returned by the operating system (whose order is arbitrary). Sub-folders
    and files that are not images, such as a labels CSV stored next to the
    images, are ignored. Extensions are matched against the supported formats
    case-insensitively.

    Args:
        image_folder: Path of the folder to inspect.

    Returns:
        The common image extension, including the leading dot and spelled
        exactly as it appears on disk (e.g. ``'.jpeg'``).

    Raises:
        AssertionError: If the folder is empty, if it contains no supported
            image file, or if its image files use more than one extension.
    """
    IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp')
    with os.scandir(image_folder) as scan:
        entries = list(scan)
    # Raised explicitly (instead of using `assert`) so the check still runs
    # when Python is started with -O, while keeping the same exception type.
    if not entries:
        raise AssertionError('The images folder is empty! Please correct the folder path.')
    file_extensions = {os.path.splitext(entry.name)[-1] for entry in entries if entry.is_file()}
    # Keep the on-disk spelling: callers append the result to file names.
    image_extensions = {ext for ext in file_extensions if ext.lower() in IMG_EXTENSIONS}
    if len(image_extensions) != 1:
        raise AssertionError('The images folder must only contain images files with consistent format.')
    return image_extensions.pop()

<br>

# Definitions

In [3]:
# Path to image folder
IMG_FOLDER = '../datasets/reduced_eyepacs_resized_cropped/'
# Path to the file containing the image labels. The file sits inside
# IMG_FOLDER, so the path is derived from it: changing the folder above
# can no longer leave the two paths out of sync.
IMG_LABELS_FILE = os.path.join(IMG_FOLDER, 'trainLabels_cropped.csv')
# Extension of the image files, detected from the contents of IMG_FOLDER
IMAGE_EXT = detect_image_extension(IMG_FOLDER)
SEED = 100

<br>

# Severity-level Folder Structure

In [11]:
# Load the labels file and keep only the image name and severity level columns
df_labels = pd.read_csv(IMG_LABELS_FILE).loc[:, ['image', 'level']]

In [12]:
# The image info will gather the useful information for splitting.
# Work on a copy: a plain assignment would only create a second name for
# df_labels, so any later in-place edit of df_img_info would silently
# change the raw labels as well.
df_img_info = df_labels.copy()

In [13]:
# Turn the bare image names into full file paths. The names are read from
# df_labels and assign() builds a new frame instead of editing one in place,
# so df_labels keeps the raw names and re-running this cell yields the same
# paths rather than prefixing the folder and appending the extension again.
df_img_info = df_img_info.assign(
    image=df_labels['image'].map(
        lambda file_name: os.path.join(IMG_FOLDER, file_name + IMAGE_EXT)))

In [20]:
# Move every image into a sub-folder of IMG_FOLDER named after its severity level.
for level, group in df_img_info.groupby('level'):
    severity_level_path = os.path.join(IMG_FOLDER, str(level))
    os.makedirs(severity_level_path, exist_ok=True)
    skipped = 0
    for src_img_path in tqdm(group['image'].values, desc=f'Processing Severity {level}'):
        # A source can be absent because it was already moved by a previous
        # run of this cell or because the label has no matching image file.
        # Skipping it lets the cell be re-run safely instead of aborting on
        # the first missing file.
        if not os.path.isfile(src_img_path):
            skipped += 1
            continue
        # The destination is the folder itself, so shutil.move refuses to
        # overwrite a file of the same name that is already inside it.
        shutil.move(src_img_path, severity_level_path)
    if skipped:
        print(f'Severity {level}: skipped {skipped} image(s) not found in {IMG_FOLDER}')

Processing Severity 0:   0%|          | 0/25802 [00:00<?, ?it/s]

Processing Severity 1:   0%|          | 0/2438 [00:00<?, ?it/s]

Processing Severity 2:   0%|          | 0/5288 [00:00<?, ?it/s]

Processing Severity 3:   0%|          | 0/872 [00:00<?, ?it/s]

Processing Severity 4:   0%|          | 0/708 [00:00<?, ?it/s]

<br>