### Import the needed packages

In [1]:
# packages for handling files
import os
import shutil
# packages for handling data
import pandas as pd

### Global variable

In [2]:
# location of the raw metadata table; it is resolved against the parent
# directory ('./../') of the working directory when the file is read
path_metadata = 'data/metadata.csv'
metadata_file = os.path.join('./../', path_metadata)
# one row per image entry; later cells read the columns patient, id,
# img_type, label, eye_side and path
metadata = pd.read_csv(metadata_file)

### Validate the id as a unique key

In [3]:
# check id uniqueness: the number of distinct patient names should equal the
# number of distinct ids if `id` is to be used as the key
n_unique_names = metadata["patient"].nunique()
n_unique_ids = metadata["id"].nunique()
print(f'name unique: {n_unique_names}', f'id unique: {n_unique_ids}', sep='\n')

name unique: 91
id unique: 91


### Split the fundus and OCT images using the raw metadata

In [4]:
# divide the data into fundus and OCT images, keeping only the columns the
# restructuring step needs
kept_columns = ['id', 'label', 'eye_side', 'path']
is_fundus = metadata['img_type'] == 'fundus'
is_oct = metadata['img_type'] == 'oct'
fundus_images = metadata.loc[is_fundus, kept_columns]
oct_images = metadata.loc[is_oct, kept_columns]
# distinct labels in order of first appearance in the metadata
labels = list(metadata['label'].unique())

In [5]:
# create directories for the restructured dataset
# The copy step of this notebook writes into ./../dataset_used/<modality>/<label>/,
# so that is the tree that has to exist (creating ./../dataset_processed/
# left the copy destinations missing).
# exist_ok=True makes the cell safe to re-run and also fills in any
# sub-directory that is missing from a partially created tree.
for modality_dir in ("fundus_image", "oct_image"):
    for label in labels:
        os.makedirs(f"./../dataset_used/{modality_dir}/{label}", exist_ok=True)

In [6]:
# empty table that collects, for every copied image, the record id, the new
# file name and the path the image was copied to
renamed_columns = ['id', 'file_name', 'new_path']
new_file_name = pd.DataFrame(columns=renamed_columns)

In [7]:
# restructuring the dataset: copy every image into
# ./../dataset_used/<modality>/<label>/ under a normalised file name and
# record the id -> new file name / new path mapping in `new_file_name`

# Integer code per label, assigned by position in `labels`: the first label
# gets 1 and the second gets 0.
# NOTE(review): the original comments state labels[0] is glaucoma and
# labels[1] is non_glaucoma; `labels` follows the order of first appearance
# in the metadata, so confirm this against the actual file.
label_codes = dict(zip(labels, (1, 0)))


def _copy_images(images, prefix, base_dir, label, label_int):
    """Copy the images of one modality and one label to their new location.

    Args:
        images: rows to copy; the columns id, eye_side and path are read.
        prefix: file name prefix identifying the modality ("fff" or "oct").
        base_dir: root directory of the modality inside the new dataset.
        label: label of the rows, used as the sub-directory name.
        label_int: integer code of the label embedded in the file name.

    Every copied file is appended to the notebook-level `new_file_name` table.
    """
    # make sure the destination exists so the copy cannot fail on a missing
    # directory
    os.makedirs(os.path.join(base_dir, label), exist_ok=True)
    for _, row in images.iterrows():
        # row.path[-5] is the character right before a 4-character extension
        # such as ".jpg" - presumably a per-image marker in the original file
        # name; verify against the metadata paths
        file_name = f"{prefix}_{label_int}_{row.id}_{row.eye_side}_{row.path[-5].lower()}.jpg"
        new_path = os.path.join(base_dir, label, file_name)
        # shutil.copy replaces an existing destination file, so re-running the
        # cell simply overwrites earlier copies (it never raises
        # FileExistsError, which is why no handler for it is kept here)
        shutil.copy(os.path.join("./../", row.path), new_path)
        new_file_name.loc[len(new_file_name)] = [row.id, file_name, new_path]


for label in labels:
    # fail loudly instead of silently reusing the code of the previous label
    if label not in label_codes:
        raise ValueError(f"Unexpected label {label!r}: only two labels are supported")
    label_int = label_codes[label]

    # get the data for each label
    fundus_images_label = fundus_images.loc[fundus_images.label == label]
    oct_images_label = oct_images.loc[oct_images.label == label]

    # copy the files to the new directories
    _copy_images(fundus_images_label, "fff", "./../dataset_used/fundus_image/",
                 label, label_int)
    _copy_images(oct_images_label, "oct", "./../dataset_used/oct_image/",
                 label, label_int)

In [8]:
# persist the id -> new file name / new path table in the same data directory
# as the raw metadata (file name spelling kept as-is, without the index column)
restructured_csv = "./../data/resructured_metadata.csv"
new_file_name.to_csv(restructured_csv, index=False)