In [1]:
import os
import sys
from dotenv import load_dotenv

# Load environment variables through python-dotenv before reading them below.
load_dotenv()

# Make the project's helper modules importable.
# os.environ.get returns None when PATH_CUSTOM_MODULES is unset, so skip the
# append in that case instead of putting None on sys.path. Also skip it when
# the entry is already present, so re-running this cell does not keep growing
# sys.path with duplicates.
_custom_modules_path = os.environ.get('PATH_CUSTOM_MODULES')
if _custom_modules_path and _custom_modules_path not in sys.path:
    sys.path.append(_custom_modules_path)

import augment_image
import data_prep

import pandas as pd
from tensorflow.keras.preprocessing.image import ImageDataGenerator


TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 

 The versions of TensorFlow you are currently using is 2.10.1 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
You can find the compatibility matrix in TensorFlow Addon's readme:
https://github.com/tensorflow/addons


### Prepare all basic variables

In [2]:
# Root directory under which every scenario/dataset/fold folder is looked up.
# Read from the environment; this is None when the variable is not set.
path_source = os.environ.get('PATH_DATASET_DESTINATION')

# Scenario 1 is the original dataset, so only scenarios 2 and 3 are listed.
scenario_names = ['scenario_2', 'scenario_3']
dataset_names = ['rimone', 'g1020', 'refuge', 'papila']
# Five folds, named fold_1 .. fold_5.
fold_names = [f'fold_{number}' for number in range(1, 6)]
labels_name = ['normal', 'glaukoma']
# Per-dataset image size.
# NOTE(review): the axis order of each tuple is not shown here - confirm.
image_size = dict(
    rimone=(300, 300),
    g1020=(240, 300),
    refuge=(300, 300),
    papila=(200, 300),
)

### Prepare the source and destination paths

In [3]:
# Build three lookup tables keyed by '<scenario>_<dataset>_<fold>_<label>':
#   path_dataset_src   -> <path_source>/<scenario>/<dataset>/<fold>/train/<label>
#   path_dataset_aug   -> .../<fold>/train_augmented/<label>
#   path_dataset_merge -> .../<fold>/train_merged/<label>
path_dataset_src = {}
path_dataset_aug = {}
path_dataset_merge = {}

# Each table paired with the fold sub-directory its entries point to.
_tables = (
    (path_dataset_src, 'train'),
    (path_dataset_aug, 'train_augmented'),
    (path_dataset_merge, 'train_merged'),
)

for scenario in scenario_names:
    for dataset in dataset_names:
        for fold in fold_names:
            for label in labels_name:
                # the same key is shared by all three tables
                _key = '_'.join((scenario, dataset, fold, label))
                for _table, _subdir in _tables:
                    _table[_key] = os.path.join(path_source,
                                                scenario,
                                                dataset,
                                                fold,
                                                _subdir,
                                                label)

# drop the loop variables and temporaries from the notebook namespace
del scenario, dataset, fold, label
del _tables, _key, _table, _subdir

### Prepare the image data generator

In [4]:
# Scenario 3 image generator: the only augmentation is CLAHE, supplied as the
# generator's per-image preprocessing function; pixel values are also scaled
# by 1/255.
# NOTE(review): the order in which Keras applies preprocessing_function and
# rescale is not visible here - confirm against the Keras documentation.
datagenerator_s3 = ImageDataGenerator(
    preprocessing_function=augment_image.clahe_augmentation,
    rescale=1.0 / 255,
)

### Prepare the merged directory

In [6]:
# Create the directories for the merged dataset and report the ones that
# were already there.
def _report_existing_directories(result):
    """Print one line for every entry listed under 'Already Exists'.

    Parameters
    ----------
    result : dict
        Value returned by ``augment_image.create_directory``. Assumed to be a
        dict whose 'Already Exists' entry is a list - TODO confirm against
        the helper module.

    Notes
    -----
    Doing this inside a function keeps loop variables out of the notebook
    namespace. The previous ``del key, values, value`` cleanup raised
    NameError whenever nothing was reported, because ``value`` was then
    never bound.
    """
    for existing in result.get('Already Exists') or []:
        print('Directory already exists:', existing)


_report_existing_directories(
    augment_image.create_directory(path_dict=path_dataset_merge)
)

Directory already exists: scenario_2_rimone_fold_1_normal
Directory already exists: scenario_2_rimone_fold_1_glaukoma
Directory already exists: scenario_2_rimone_fold_2_normal
Directory already exists: scenario_2_rimone_fold_2_glaukoma
Directory already exists: scenario_2_rimone_fold_3_normal
Directory already exists: scenario_2_rimone_fold_3_glaukoma
Directory already exists: scenario_2_rimone_fold_4_normal
Directory already exists: scenario_2_rimone_fold_4_glaukoma
Directory already exists: scenario_2_rimone_fold_5_normal
Directory already exists: scenario_2_rimone_fold_5_glaukoma
Directory already exists: scenario_2_g1020_fold_1_normal
Directory already exists: scenario_2_g1020_fold_1_glaukoma
Directory already exists: scenario_2_g1020_fold_2_normal
Directory already exists: scenario_2_g1020_fold_2_glaukoma
Directory already exists: scenario_2_g1020_fold_3_normal
Directory already exists: scenario_2_g1020_fold_3_glaukoma
Directory already exists: scenario_2_g1020_fold_4_normal
Direc

In [8]:
# Collect the file names found in every source directory and every augmented
# directory, keyed by the same ids as path_dataset_src / path_dataset_aug.
# Dict comprehensions keep the loop variables local, so no `del` is needed
# afterwards. The former `del key, value` raised NameError when a path table
# was empty.

## for the original image
original_files = {
    dataset_id: data_prep.get_file_names(path=directory)
    for dataset_id, directory in path_dataset_src.items()
}

## for the augmented image
augmented_files = {
    dataset_id: data_prep.get_file_names(path=directory)
    for dataset_id, directory in path_dataset_aug.items()
}

In [None]:
# splitting the data for each scenario
def _split_by_scenario(files_by_id):
    """Partition a ``{id: file_names}`` mapping into scenario 2 and scenario 3.

    Parameters
    ----------
    files_by_id : dict
        Mapping whose keys contain the scenario name (for example
        'scenario_2_rimone_fold_1_normal').

    Returns
    -------
    tuple of (dict, dict)
        ``(scenario_2_entries, scenario_3_entries)``. Entries keep the order
        they had in ``files_by_id``. A key that matches neither scenario is
        reported with ``print('Error:', key)`` and left out of both dicts.
    """
    scenario_2_entries = {}
    scenario_3_entries = {}
    for dataset_id, file_names in files_by_id.items():
        if 'scenario_2' in dataset_id:
            scenario_2_entries[dataset_id] = file_names
        elif 'scenario_3' in dataset_id:
            scenario_3_entries[dataset_id] = file_names
        else:
            print('Error:', dataset_id)
    return scenario_2_entries, scenario_3_entries


## splitting the original files
s2_ori_files, s3_ori_files = _split_by_scenario(original_files)
## splitting the augmented files
s2_aug_files, s3_aug_files = _split_by_scenario(augmented_files)

### Merge the original and augmented data

#### Scenario 2

In [40]:
# copy the image from the source to the destination
def _merge_into_destination(image_type, source_paths, files_by_id,
                            destination_paths, summary):
    """Copy every listed file into its merged directory and record the outcome.

    Parameters
    ----------
    image_type : str
        Label stored in the summary's 'image type' column.
    source_paths : dict
        ``{id: source directory}``.
    files_by_id : dict
        ``{id: list of file names}`` to copy for each id.
    destination_paths : dict
        ``{id: merged directory}``.
    summary : dict of lists
        Receives one row per id in the columns 'image type', 'id',
        'Already Exists' and 'Success'. Mutated in place.

    Notes
    -----
    Stale copies left by a previous run are removed BEFORE copying. The
    earlier version removed them AFTER copying, which deleted the files it
    had just written and left the merged directory without them.
    Only paths that currently exist are passed to ``remove_file``, because
    its behaviour on a missing path is not visible here.
    """
    for dataset_id, source_dir in source_paths.items():
        destination_dir = destination_paths[dataset_id]
        file_names = files_by_id[dataset_id]

        ### removing the copies left in the merged directory by a previous run
        stale_paths = [os.path.join(destination_dir, file_name)
                       for file_name in file_names]
        stale_paths = [path for path in stale_paths if os.path.isfile(path)]
        if stale_paths:
            augment_image.remove_file(files_path=stale_paths)

        ### copying the files into the merged directory
        outcome = data_prep.copy_files(source_path=source_dir,
                                       destination_path=destination_dir,
                                       file_names=file_names)

        summary['image type'].append(image_type)
        summary['id'].append(dataset_id)
        summary['Already Exists'].append(len(outcome['Already Exists']))
        summary['Success'].append(len(outcome['Success']))


copy_result = {
    'image type': [],
    'id': [],
    'Already Exists': [],
    'Success': []
}

# NOTE(review): this cell sits under the "Scenario 2" heading but iterates
# over every id in path_dataset_src / path_dataset_aug, i.e. all scenarios -
# confirm whether it should be limited to scenario 2.
## for the original image
_merge_into_destination('original', path_dataset_src, original_files,
                        path_dataset_merge, copy_result)
## for the augmented image
_merge_into_destination('augmented', path_dataset_aug, augmented_files,
                        path_dataset_merge, copy_result)

KeyboardInterrupt: 