In [None]:
import itertools
import cv2
import os
import shutil
import glob
import numpy as np 
from PIL import Image
from skimage.color import rgb2gray, rgb2hsv
from matplotlib import pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator

### Configurations for Data Preparation

In [None]:
config = {
    'setup_dir': True,
    # 240 -> 60%, 20%, 20%
    'total_dataset_count': 240,
    'split': {
        'training': .60,
        'validation': .20,
        'testing': .20,
    },
    'classes': ['blast', 'blight', 'tungro'],
    'folders' = ['training', 'validation' ,'testing']
    'src_path': 'Rice diseases exclusively' # source path
    'dst_path': '_Rice diseases exclusively_with_valid', # destination path
    # --------------------------
    'data_augmentation': True,
    'ext': 'jpg',
    'augmented_images': # augmentation function
    lambda from_path, to_path, batch_size, ext, classes : ImageDataGenerator(
            height_shift_range=0.2,
            width_shift_range=0.2,
            zoom_range=0.2,
            horizontal_flip=True,
            fill_mode='nearest'
        ).flow_from_directory(
            directory=from_path, 
            classes=classes,
            batch_size=batch_size, 
            save_to_dir=to_path,
            save_format=ext
        )
    # --------------------------
    'preprocess' : True,
    'save_path' : 'data/_Preprocessed_Rice diseases exclusively_with_valid',
    
}

# Set variables
setup_dir = config['setup_dir']
total_dataset_count = config['total_dataset_count']
split_dict = config['split']
data_augmentation  = config['data_augmentation']
classes = config['classes']
folders = config['folders']
dataset_label = config['dataset_label']
ext = config['ext']
src_path = config['src_path']
dst_path = config['dst_path']
preprocess = config['preprocess']
num_classes = len(classes)

size = (299,299)

# Checks if classes and class_path are aligned
if setup_dir:
    for C in classes:
        assert class_path[C], 'The list "class_path" is missing a class found in list "classes"'
    for key in class_path:
        assert key in classes, 'The list "classes" is missing a class found in list "class_path"'

os.chdir('data') # data folder; contains 

### Split Dataset for Training, Validation, and Testing

In [None]:
if setup_dir:
    # make destination path folders
    os.makedirs(dst_path)
    os.chdir(dst_path)
    for C in classes:
        for f in folders:
            os.makedirs(f'{f}/{C}')
    os.chdir('../')

    # transfer from src to dst
    for C in classes:
        images = glob.glob(f'{src_path}/{foldr}/{C}/*.jpg') # filenames
        image_index = 0
        for f in folders:
            length = total_dataset_count * split_dict[f]
            for i in range(length):
                shutil.move(images[image_index], f'{dst_path}/{f}/{C}')

### Apply Data Augmentation

In [None]:
if data_augmentation:
    # augmentation to same folder
    os.chdir(dst_path)
    
    from_path = 'nonaugmented_training' # original
    to_path = 'training' # augmented
    
    range_size = 400 # 400 target images per class

    if not os.path.isdir(from_path):
        os.rename(to_path,from_path)
        os.makedirs(to_path)
    os.chdir(to_path)

    for C in classes:
        assert not os.path.isdir(C), 'Please delete all class folders in the "training" folder'
        os.makedirs(C)
    os.chdir('../')

    for C in classes:
        fn = config['augmented_images']
        augmented_images = fn(from_path=from_path, 
                              to_path=f'training/{C}', 
                              ext=ext,
                              classes=[C],
                              batch_size=1)

        for i in range(range_size):
          augmented_images.next()

    os.chdir('../')

### Preprocessing

In [None]:
def threshold(picture_rgb):
    picture_rgb = picture_rgb.astype("uint8")
    picture = cv2.cvtColor(picture_rgb, cv2.COLOR_RGB2GRAY)
    blurred_picture = cv2.GaussianBlur(picture, (15,15), cv2.BORDER_DEFAULT)

    # Simple Thresholding
    threshold, thresh = cv2.threshold(blurred_picture, 105, 255, cv2.THRESH_OTSU)
    thresh_inv = cv2.bitwise_not(thresh)
    stacked = np.dstack((thresh_inv,thresh_inv,thresh_inv))
    img = cv2.bitwise_and(picture_rgb, stacked)
    # foreground = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    foreground = img.copy()
    foreground[np.all(foreground == (0, 0, 0), axis=-1)] = (255,255,255)
    
    return foreground

def resize(img, size=(299,299)):
    return cv2.resize(img, size)

In [None]:
def create_dir(foldr):
    # create dir for preprocessed
    main_path = os.getcwd()
    
    os.chdir(save_path)
    os.makedirs(foldr)
    os.chdir(foldr)

    for C in classes:
        assert not os.path.isdir(C), f'Please delete all class folders in the {foldr} folder'
        os.makedirs(C)
    os.chdir('../../../')
    
    assert os.getcwd() == main_path, main_path

In [None]:
def create_preprocessed_images(foldr, exact_amount=False):
    dataset_path = dst_path
    
    # intended number of images
    # will be used if exact_amount is false
    range_size = 123 
    if foldr == folders[0]:
        range_size = 400
    
    for C in classes:
        if exact_amount:
            range_size = len(glob.glob(f'{dataset_path}/{foldr}/{C}/*.jpg'))

        preprocessed = ImageDataGenerator(preprocessing_function=preprocess) \
                .flow_from_directory(
                    directory=f'{dataset_path}/{foldr}', 
                    target_size=size, 
                    classes=[C], 
                    batch_size=1,
                    save_to_dir=save_path+f'/{foldr}/{C}',
                    save_format='jpg'
                )
        for i in range(range_size):
          preprocessed.next()

In [7]:
 if preprocess:
    for foldr in folders:
        create_dir(foldr)
        create_preprocessed_images(foldr, exact_amount=True)

Found 400 images belonging to 1 classes.
Found 400 images belonging to 1 classes.
Found 400 images belonging to 1 classes.
Found 16 images belonging to 1 classes.
Found 16 images belonging to 1 classes.
Found 16 images belonging to 1 classes.
Found 16 images belonging to 1 classes.
Found 16 images belonging to 1 classes.
Found 16 images belonging to 1 classes.
