In [1]:
import os
import cv2
import glob
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Get Folder Path
# glob.glob returns matches in arbitrary (filesystem-dependent) order; sorting
# keeps the class-folder order identical across machines and kernel restarts.
train_folder_path = sorted(glob.glob("dataset/train/*"))
test_folder_path = sorted(glob.glob("dataset/test/*"))

In [3]:
import json

# Load JSON for label mapping
# The context manager closes the file even if json.load raises on malformed
# input; the explicit encoding avoids depending on the platform default.
with open("utils/label_encode.json", encoding="utf-8") as f:
    labels_map = json.load(f)

print(labels_map)

{'actinic keratosis': {'label': 0, 'abbreviation': 'akiec'}, 'basal cell carcinoma': {'label': 1, 'abbreviation': 'bcc'}, 'pigmented benign keratosis': {'label': 2, 'abbreviation': 'bkl'}, 'dermatofibroma': {'label': 3, 'abbreviation': 'df'}, 'melanoma': {'label': 4, 'abbreviation': 'mel'}, 'melanocytic nevus': {'label': 5, 'abbreviation': 'nv'}, 'vascular lesions': {'label': 6, 'abbreviation': 'vasc'}}


In [4]:
from collections import Counter

def get_image_by_path(folder_path, label_mapping):
    """Load every image found under the given class folders.

    Parameters
    ----------
    folder_path : iterable of str
        Paths of class folders. The last path component of each folder is
        taken as the class name.
    label_mapping : dict
        Maps a class name to a dict holding at least a ``'label'`` entry
        (the integer encoding of that class).

    Returns
    -------
    tuple of numpy.ndarray
        ``(images, labels, paths)`` with one entry per successfully decoded
        image, in matching order.

    Raises
    ------
    ValueError
        If a folder name is not a key of ``label_mapping``. Failing loudly
        avoids silently assigning such images to class 0.
    """
    import warnings

    images = []
    labels = []
    paths = []

    for path in folder_path:
        # The glob that produced folder_path may also match stray files.
        if not os.path.isdir(path):
            continue

        # Normalise Windows separators and take the last component, so the
        # label no longer depends on how deep the dataset root is nested.
        real_label = path.replace("\\", "/").rstrip("/").split("/")[-1]
        if real_label not in label_mapping:
            raise ValueError(
                "Folder '{}' has no entry in the label mapping".format(path)
            )
        encode_label = label_mapping[real_label]['label']

        # Sorted so the returned arrays have a reproducible order.
        for img_path in sorted(glob.glob(path + "/*")):
            image = cv2.imread(img_path)
            # cv2.imread returns None instead of raising when a file cannot
            # be decoded; skip it so no None ends up in the image array.
            if image is None:
                warnings.warn("Skipping unreadable image: {}".format(img_path))
                continue

            paths.append(img_path)
            images.append(image)
            labels.append(encode_label)

    return np.array(images), np.array(labels), np.array(paths)

def check_class_ratio(data):
    """Print how many samples each class has and its share of the total.

    ``data`` is a sized iterable of integer class labels. One line is printed
    per distinct label, in order of first appearance, followed by the total
    number of samples.
    """
    total = len(data)
    line_template = 'Class=%d, n=%d (%.3f%%)'
    for label, count in Counter(data).items():
        share = count / total * 100
        print(line_template % (label, count, share))
    print("Total Data : {}".format(total))

In [5]:
# Read the images under every training class folder into memory, together with
# each image's encoded label (y_train) and the file path it came from (x_path).
x_train, y_train, x_path = get_image_by_path(train_folder_path, labels_map)

In [6]:
import random

def generate_random_combination(n_combination=2, first_group_length=1, second_group_length=1):
    """Draw distinct random (first-group, second-group) index pairs.

    Each pair is ``[i, j]`` with ``0 <= i < first_group_length`` and
    ``0 <= j < second_group_length``. The pair ``[0, 0]`` is never returned,
    and no pair is returned twice.

    Parameters
    ----------
    n_combination : int
        Number of distinct pairs to draw. Values <= 0 give an empty result.
    first_group_length : int
        Number of choices available for the first index.
    second_group_length : int
        Number of choices available for the second index.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(n_combination, 2)``.

    Raises
    ------
    ValueError
        If more pairs are requested than exist
        (``first_group_length * second_group_length - 1``). Retrying random
        draws in that situation would never terminate.
    """
    # Enumerate every admissible pair once; [0, 0] is excluded up front.
    candidates = [
        [first, second]
        for first in range(first_group_length)
        for second in range(second_group_length)
        if (first, second) != (0, 0)
    ]

    wanted = max(int(n_combination), 0)
    if wanted > len(candidates):
        raise ValueError(
            "Requested {} unique combinations but only {} exist for group "
            "lengths {} and {}".format(
                wanted, len(candidates), first_group_length, second_group_length
            )
        )

    # Sampling without replacement guarantees uniqueness and termination.
    picked = random.sample(candidates, wanted)

    # reshape keeps a well-formed (0, 2) array when nothing was requested.
    return np.asarray(picked, dtype=int).reshape(-1, 2)

<h5>Augmentation List </h5>
<p>First Group</p>
<p>0. None</p>
<p>1. Blur 5</p>
<p>2. Blur 7</p>
<p>3. Gaussian Noise 10.0</p>
<p>4. Horizontal Flip</p>
<p>5. Zoom 1.5</p>
<p>6. Zoom 2.0</p>

<p>Second Group</p>
<p>0. None</p>
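<p>1. Flip, code -1 (cv2.flip with -1 flips both vertically and horizontally)</p>
<p>2. Brightness -30</p>
<p>3. Contrast (alpha 0.8, beta 20)</p>
<p>4. Flip, code 1 (cv2.flip with 1 flips horizontally)</p>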
<p>5. Gamma Correction 1.9</p>
<p>6. Gamma Correction 2.25</p>

In [7]:
# Lookup tables for the augmentation pairs: each entry is
# [preference name, argument]. combination_handler indexes these lists with the
# pair of integers it receives and forwards entry[1] as the positional argument
# and entry[0] as `preference` to first_group / second_group.

# First-group augmentations (index -> [preference, argument]).
FIRST_COMBINATION_MAPPING = [
    ['None', 0],      # no-op; the argument is ignored
    ['gblur', 5],     # Gaussian blur, 5x5 kernel
    ['gblur', 7],     # Gaussian blur, 7x7 kernel
    ['gnoise', 10],   # Gaussian noise, standard deviation 10
    ['hflip', 0],     # horizontal flip; the argument is ignored
    ['zoom', 1.5],    # zoom factor 1.5
    ['zoom', 2.0],    # zoom factor 2.0
]

# Second-group augmentations (index -> [preference, argument]).
SECOND_COMBINATION_MAPPING = [
    ['None', 0],               # no-op; the argument is ignored
    ['vflip', -1],             # passed to cv2.flip as the flip code (-1: both axes)
    ['brightness', -30],       # brightness offset
    ['contrast', (0.8, 20)],   # (alpha, beta) unpacked by second_group
    ['vflip', 1],              # passed to cv2.flip as the flip code (1: horizontal)
    ['gcorrection', 1.9],      # gamma value
    ['gcorrection', 2.25],     # gamma value
]

In [8]:
%load_ext autoreload
%autoreload 2

# First Group Augmentation
def first_group(image, *argv, preference='hflip'):
    """Apply one first-group augmentation to an image.

    Parameters
    ----------
    image : numpy.ndarray
        Image array whose first two axes are rows and columns (presumably the
        uint8 BGR array produced by cv2.imread).
    *argv
        ``argv[0]`` is the augmentation argument: noise standard deviation for
        ``'gnoise'``, odd kernel size for ``'gblur'``, zoom factor (>= 1) for
        ``'zoom'``. It is ignored by ``'hflip'`` and ``'None'``.
    preference : str
        One of ``'hflip'``, ``'gnoise'``, ``'gblur'``, ``'zoom'``, ``'None'``.

    Returns
    -------
    numpy.ndarray
        The augmented image. ``'None'`` returns the input object itself.

    Raises
    ------
    ValueError
        For an unknown ``preference`` or a zoom factor below 1.
    """
    if preference == 'None':
        return image

    if preference == 'hflip':
        return cv2.flip(image, 1)

    if preference == 'gnoise':
        intensity = argv[0]
        noise = np.random.normal(0, intensity, image.shape)
        # Add in floating point and clip afterwards. Casting signed noise to
        # uint8 and adding in uint8 wraps around, which turns small negative
        # noise into near-white speckles and overflows bright pixels to black.
        noisy = image.astype(np.float64) + noise
        if np.issubdtype(image.dtype, np.integer):
            limits = np.iinfo(image.dtype)
            noisy = np.clip(np.rint(noisy), limits.min, limits.max)
        return noisy.astype(image.dtype)

    if preference == 'gblur':
        kernel = (argv[0], argv[0])
        return cv2.GaussianBlur(image, kernel, 0)

    if preference == 'zoom':
        zoom_factor = argv[0]
        if zoom_factor < 1:
            raise ValueError("zoom factor must be >= 1, got {}".format(zoom_factor))

        # shape is (rows, cols, ...): rows are the height, cols the width.
        height, width = image.shape[:2]
        row_margin = int(0.5 * height * (1 - 1 / zoom_factor))
        col_margin = int(0.5 * width * (1 - 1 / zoom_factor))

        # Centre crop, rows first and columns second, then scale back to the
        # original size (cv2.resize takes dsize as (width, height)).
        cropped = image[row_margin:height - row_margin, col_margin:width - col_margin]
        return cv2.resize(cropped, (width, height))

    raise ValueError("Unknown first-group preference: {!r}".format(preference))

# Second Group Augmentation
def second_group(image, *argv, preference='gcorrection'):
    """Apply one second-group augmentation to an image.

    Parameters
    ----------
    image : numpy.ndarray
        Image array (presumably the uint8 BGR array produced by cv2.imread).
    *argv
        ``argv[0]`` is the augmentation argument:
        gamma (> 0) for ``'gcorrection'``; the cv2.flip code for ``'vflip'``;
        hue offset for ``'hue'``; saturation multiplier for ``'saturation'``;
        signed brightness offset for ``'brightness'``; an ``(alpha, beta)``
        pair for ``'contrast'``. It is ignored by ``'None'``.
    preference : str
        One of ``'gcorrection'``, ``'vflip'``, ``'hue'``, ``'saturation'``,
        ``'brightness'``, ``'contrast'``, ``'None'``.

    Returns
    -------
    numpy.ndarray
        The augmented image. ``'None'`` returns the input object itself.

    Raises
    ------
    ValueError
        For an unknown ``preference`` or a non-positive gamma.
    """
    if preference == 'None':
        return image

    if preference == 'gcorrection':
        gamma = argv[0]
        if gamma <= 0:
            raise ValueError("gamma must be positive, got {}".format(gamma))
        inv_gamma = 1.0 / gamma
        # 256-entry lookup table mapping each input intensity to its
        # gamma-corrected value.
        table = (((np.arange(0, 256) / 255.0) ** inv_gamma) * 255).astype("uint8")
        return cv2.LUT(image, table)

    if preference == 'vflip':
        flip_code = argv[0]
        return cv2.flip(image, flip_code)

    if preference == 'hue':
        hue_adjustment_value = argv[0]
        hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV).astype('float32')
        (h, s, v) = cv2.split(hsv)
        # For 8-bit images OpenCV stores hue as 0..179; wrap the shifted hue
        # back into that range so the uint8 cast below cannot overflow or
        # produce out-of-range hues.
        h = np.mod(h + hue_adjustment_value, 180).astype('float32')
        merged = cv2.merge([h, s, v])
        return cv2.cvtColor(merged.astype('uint8'), cv2.COLOR_HSV2BGR)

    if preference == 'saturation':
        saturation_adjustment_value = argv[0]
        hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV).astype('float32')
        (h, s, v) = cv2.split(hsv)
        s = np.clip(s * saturation_adjustment_value, 0, 255)
        merged = cv2.merge([h, s, v])
        return cv2.cvtColor(merged.astype('uint8'), cv2.COLOR_HSV2BGR)

    if preference == 'brightness':
        brightness = argv[0]
        if brightness == 0:
            return image.copy()

        if brightness > 0:
            shadow = brightness
            highlight = 255
        else:
            shadow = 0
            highlight = 255 + brightness
        # Linear remap of [0, 255] onto [shadow, highlight].
        alpha_b = (highlight - shadow) / 255
        gamma_b = shadow
        return cv2.addWeighted(image, alpha_b, image, 0, gamma_b)

    if preference == 'contrast':
        # alpha scales the pixel values, beta is added afterwards.
        alpha, beta = argv[0]
        return cv2.addWeighted(image, alpha, np.zeros(image.shape, image.dtype), 0, beta)

    raise ValueError("Unknown second-group preference: {!r}".format(preference))

# Combination Handler
def combination_handler(image, combinations):
    """Run one first-group and then one second-group augmentation on an image.

    ``combinations`` is a pair of indices: the first selects an entry of
    FIRST_COMBINATION_MAPPING, the second an entry of
    SECOND_COMBINATION_MAPPING. Each entry supplies the preference name and
    the argument handed to the matching augmentation function. The
    second-group result is returned.
    """
    first_idx, second_idx = combinations
    first_name, first_arg = FIRST_COMBINATION_MAPPING[first_idx]
    second_name, second_arg = SECOND_COMBINATION_MAPPING[second_idx]

    after_first = first_group(image, first_arg, preference=first_name)
    return second_group(after_first, second_arg, preference=second_name)
        

In [9]:
from collections import Counter
# Count the samples per class; labels are kept in order of first appearance.
counter = Counter(y_train)
class_sort = list(counter.keys())
class_totals = list(counter.values())

# Report every class with its share of the training set.
n_samples = len(y_train)
for label, count in zip(class_sort, class_totals):
    print('Class=%d, n=%d (%.3f%%)' % (label, count, count / n_samples * 100))

# The largest class is the reference size used when balancing the others.
max_classes = max(class_totals)
max_classes_idx = class_totals.index(max_classes)

print("Class Total each : {} index max : {}".format(max_classes, max_classes_idx))
print(class_sort)
class_totals

Class=0, n=228 (3.770%)
Class=1, n=453 (7.490%)
Class=3, n=113 (1.868%)
Class=5, n=3539 (58.515%)
Class=4, n=784 (12.963%)
Class=2, n=789 (13.046%)
Class=6, n=142 (2.348%)
Class Total each : 3539 index max : 3
[0, 1, 3, 5, 4, 2, 6]


[228, 453, 113, 3539, 784, 789, 142]

In [10]:
# Oversample every minority class by writing augmented copies of its images
# into that class's training folder.
# NOTE: the files are written into the same folders the training set is read
# from, so reloading the dataset and re-running this cell augments the
# previously augmented images as well.
current_img = 0
save_path = 'dataset/train'

# Number of distinct augmentation pairs available ([0, 0] is never generated).
max_unique_combinations = len(FIRST_COMBINATION_MAPPING) * len(SECOND_COMBINATION_MAPPING) - 1

distinct_id = 0
for idx, class_total in enumerate(class_totals):
    # Avoid max classes index
    # The max classes count is used for reference
    if idx == max_classes_idx:
        continue

    class_label = class_sort[idx]

    # Find the folder name for this label. Fail loudly instead of reusing the
    # folder of a previous iteration when no entry matches.
    folder_label_name = next(
        (name for name, info in labels_map.items() if info['label'] == class_label),
        None,
    )
    if folder_label_name is None:
        raise KeyError("No folder name found for label {}".format(class_label))

    # One augmented copy of the class per combination brings it up to roughly
    # the size of the largest class; capped at the number of pairs that exist.
    required_combination = min(max_classes // class_total, max_unique_combinations)
    combinations = generate_random_combination(
        n_combination=required_combination,
        first_group_length=len(FIRST_COMBINATION_MAPPING),
        second_group_length=len(SECOND_COMBINATION_MAPPING),
    )

    # Select this class's images once instead of rescanning the whole training
    # set for every combination.
    class_indices = np.flatnonzero(y_train == class_label)

    # Loop over each combinations array
    for combination in combinations:
        for idx_img in class_indices:
            # Proceed the augmentation
            augmented = combination_handler(x_train[idx_img], combination)

            # Extract the image name (file name without directory or extension),
            # independent of how deep the dataset folder is nested.
            normalized_path = str(x_path[idx_img]).replace("\\", "/")
            real_image_name = os.path.splitext(os.path.basename(normalized_path))[0]

            # Save back the augmentation result
            image_name = str(distinct_id) + "_augmented_" + str(class_label) + "_" + real_image_name + ".jpg"
            final_save_path = os.path.join(save_path, folder_label_name, image_name)
            # cv2.imwrite reports failure through its return value.
            if not cv2.imwrite(final_save_path, augmented):
                raise IOError("Could not write augmented image to {}".format(final_save_path))

            distinct_id += 1