In [2]:
import os
import cv2
import glob
import numpy as np
import matplotlib.pyplot as plt

In [5]:
train_folder_path = glob.glob("dataset/train/*")
test_folder_path = glob.glob("dataset/test/*")

In [3]:
import json

f = open("utils/label_encode.json")

label_map = json.load(f)
print(label_map)

f.close()

{'actinic keratosis': {'label': 0, 'abbreviation': 'akiec'}, 'basal cell carcinoma': {'label': 1, 'abbreviation': 'bcc'}, 'pigmented benign keratosis': {'label': 2, 'abbreviation': 'bkl'}, 'dermatofibroma': {'label': 3, 'abbreviation': 'df'}, 'melanoma': {'label': 4, 'abbreviation': 'mel'}, 'melanocytic nevus': {'label': 5, 'abbreviation': 'nv'}, 'vascular lesions': {'label': 6, 'abbreviation': 'vasc'}}


In [23]:
from collections import Counter

def get_image_by_path(folder_path, label_mapping):
    def label_to_encode(label_name, label_mapping):
        encode_label = 0
        for keys in label_mapping.keys():
            if label_name == keys:
                encode_label = label_mapping[keys]['label']

        return encode_label

    images = []
    labels = []

    for idx, path in enumerate(folder_path):
        # Load each specific image location
        all_image_path = glob.glob(path + "/*")
        # Extract label
        # Fix path inconsistencies on windows
        real_label = path.replace("\\", "/").replace("//", "/").split("/")[2]
        # Change label into encoded format
        encode_label = label_to_encode(real_label, label_mapping)

        # Loop through an array to load image
        for idx2, img_path in enumerate(all_image_path):
            # Load image
            image = cv2.imread(img_path)
            images.append(image)
            labels.append(encode_label)
        #print(all_image_path)

    return np.array(images), np.array(labels)

def check_class_ratio(data):
    counter = Counter(data)
    for k, v in counter.items():
        per= v / len(data) * 100
        print('Class=%d, n=%d (%.3f%%)' % (k, v, per))
    print("Total Data : {}".format(len(data)))

In [24]:
x_train, y_train = [], []

x_train, y_train = get_image_by_path(test_folder_path, label_map)

In [25]:
check_class_ratio(y_train)

Class=0, n=229 (3.787%)
Class=1, n=453 (7.491%)
Class=3, n=113 (1.869%)
Class=5, n=3539 (58.525%)
Class=4, n=783 (12.949%)
Class=2, n=788 (13.031%)
Class=6, n=142 (2.348%)
Total Data : 6047


In [26]:
import random

def generate_random_combination(n_combination=2, first_group_length, second_group_length):
    combination = []
    
    itr = 0
    # Looping to generate n combination
    # Only possible for 11 combination, if 11 combination has been fulfilled loop may go infinity
    while itr < n_combination:
        # Generate with range 0f 0-3 and 0-2
        temp = [random.randrange(0, first_group_length + 1), random.randrange(0, second_group_length + 1)]
        
        # Avoid combination [0, 0]
        if temp == [0, 0]:
            continue
        
        # Assign for first iteration
        if itr == 0:
            combination.append(temp)
            itr += 1
        
        # Assign for second and later iteration
        if itr != 0:      
            # Gather Information upwards/backward of array
            isTheSame = False
            for data in combination:
                if data == temp:
                    isTheSame = True
                    break
                    
            # if no similiarity to backward array temp succesfully added
            if isTheSame == False:
                combination.append(temp)
                itr += 1
            else:
                continue

    return np.asarray(combination)