In [1]:
import cv2
import numpy as np
import os
import pandas as pd
import random
from imutils import paths


from PIL import Image
from sklearn.model_selection import train_test_split, GridSearchCV
from collections import Counter


In [2]:
def color_extraction(img):
    """Compute per-channel colour statistics of a three-channel image.

    The image is split with ``cv2.split`` into three planes, named b, g and r
    here (i.e. OpenCV's BGR channel order is assumed), and ``cv2.meanStdDev``
    is evaluated on each plane separately.

    Returns:
        dict with the keys ``mean_b``, ``std_b``, ``mean_g``, ``std_g``,
        ``mean_r`` and ``std_r``. Each value is stored exactly as returned by
        ``cv2.meanStdDev`` (an array, not a plain float).
    """
    b, g, r = cv2.split(img)

    stats = {}
    for suffix, plane in (("b", b), ("g", g), ("r", r)):
        plane_mean, plane_std = cv2.meanStdDev(plane)
        stats[f"mean_{suffix}"] = plane_mean
        stats[f"std_{suffix}"] = plane_std
    return stats
    

In [3]:
# Root folders: the original dataset (holds the ground-truth CSV) and the
# segmentation data (holds the images and their masks).
DATASET_PATH = "../Datasets/OrigDataset"
SEG_PATH = "../Datasets/new_data"

# Ground-truth labels file
CSV_FILE_PATH = os.path.join(DATASET_PATH, "GroundTruth.csv")

# Image and mask sub-folders of the segmentation data
IMAGE_DATASET_PATH, MASK_DATASET_PATH = (
    os.path.join(SEG_PATH, subfolder) for subfolder in ("originals", "masks")
)


In [15]:
# One (initially empty) list per tumor type
(
    melanoma,
    nevus,
    basal_cell_carcinoma,
    actinic_keratoses_and_intraepithelial_carcinoma,
    benign_keratosis_like_lesions,
    dermatofibroma,
    vascular_lesion,
) = ([] for _ in range(7))

# Lesion code -> integer class id; the id is the position of the code below
lesion_dict = {
    code: class_id
    for class_id, code in enumerate(
        ("MEL", "NV", "BCC", "AKIEC", "BKL", "DF", "VASC")
    )
}

# Integer class id -> list that will collect the samples of that class
new_lesion_dict = {class_id: [] for class_id in lesion_dict.values()}



In [5]:
# List every image file and every mask file, each sorted by path. Later cells
# pair entry i of one list with entry i of the other, which presumably relies
# on images and masks sharing file names — TODO confirm.
imagePaths, maskPaths = (
    sorted(paths.list_images(folder))
    for folder in (IMAGE_DATASET_PATH, MASK_DATASET_PATH)
)


In [6]:
# Extract the colour statistics of the masked lesion area for every image.
# Images and masks are paired by position in the two sorted path lists, so a
# length mismatch means the pairing cannot be trusted.
if len(imagePaths) != len(maskPaths):
    raise ValueError(
        "Found {} images but {} masks; cannot pair them by position".format(
            len(imagePaths), len(maskPaths)
        )
    )

results = []

for path_original, path_label in zip(imagePaths, maskPaths):
    original = cv2.imread(path_original)
    label = cv2.imread(path_label, 0)  # flag 0: load the mask as single-channel grayscale

    # cv2.imread signals an unreadable file by returning None instead of raising
    if original is None or label is None:
        raise IOError(
            "Could not read image/mask pair: {!r} / {!r}".format(path_original, path_label)
        )

    # Keep the pixels where the mask is non-zero and zero out everything else.
    # A plain `original * mask` is uint8 arithmetic, which wraps modulo 256 and
    # corrupts the pixel values if the masks are stored as 0/255 (mask encoding
    # is not visible here — for 0/1 masks both forms give the same result).
    cob = cv2.bitwise_and(original, original, mask=label)

    results.append(color_extraction(cob))

# Alphabetically sorted feature keys; empty when no image was processed
feature_names = sorted(results[0]) if results else []


In [7]:
# Sorted feature keys; this order is the column order used when filling `samples`
feature_names 

['mean_b', 'mean_g', 'mean_r', 'std_b', 'std_g', 'std_r']

In [8]:
# Turn the ground-truth table into a list of integer class ids.
# For each row, the first column whose value equals 1 is looked up in
# lesion_dict; rows without any 1 contribute nothing to the list.
df = pd.read_csv(CSV_FILE_PATH)

categories = []
for _, record in df.iterrows():
    flagged = record.eq(1)
    if not flagged.any():
        continue
    first_flagged_column = record[flagged].index[0]
    categories.append(lesion_dict[first_flagged_column])
        

In [9]:
# Feature matrix: one row per image, one column per entry of feature_names
samples = np.zeros((len(imagePaths), len(feature_names)))

for case_id in range(len(imagePaths)):
    case_stats = results[case_id]
    # Flatten every statistic and lay them side by side in feature_names order
    samples[case_id, :] = np.concatenate(
        [np.ravel(case_stats[feature_name]) for feature_name in feature_names]
    )

# Replace any NaN entries so downstream code only sees finite numbers
samples = np.nan_to_num(samples)


In [None]:
# Inspect the feature matrix (rows: images, columns: feature_names order)
samples

In [None]:
# Number of labels; categories[i] is later paired with samples[i], so this
# should equal len(samples)
len(categories)


In [None]:
# Number of feature rows (one per image path); compare with len(categories)
len(samples)


In [11]:
# Class distribution of the integer labels (lesion_dict maps code -> id)
Counter(categories)


Counter({1: 6705, 0: 1113, 4: 1099, 2: 514, 3: 327, 6: 142, 5: 115})

In [16]:
# Group the feature rows by class id: categories[i] labels samples[i].
if len(categories) > len(samples):
    raise ValueError(
        "Got {} labels but only {} feature rows".format(len(categories), len(samples))
    )

# Empty the buckets first so that re-running this cell does not append the
# same rows a second time.
for bucket in new_lesion_dict.values():
    bucket.clear()

for category, feature_row in zip(categories, samples):
    new_lesion_dict[category].append(feature_row)
        

In [None]:
# Flatten the per-class buckets into parallel lists:
# X holds the feature rows, y the class id of the row at the same position.
X = []
y = []
for category, bucket in new_lesion_dict.items():
    # The bucket length is what matters here; `category` is an int key, so
    # calling len() on it raises TypeError.
    X.extend(bucket)
    y.extend([category] * len(bucket))



In [None]:
# Start a new, empty frame for the feature table.
# NOTE: this rebinds `df`, which previously held the ground-truth CSV.
df = pd.DataFrame()


In [None]:
# Independent copy of the feature names, so feature_names itself stays unchanged
column_names = list(feature_names)


In [None]:
# Add the label column as the last column. The membership check keeps the cell
# idempotent: re-running it must not add a second 'categories' entry.
if 'categories' not in column_names:
    column_names.append('categories')


In [None]:
# Create one empty column per name so df.columns carries the final column
# order; the row-appending cell below reads its column labels from df.columns.
for col in column_names:
    df[col] = []
    

In [None]:
# Display the frame to check the column layout (no rows have been added yet)
df


In [None]:
# Shallow copies: the loop below rebinds entries of list_of_arrays, and working
# on copies leaves X and y as they are.
list_of_arrays, single_list = list(X), list(y)


In [None]:
# Length of the first feature vector before the label is appended to it
len(list_of_arrays[0])


In [None]:
# Append each sample's class id to the end of its feature vector. zip stops at
# the shorter list, so vectors without a matching label are left untouched.
for i, (feature_vector, class_id) in enumerate(zip(list_of_arrays, single_list)):
    list_of_arrays[i] = np.append(feature_vector, class_id)


In [None]:
# Length of the first vector after the label was appended (one more than before)
len(list_of_arrays[0])


In [None]:
# Add all rows in one step. DataFrame.append was removed in pandas 2.0, and
# appending row by row copies the whole frame on every iteration.
new_rows = pd.DataFrame(list_of_arrays, columns=df.columns)

# When df has no rows yet, the new rows are the whole table; otherwise they are
# appended below the existing rows with a fresh 0..n-1 index.
df = new_rows if df.empty else pd.concat([df, new_rows], ignore_index=True)
    

In [None]:
# Class ids were stored next to float features; cast the label column to int
df = df.astype({'categories': int})


In [None]:
# Check the column dtypes; 'categories' was cast to int in the previous cell
print(df.dtypes)

In [None]:
# Display the final feature table (feature columns followed by 'categories')
df


In [None]:
# Persist the feature table. With the pandas default (index=True) the row index
# is written as well, as the first column of the file.
df.to_csv('../Datasets/colors.csv')
