In [4]:
import numpy as np 
import seaborn as sb
import matplotlib.pyplot as plt
import cv2 
import os
import re
from scipy import ndimage as ndi


In [5]:
def normalize_label(str_):
    """Turn a raw folder name into a compact class label.

    Spaces and parentheses are dropped, the remainder is split on
    underscores, and the first two pieces are concatenated.
    """
    # Filter out the three unwanted characters in a single pass.
    compact = "".join(ch for ch in str_ if ch not in " ()")
    leading_parts = compact.split("_")[:2]
    return "".join(leading_parts)

def normalize_desc(folder, sub_folder):
    """Build a human-readable description from a folder / sub-folder pair.

    The two names are joined with " - ", then every run of digits and every
    period is removed and surrounding whitespace is trimmed.
    """
    combined = " - ".join((folder, sub_folder))
    without_digits = re.sub(r'\d+', '', combined)
    return without_digits.replace(".", "").strip()

def print_progress(val, val_len, sub_folder, filename, bar_size=10):
    """Print a one-line text progress bar for the file currently being processed.

    Parameters
    ----------
    val : int
        Zero-based position of the current item.
    val_len : int
        Position that corresponds to a full bar (callers pass ``item_count - 1``).
        A value of zero or less is treated as "already complete" instead of
        raising ZeroDivisionError.
    sub_folder : str
        Folder name shown in the message.
    filename : str
        File name shown in the message.
    bar_size : int, optional
        Total number of characters between the brackets.

    Notes
    -----
    For ``val == 0`` only a line break is printed, so each folder's bar starts
    on a fresh line. Every other call ends with a carriage return so the next
    call overwrites the same line.
    """
    if val == 0:
        print("", end = "\n")
        return

    if val_len > 0:
        filled = round(val * bar_size / val_len)
    else:
        # Nothing to divide by (e.g. a folder with a single file): show a full bar.
        filled = bar_size
    # Clamp so out-of-range positions can never produce a malformed bar.
    filled = max(0, min(bar_size, filled))

    # Derive the empty part from the filled part; rounding both halves
    # independently can make the bar one character too short or too long.
    progr = "#" * filled + " " * (bar_size - filled)
    print("[%s] folder : %s/ ----> file : %s" % (progr, sub_folder, filename), end="\r")

In [7]:
dataset_dir = "balanced_datasets/"

imgs = []           # segmented grayscale images, one 150x150 array per readable file
labels = []         # class label derived from the sub-folder name
img_ids = []        # source file name of each image
descs = []          # never filled: normalize_desc() needs a parent `folder` name this loop does not have
skipped_files = []  # paths that cv2.imread could not decode

for sub_folder in os.listdir(dataset_dir):
    sub_folder_path = os.path.join(dataset_dir, sub_folder)
    if not os.path.isdir(sub_folder_path):
        # Stray files next to the class folders (e.g. .DS_Store) would make
        # os.listdir() raise; they carry no images, so skip them.
        continue

    sub_folder_files = os.listdir(sub_folder_path)
    # Index of the last file, used as the "100 %" mark of the progress bar.
    # Kept at >= 1 so a single-file folder cannot cause a division by zero there.
    len_sub_folder = max(len(sub_folder_files) - 1, 1)

    for i, filename in enumerate(sub_folder_files):
        file_path = os.path.join(sub_folder_path, filename)
        img = cv2.imread(file_path)
        if img is None:
            # cv2.imread signals an unreadable / non-image file by returning
            # None rather than raising; record it and move on.
            skipped_files.append(file_path)
            continue

        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        resize = cv2.resize(gray, (150, 150), interpolation=cv2.INTER_AREA)

        # Two-cluster k-means on the pixel intensities (dark vs. bright).
        # Criteria: stop after 100 iterations or once the epsilon of 0.90 is
        # reached; 10 attempts with k-means++ seeding.
        retval, label, centers = cv2.kmeans(
            resize.flatten().astype("float32"),
            2,
            None,
            (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.90),
            10, cv2.KMEANS_PP_CENTERS)

        # Cluster numbering is arbitrary; normalise it so that label 1 is
        # always the cluster with the larger (brighter) centre.
        if centers[0, 0] > centers[1, 0]:
            centers[0, 0], centers[1, 0] = centers[1, 0], centers[0, 0]
            label = 1 - label

        segment = np.array(label.reshape(resize.shape))
        # Plain NumPy boolean logic instead of cv2.bitwise_not / cv2.bitwise_and:
        # the result is the same 0/1 mask, but it does not depend on OpenCV
        # accepting the platform's default integer dtype (int64 on most systems).
        segment_not = np.logical_not(segment)
        fill_lungs = np.array(ndi.binary_fill_holes(segment), dtype=int)
        # Dark pixels that lie inside the hole-filled bright region
        # (presumably the lung fields - confirm on sample images).
        gabung = np.logical_and(segment_not, fill_lungs).astype(int)
        mask = np.array(ndi.binary_fill_holes(gabung), dtype=int)
        # Zero out everything outside the mask, keep original intensities inside.
        segment_res = (resize * mask)

        imgs.append(segment_res)
        labels.append(normalize_label(sub_folder))
        img_ids.append(filename)

        print_progress(i, len_sub_folder, sub_folder, filename)


[##########] folder : covid/ ----> file : COVID-2451.png



In [8]:
# scikit-image renamed the "grey*" GLCM helpers to "gray*"; the old spellings
# are deprecated and missing from newer releases, so try the current names first.
try:
    from skimage.feature import graycomatrix, graycoprops
except ImportError:
    from skimage.feature import greycomatrix as graycomatrix, greycoprops as graycoprops

# Keep the legacy names bound for any cell that still refers to them.
greycomatrix, greycoprops = graycomatrix, graycoprops

def calc_glcm_all_agls(img, label, img_id, props, dists=[5], agls=[0, np.pi/4, np.pi/2, 3*np.pi/4], lvl=256, sym=True, norm=True):
    """Compute GLCM texture features of one image for every requested angle.

    Parameters
    ----------
    img : 2-D integer array
        Image passed to the co-occurrence matrix computation.
    label : str
        Class label, stored as the last element of the returned row.
    img_id : str
        Image identifier, stored as the first element of the returned row.
    props : iterable of str
        Names of the GLCM properties to evaluate.
    dists : list of int, optional
        Pixel-pair distances. Only the first distance ends up in the result.
    agls : list of float, optional
        Pixel-pair angles in radians.
    lvl : int, optional
        Number of grey levels counted by the matrix.
    sym, norm : bool, optional
        Forwarded as ``symmetric`` and ``normed``.

    Returns
    -------
    list
        ``[img_id, <one value per (property, angle), property-major>, label]``.
    """
    glcm = graycomatrix(img,
                        distances=dists,
                        angles=agls,
                        levels=lvl,
                        symmetric=sym,
                        normed=norm)

    feature = [img_id]
    for name in props:
        # graycoprops yields one row per distance; row 0 is the first distance,
        # with one value per angle.
        feature.extend(graycoprops(glcm, name)[0])
    feature.append(label)

    # TODO (original author's note): add further image-derived features here,
    # e.g. a GLCM power term.

    return feature

In [9]:
# GLCM properties evaluated for every image (each at all default angles).
properties = ['dissimilarity', 'correlation', 'homogeneity', 'contrast', 'ASM', 'energy']

# One feature row per image: [img_id, property values..., label].
glcm_all_agls = []
for img, label, img_id in zip(imgs, labels, img_ids):
    feature_row = calc_glcm_all_agls(img, label, img_id, props=properties)
    glcm_all_agls.append(feature_row)
 

In [10]:
# Angle suffixes in degrees, in the same order as the angles used for the GLCM.
angles = ['0', '45', '90','135']

# Header layout mirrors a feature row: id, property_angle columns, label.
columns = ["img_ids"]
for name in properties:
    for ang in angles:
        columns += [f"{name}_{ang}"]
columns += ["label"]

In [11]:
import pandas as pd

# Assemble the GLCM feature rows into a table with the prepared column names.
glcm_df = pd.DataFrame(data=glcm_all_agls, columns=columns)


In [12]:
# Preview the first seven feature rows as a quick check of columns and values.
glcm_df.head(7)

Unnamed: 0,img_ids,dissimilarity_0,dissimilarity_45,dissimilarity_90,dissimilarity_135,correlation_0,correlation_45,correlation_90,correlation_135,homogeneity_0,...,contrast_135,ASM_0,ASM_45,ASM_90,ASM_135,energy_0,energy_45,energy_90,energy_135,label
0,COVID-679.png,14.447172,14.489445,11.473655,14.630278,0.643208,0.654827,0.737768,0.652007,0.66449,...,1255.60213,0.419953,0.413782,0.437067,0.411191,0.648038,0.643259,0.661111,0.641242,covid
1,COVID-1452.png,1.439264,1.383374,0.940276,1.302308,0.229263,0.269788,0.494109,0.315837,0.987147,...,161.30972,0.974149,0.974367,0.978419,0.974923,0.98699,0.9871,0.989151,0.987382,covid
2,COVID-1310.png,0.460046,0.469413,0.406529,0.399512,-0.001565,-0.001597,0.115583,0.150568,0.996874,...,58.61231,0.993758,0.993631,0.994124,0.994098,0.996874,0.99681,0.997058,0.997045,covid
3,COVID-2634.png,0.062437,0.05578,0.054667,0.05578,-0.000184,-0.000164,-0.000161,-0.000164,0.999632,...,9.47462,0.999265,0.999343,0.999356,0.999343,0.999632,0.999672,0.999678,0.999672,covid
4,COVID-271.png,10.834207,10.272096,6.622897,10.186996,0.573416,0.608316,0.756229,0.611782,0.838845,...,1096.656971,0.697555,0.698763,0.725766,0.698998,0.835198,0.83592,0.851919,0.836061,covid
5,COVID-2412.png,15.034621,14.265153,9.659678,14.294145,0.689171,0.715214,0.813883,0.714238,0.753376,...,1812.775943,0.542669,0.541731,0.572107,0.540485,0.736661,0.736024,0.756378,0.735177,covid
6,COVID-1617.png,2.038529,1.778992,1.371356,1.809955,0.065211,0.126443,0.296754,0.110729,0.984202,...,241.166119,0.968534,0.971685,0.975511,0.971408,0.984141,0.985741,0.98768,0.9856,covid


In [13]:
# Save the feature table to CSV. to_csv is called with its defaults, so the
# DataFrame index is written as an extra leading, unnamed column.
glcm_df.to_csv("segmentated_data.csv")
