In [94]:
import numpy as np 
import cv2 
import os
import re

In [101]:
def normalize_label(str_):
    str_ = str_.replace(" ", "")
    str_ = str_.translate(str_.maketrans("","", "()"))
    str_ = str_.split("_")
    return ''.join(str_[:2])

def normalize_desc(folder, sub_folder):
    text = folder + " - " + sub_folder 
    text = re.sub(r'\d+', '', text)
    text = text.replace(".", "")
    text = text.strip()
    return text

def print_progress(val, val_len, folder, sub_folder, filename, bar_size=10):
    progr = "#"*round((val)*bar_size/val_len) + " "*round((val_len - (val))*bar_size/val_len)
    if val == 0:
        print("", end = "\n")
    else:
        print("[%s] folder : %s/%s/ ----> file : %s" % (progr, folder, sub_folder, filename), end="\r")

## Preprocessing

- convert to grayscale 
- crop ROI
- resize image

In [102]:
dataset_dir = "DATASET/" 

imgs = [] #list image matrix 
labels = []
descs = []
for folder in os.listdir(dataset_dir):
    for sub_folder in os.listdir(os.path.join(dataset_dir, folder)):
        sub_folder_files = os.listdir(os.path.join(dataset_dir, folder, sub_folder))
        len_sub_folder = len(sub_folder_files) - 1
        for i, filename in enumerate(sub_folder_files):
            img = cv2.imread(os.path.join(dataset_dir, folder, sub_folder, filename))
            
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            
            h, w = gray.shape
            ymin, ymax, xmin, xmax = h//3, h*2//3, w//3, w*2//3
            crop = gray[ymin:ymax, xmin:xmax]
            
            resize = cv2.resize(crop, (0,0), fx=0.5, fy=0.5)
            
            imgs.append(resize)
            labels.append(normalize_label(os.path.splitext(filename)[0]))
            descs.append(normalize_desc(folder, sub_folder))
            
            print_progress(i, len_sub_folder, folder, sub_folder, filename)

            


[##########] folder : 1. Light Coffee/1.1 Green Coffee/ ----> file : L_GC_ (9).JPGG
[##########] folder : 1. Light Coffee/1.2 Begins To Pale/ ----> file : L_BTP_ (9).JPGG
[##########] folder : 1. Light Coffee/1.3 Early Yellow/ ----> file : L_EY_ (9).JPGG
[##########] folder : 1. Light Coffee/1.4 Yellow-Tan/ ----> file : L_YT_ (9).JPGG
[##########] folder : 2. Medium Coffee/2.1 Light Brown/ ----> file : M_LB_ (9).JPGG
[##########] folder : 2. Medium Coffee/2.2 Brown/ ----> file : M_B_ (9).JPGG
[##########] folder : 2. Medium Coffee/2.3 1st Crack Start/ ----> file : M_FCS_ (9).JPGG
[##########] folder : 2. Medium Coffee/2.4 1st Crack Done/ ----> file : M_FCD_ (9).JPGG
[##########] folder : 3. Dark Coffee/3.1 City Roast/ ----> file : D_CR_ (9).JPGG
[##########] folder : 3. Dark Coffee/3.2 City+/ ----> file : D_C+_ (9).JPGG
[##########] folder : 3. Dark Coffee/3.3 Full City/ ----> file : D_FC_ (9).JPGG
[##########] folder : 3. Dark Coffee/3.4 Full City+ 2nd Crack/ ----> file : D_FC2C_ (9)

In [103]:
descs

['Light Coffee -  Green Coffee',
 'Light Coffee -  Green Coffee',
 'Light Coffee -  Green Coffee',
 'Light Coffee -  Green Coffee',
 'Light Coffee -  Green Coffee',
 'Light Coffee -  Green Coffee',
 'Light Coffee -  Green Coffee',
 'Light Coffee -  Green Coffee',
 'Light Coffee -  Green Coffee',
 'Light Coffee -  Green Coffee',
 'Light Coffee -  Green Coffee',
 'Light Coffee -  Green Coffee',
 'Light Coffee -  Green Coffee',
 'Light Coffee -  Green Coffee',
 'Light Coffee -  Green Coffee',
 'Light Coffee -  Begins To Pale',
 'Light Coffee -  Begins To Pale',
 'Light Coffee -  Begins To Pale',
 'Light Coffee -  Begins To Pale',
 'Light Coffee -  Begins To Pale',
 'Light Coffee -  Begins To Pale',
 'Light Coffee -  Begins To Pale',
 'Light Coffee -  Begins To Pale',
 'Light Coffee -  Begins To Pale',
 'Light Coffee -  Begins To Pale',
 'Light Coffee -  Begins To Pale',
 'Light Coffee -  Begins To Pale',
 'Light Coffee -  Begins To Pale',
 'Light Coffee -  Begins To Pale',
 'Light Coffee 

In [163]:
cv2.imshow("test img", imgs[0])

cv2.waitKey(0)
cv2.destroyAllWindows()

### Feature Extraction - GLCM 

In [67]:
from skimage.feature import greycomatrix, greycoprops

#### Library
https://scikit-image.org/docs/stable/api/skimage.feature.html#skimage.feature.greycoprops


#### Metric GLCM :
![](resource/metric_glcm.PNG)

In [73]:
glcm = greycomatrix(imgs[0], distances=[5], angles=[0], levels=256,
                    symmetric=True, normed=True)
print('dissimilarity \t: %.04f' % greycoprops(glcm, 'dissimilarity')[0, 0])
print('correlation \t: %.04f' % greycoprops(glcm, 'correlation')[0, 0])
print('homogeneity \t: %.04f' % greycoprops(glcm, 'homogeneity')[0, 0])
print('contrast \t: %.04f' % greycoprops(glcm, 'contrast')[0, 0])
print('ASM \t\t: %.04f' % greycoprops(glcm, 'ASM')[0, 0])
print('energy \t\t: %.04f' % greycoprops(glcm, 'energy')[0, 0])

dissimilarity 	: 3.2255
correlation 	: 0.9859
homogeneity 	: 0.4271
contrast 	: 42.8574
ASM 		: 0.0054
energy 		: 0.0737


In [84]:
def calc_glcm_feature(img, label, dists=[5], agls=[0], lvl=256, sym=True, norm=True):
    
    glcm = greycomatrix(img, 
                        distances=dists, 
                        angles=agls, 
                        levels=lvl,
                        symmetric=sym, 
                        normed=norm)
    feature = []
    feature.append(greycoprops(glcm, 'dissimilarity')[0, 0])
    feature.append(greycoprops(glcm, 'correlation')[0, 0])
    feature.append(greycoprops(glcm, 'homogeneity')[0, 0])
    feature.append(greycoprops(glcm, 'contrast')[0, 0])
    feature.append(greycoprops(glcm, 'ASM')[0, 0])
    feature.append(greycoprops(glcm, 'energy')[0, 0])
    feature.append(label)
    
    return feature

In [86]:
glcm_features = []
for img, label in zip(imgs, labels): 
    glcm_features.append(calc_glcm_feature(img, label))

In [87]:
import pandas as pd 

In [88]:
# Create the pandas DataFrame for GLCM features data
glcm_features_df = pd.DataFrame(glcm_features, 
                                columns = ['dissimilarity', 
                                           'correlation',
                                           'homogeneity',
                                           'contrast',
                                           'ASM',
                                           'energy',
                                           'label'])

In [164]:
glcm_features_df.head(7)

Unnamed: 0,dissimilarity,correlation,homogeneity,contrast,ASM,energy,label
0,3.225539,0.985857,0.427142,42.857421,0.005439,0.073748,LGC
1,2.774717,0.978049,0.473154,34.176164,0.007451,0.086321,LGC
2,2.753947,0.978409,0.476367,33.495342,0.007596,0.087156,LGC
3,2.814796,0.977401,0.468644,35.156059,0.007361,0.085799,LGC
4,2.797355,0.97916,0.471095,33.945474,0.007071,0.084089,LGC
5,2.831539,0.978588,0.467817,34.898066,0.007003,0.083682,LGC
6,3.039243,0.987625,0.439864,38.781414,0.005614,0.074929,LGC
