In [1]:
import cv2
import os
from os import listdir
from glrlm import GLRLM
import numpy as np 
import cv2 
import os
import re
from skimage.feature import greycomatrix, greycoprops
from sklearn.svm import SVC

In [2]:

# -------------------- Utility functions ------------------------
def normalize_label(str_):
    str_ = str_.replace(" ", "")
    str_ = str_.translate(str_.maketrans("","", "()"))
    str_ = str_.split("_")
    return ''.join(str_[:2])

def normalize_desc(folder, sub_folder):
    text = folder + " - " + sub_folder 
    text = re.sub(r'\d+', '', text)
    text = text.replace(".", "")
    text = text.strip()
    return text

def print_progress(val, val_len, folder, sub_folder, filename, bar_size=10):
    progr = "#"*round((val)*bar_size/val_len) + " "*round((val_len - (val))*bar_size/val_len)
    if val == 0:
        print("", end = "\n")
    else:
        print("[%s] folder : %s/%s/ ----> file : %s" % (progr, folder, sub_folder, filename), end="\r")
        

# -------------------- Load Dataset ------------------------
 


imgs = [] #list image matrix   
labels = []
descs = []
results = []


In [12]:
dataset_dir = "dataset/Healthy/" 
for filename in os.listdir(dataset_dir):
    img = cv2.imread(os.path.join(dataset_dir, filename))
    img_median2 = cv2.medianBlur(img, 5)
    gray = cv2.cvtColor(img_median2, cv2.COLOR_BGR2GRAY)
    h, w = gray.shape
    ymin, ymax, xmin, xmax = h//3, h*2//3, w//3, w*2//3
    crop = gray[ymin:ymax, xmin:xmax]

    resize = cv2.resize(crop, (0,0), fx=0.5, fy=0.5)
    ret, resize = cv2.threshold(resize, 120, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    imgs.append(resize)
    labels.append(1)
            
#             print_progress(i, len_sub_folder, folder, sub_folder, filename)

# ----------------- calculate greycomatrix() & greycoprops() for angle 0, 45, 90, 135 ----------------------------------
def calc_glcm_all_agls(img, label, props, dists=[5], agls=[0, np.pi/4, np.pi/2, 3*np.pi/4], lvl=256, sym=True, norm=True):
    
    glcm = greycomatrix(img, 
                        distances=dists, 
                        angles=agls, 
                        levels=lvl,
                        symmetric=sym, 
                        normed=norm)
    feature = []
    glcm_props = [propery for name in props for propery in greycoprops(glcm, name)[0]]
    for item in glcm_props:
            feature.append(item)
    feature.append(label) 
    return feature


# ----------------- call calc_glcm_all_agls() for all properties ----------------------------------
properties = ['dissimilarity', 'correlation', 'homogeneity', 'contrast', 'ASM', 'energy']

glcm_all_agls = []
for img, label in zip(imgs, labels): 
    glcm_all_agls.append(
            calc_glcm_all_agls(img, 
                                label, 
                                props=properties)
                            )
 
columns = []
angles = ['0', '45', '90','135']
for name in properties :
    for ang in angles:
        columns.append(name + "_" + ang)
        
columns.append("label")

In [13]:
import pandas as pd 

# Create the pandas DataFrame for GLCM features data
glcm_df = pd.DataFrame(glcm_all_agls, 
                      columns = columns)


In [14]:
glcm_df

Unnamed: 0,dissimilarity_0,dissimilarity_45,dissimilarity_90,dissimilarity_135,correlation_0,correlation_45,correlation_90,correlation_135,homogeneity_0,homogeneity_45,...,contrast_135,ASM_0,ASM_45,ASM_90,ASM_135,energy_0,energy_45,energy_90,energy_135,label
0,36.022545,37.420513,28.835832,34.987897,0.605575,0.591226,0.684692,0.617800,0.858737,0.853255,...,8921.913788,0.520537,0.515794,0.541067,0.522625,0.721482,0.718188,0.735573,0.722928,0
1,21.824324,22.505369,18.579508,21.504477,0.828589,0.823273,0.854096,0.831132,0.914416,0.911745,...,5483.641666,0.422439,0.420140,0.433072,0.423388,0.649953,0.648182,0.658082,0.650683,0
2,27.298588,28.204032,23.367413,26.434295,0.738198,0.729687,0.775990,0.746654,0.892948,0.889398,...,6740.745118,0.495497,0.492460,0.507685,0.497902,0.703916,0.701755,0.712520,0.705622,0
3,43.113767,41.475130,36.150976,43.111441,0.655680,0.668513,0.711100,0.655432,0.830929,0.837355,...,10993.417514,0.368477,0.373147,0.387611,0.368861,0.607023,0.610858,0.622584,0.607339,0
4,46.773281,46.187211,45.491846,49.994699,0.626393,0.631317,0.636999,0.600923,0.816578,0.818876,...,12748.648332,0.359264,0.360401,0.361970,0.351103,0.599386,0.600334,0.601639,0.592539,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155,21.459527,23.200347,27.332451,27.909334,0.830504,0.816781,0.784110,0.779593,0.915846,0.909020,...,7116.880096,0.426427,0.420723,0.407819,0.405956,0.653014,0.648632,0.638607,0.637147,1
156,8.840554,11.883463,12.704092,11.241469,0.929968,0.905865,0.899329,0.910951,0.965332,0.953399,...,2866.574569,0.471493,0.460515,0.457784,0.462804,0.686654,0.678613,0.676597,0.680297,1
157,29.494681,33.564682,35.723147,34.468888,0.768559,0.736649,0.719719,0.729555,0.884336,0.868376,...,8789.566347,0.397950,0.385886,0.379713,0.383286,0.630833,0.621197,0.616208,0.619101,1
158,24.763480,27.562425,30.296392,30.427420,0.763669,0.737159,0.711539,0.709845,0.902890,0.891914,...,7758.992010,0.501404,0.492366,0.483433,0.483675,0.708099,0.701688,0.695294,0.695468,1


In [15]:
glcm_df.columns

Index(['dissimilarity_0', 'dissimilarity_45', 'dissimilarity_90',
       'dissimilarity_135', 'correlation_0', 'correlation_45',
       'correlation_90', 'correlation_135', 'homogeneity_0', 'homogeneity_45',
       'homogeneity_90', 'homogeneity_135', 'contrast_0', 'contrast_45',
       'contrast_90', 'contrast_135', 'ASM_0', 'ASM_45', 'ASM_90', 'ASM_135',
       'energy_0', 'energy_45', 'energy_90', 'energy_135', 'label'],
      dtype='object')

In [16]:
X = glcm_df[['dissimilarity_0', 'dissimilarity_45', 'dissimilarity_90',
       'dissimilarity_135', 'correlation_0', 'correlation_45',
       'correlation_90', 'correlation_135', 'homogeneity_0', 'homogeneity_45',
       'homogeneity_90', 'homogeneity_135', 'contrast_0', 'contrast_45',
       'contrast_90', 'contrast_135', 'ASM_0', 'ASM_45', 'ASM_90', 'ASM_135',
       'energy_0', 'energy_45', 'energy_90', 'energy_135']]
Y = glcm_df['label']

In [17]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.3)

In [18]:
kernels = ['RBF', 'Sigmoid','Linear']
def chooseKernel(ker):
  if (ker == 0):
      return SVC(kernel = 'rbf', gamma = "auto")
  elif (ker == 1):
      return SVC(kernel = 'sigmoid', gamma = "auto")
  elif (ker == 2):
      return SVC(kernel = 'linear', gamma = "auto")

In [19]:
from sklearn.metrics import classification_report, confusion_matrix   
for i in range(3):
  X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.20)
  svc_classifier = chooseKernel(i)
  svc_classifier.fit(X_train, Y_train)
  predictions = svc_classifier.predict(X_test)
  print("Evaluation:", kernels[i], "kernel")
  print(classification_report(Y_test,predictions))

Evaluation: RBF kernel
              precision    recall  f1-score   support

           0       0.47      1.00      0.64        15
           1       0.00      0.00      0.00        17

    accuracy                           0.47        32
   macro avg       0.23      0.50      0.32        32
weighted avg       0.22      0.47      0.30        32

Evaluation: Sigmoid kernel
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        19
           1       0.41      1.00      0.58        13

    accuracy                           0.41        32
   macro avg       0.20      0.50      0.29        32
weighted avg       0.17      0.41      0.23        32



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Evaluation: Linear kernel
              precision    recall  f1-score   support

           0       0.50      0.53      0.51        17
           1       0.43      0.40      0.41        15

    accuracy                           0.47        32
   macro avg       0.46      0.46      0.46        32
weighted avg       0.47      0.47      0.47        32



In [20]:
# ANN

from sklearn.neural_network import MLPClassifier
mlp_classifier = MLPClassifier(hidden_layer_sizes=(8), random_state=5, verbose=True, learning_rate_init=0.01)
mlp_classifier.fit(X_train,Y_train)

Iteration 1, loss = 18.30342336
Iteration 2, loss = 18.30342329
Iteration 3, loss = 18.30342323
Iteration 4, loss = 18.30342318
Iteration 5, loss = 18.30342314
Iteration 6, loss = 18.30342311
Iteration 7, loss = 18.30342309
Iteration 8, loss = 18.13150323
Iteration 9, loss = 17.18990815
Iteration 10, loss = 14.52049618
Iteration 11, loss = 17.47312530
Iteration 12, loss = 17.50215660
Iteration 13, loss = 17.65582244
Iteration 14, loss = 17.68508774
Iteration 15, loss = 17.70283870
Iteration 16, loss = 17.70351845
Iteration 17, loss = 17.69073447
Iteration 18, loss = 17.67283308
Iteration 19, loss = 17.63123840
Iteration 20, loss = 17.29942299
Iteration 21, loss = 16.45005969
Training loss did not improve more than tol=0.000100 for 10 consecutive epochs. Stopping.


MLPClassifier(hidden_layer_sizes=8, learning_rate_init=0.01, random_state=5,
              verbose=True)

In [27]:
predictions_mlp=mlp_classifier.predict(X_test)
from sklearn.metrics import accuracy_score
accuracy_score(Y_test,predictions_mlp)
print(confusion_matrix(Y_test, Y_pred))
print(classification_report(Y_test, Y_pred))

[[9 9]
 [9 5]]
              precision    recall  f1-score   support

           0       0.50      0.50      0.50        18
           1       0.36      0.36      0.36        14

    accuracy                           0.44        32
   macro avg       0.43      0.43      0.43        32
weighted avg       0.44      0.44      0.44        32



In [26]:
# Naive bayes 

from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix 
X_train,X_test,Y_train,Y_test= train_test_split(X,Y,test_size=0.2)
model  = GaussianNB()
model.fit(X_train,Y_train)
Y_pred = model.predict(X_test)
Y_pred
accuracy  = accuracy_score(Y_test,Y_pred)*100
accuracy
print(confusion_matrix(Y_test, Y_pred))
print(classification_report(Y_test, Y_pred))

[[9 9]
 [9 5]]
              precision    recall  f1-score   support

           0       0.50      0.50      0.50        18
           1       0.36      0.36      0.36        14

    accuracy                           0.44        32
   macro avg       0.43      0.43      0.43        32
weighted avg       0.44      0.44      0.44        32



In [25]:
#KNN

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.20)
classifier = KNeighborsClassifier(n_neighbors=5)
classifier.fit(X_train, Y_train)
Y_pred = classifier.predict(X_test)
print(confusion_matrix(Y_test, Y_pred))
print(classification_report(Y_test, Y_pred))

[[13  4]
 [ 6  9]]
              precision    recall  f1-score   support

           0       0.68      0.76      0.72        17
           1       0.69      0.60      0.64        15

    accuracy                           0.69        32
   macro avg       0.69      0.68      0.68        32
weighted avg       0.69      0.69      0.69        32

