In [1]:
import glob
import os
import cv2
import numpy as np
import math
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

In [2]:
labels = ["kecubung", "kurung", "likingpaku", "mekawis"]
dataset = ["datatraining", "datatesting"]

In [3]:
datatraining = {x : [] for x in labels}
datatesting = {x : [] for x in labels}
for c in dataset:
    for d in labels:
        path = glob.glob("dataset/"+c+"/"+d+"/*.png")
        if c == "datatraining":
            datatraining[d] = path
        elif c == "datatesting":
            datatesting[d] = path

In [4]:
# datatesting

In [5]:
def get_miu(glcm):
    miu_i = 0
    miu_j = 0
    for i in range(glcm.shape[0]):
        for j in range(glcm.shape[1]):
            miu_i += glcm[i][j]*i
            miu_j += glcm[i][j]*j
    return miu_i, miu_j

def get_sigma(glcm, miu_i, miu_j):
    sigma_i = 0
    sigma_j = 0
    for i in range(glcm.shape[0]):
        for j in range(glcm.shape[0]):
            temp_sigma_i = 0
            if glcm[i][j]>0:
                temp_sigma_i = ((i-miu_i)**2)*glcm[i][j]
                sigma_i += temp_sigma_i
    sigma_i = math.sqrt(sigma_i)
    
    glcm_transpose = glcm.T
    for i in range(glcm_transpose.shape[0]):
        for j in range(glcm_transpose.shape[0]):
            temp_sigma_j = 0
            if glcm_transpose[i][j]>0:
                temp_sigma_j = ((i-miu_j)**2)*glcm_transpose[i][j]
                sigma_j += temp_sigma_j
    sigma_j = math.sqrt(sigma_j)
            
    return sigma_i, sigma_j

def get_corellation(glcm):
    miu_i, miu_j = get_miu(glcm)
    sigma_i, sigma_j = get_sigma(glcm, miu_i, miu_j)
    correlation = 0
    for i in range(glcm.shape[0]):
        for j in range(glcm.shape[0]):
            if glcm[i][j]>0:
                correlation += (i-miu_i)*(j-miu_j)*(glcm[i][j])/sigma_i*sigma_j
    return correlation

In [6]:
#maximum number of gray levels
gray_level = 256

def maxGrayLevel(img):
    max_gray_level=0
    (height,width)=img.shape
    for y in range(height):
        for x in range(width):
            if img[y][x] > max_gray_level:
                max_gray_level = img[y][x]
    return max_gray_level+1
                                                

def getGlcm(input, degree, distance):
    srcdata=input.copy()
    ret=[[0.0 for i in range(gray_level)] for j in range(gray_level)]
    (height,width) = input.shape

    max_gray_level=maxGrayLevel(input)
    #If the number of gray levels is greater than gray_level, reduce the gray level of the image to gray_level and reduce the size of the gray level co-occurrence matrix
    if max_gray_level > gray_level:
        for j in range(height):
            for i in range(width):
                srcdata[j][i] = srcdata[j][i]*gray_level / max_gray_level

    if degree == 0:
        for j in range(height - 0):
            for i in range(width - distance):
                rows = srcdata[j][i]
                cols = srcdata[j + 0][i+ distance]
                ret[rows][cols]+=1.0
    elif degree == 45:
        for j in range(height - distance):
            for i in range(width - distance):
                rows = srcdata[j][i]
                cols = srcdata[j + distance][i+ distance]
                ret[rows][cols]+=1.0
    elif degree == 90:
        for j in range(height - distance):
            for i in range(width):
                rows = srcdata[j][i]
                cols = srcdata[j - distance][i]
                ret[rows][cols]+=1.0
    elif degree == 135:
        for j in range(distance, height):
            for i in range(distance, width):
                rows = srcdata[j][i]
                cols = srcdata[j - distance][i - distance]
                ret[rows][cols]+=1.0

    for i in range(gray_level):
        for j in range(gray_level):
            ret[i][j]/=float(height*width)

    return ret

def feature_computer(p):
    # Con=0.0 #contras
    Ent=0.0 #entropy
    Hom=0.0   #homogenity
    Diss=0.0 #dissimilarty
    Idm=0.0 #IDM
    Asm=0.0 #Angle second moment
    
    Eng=0.0 #energy
    for i in range(gray_level):
        for j in range(gray_level):
            # Con+=p[i][j] * np.power(i - j, 2)
            Asm+=p[i][j]*p[i][j]
            Diss+=p[i][j]*np.abs(i-j)
            Eng+= np.power(p[i][j], 2)
            Hom+= p[i][j] / (1 + np.power(i - j, 2))
            Idm+=p[i][j]/(1+(i-j)*(i-j))
            if p[i][j]>0.0:
                Ent+=p[i][j]*math.log(p[i][j])
    return Eng,Diss,Hom,Idm,-Ent,Asm

In [26]:
#get feature data training
def get_feature():
    label = []
    fitur = []
    for key, values in datatraining.items():
        for path in values:
            degree = [0,45,90,135]
            for d in degree:
                img =cv2.imread(path, 0)
                glcm = getGlcm(img, d, 1)
                glcm = np.array(glcm)
                correlation = get_corellation(glcm)
                Eng,diss,hom,idm,ent,Asm=feature_computer(glcm)
                fitur.append([Eng,diss,hom,idm,ent,Asm])
                label.append(key)
    return fitur, label

fitur, label = get_feature()

In [27]:
df_fitur = pd.DataFrame(fitur, columns = ["energy","dissimilarity","homogeneity","IDM","entropy","ASM"])
df_fitur["labels"] = pd.DataFrame(label)
df_fitur.to_csv("train_glcm_feature_training_rev.csv", index = False)

In [28]:
def get_feature():
    label = []
    fitur = []
    for key, values in datatesting.items():
        for path in values:
            degree = [0,45,90,135]
            for d in degree:
                img =cv2.imread(path, 0)
                glcm = getGlcm(img, d, 1)
                glcm = np.array(glcm)
                correlation = get_corellation(glcm)
                Eng,diss,hom,idm,ent,Asm=feature_computer(glcm)
                fitur.append([Eng,diss,hom,idm,ent,Asm])
                label.append(key)
    return fitur, label

fitur, label = get_feature()

In [29]:
df_fitur = pd.DataFrame(fitur, columns = ["energy","dissimilarity","homogeneity","IDM","entropy","ASM"])
df_fitur["labels"] = pd.DataFrame(label)
df_fitur.to_csv("test_glcm_feature_testing_rev.csv", index = False)

In [11]:
df_fitur

Unnamed: 0,energy,dissimilarity,homogeneity,IDM,entropy,ASM,labels
0,0.005192,3.387660,0.384453,0.384453,6.079523,0.005192,kecubung
1,0.004419,3.801076,0.339398,0.339398,6.249987,0.004419,kecubung
2,0.005782,1.903476,0.424551,0.424551,5.850643,0.005782,kecubung
3,0.004419,3.801076,0.339398,0.339398,6.249987,0.004419,kecubung
4,0.003436,2.559524,0.396064,0.396064,6.575935,0.003436,kecubung
...,...,...,...,...,...,...,...
523,0.000597,14.630796,0.108271,0.108271,8.279319,0.000597,mekawis
524,0.000704,9.342492,0.142189,0.142189,8.146068,0.000704,mekawis
525,0.000577,16.078776,0.115001,0.115001,8.485442,0.000577,mekawis
526,0.000720,14.139112,0.141761,0.141761,8.351088,0.000720,mekawis


In [12]:
X_train, X_test, y_train, y_test = train_test_split( fitur, label, test_size=0.25, random_state=42)

In [13]:
neigh = KNeighborsClassifier(n_neighbors=3)
neigh.fit(X_train, y_train)
y_pred = neigh.predict(X_test)
accuracy_score(y_test, y_pred)

0.7348484848484849

In [None]:
# import cv2
# import math
# np.set_printoptions(suppress=True, threshold=np.inf)
# im = cv2.imread('dataset/datatesting/kecubung\\kecubung10.png',0)
# glcm = np.array(getGlcm(im,45,1))