In [7]:
# Import library yang kalian butuhkan
import cv2 as cv
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import seaborn as sns
import math
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from skimage.feature import graycomatrix, graycoprops
from sklearn.feature_selection import SelectKBest, chi2
from scipy.stats import entropy
from numpy import ravel

## Data Loading

In [8]:
# data = []
# labels = []
# paths = []
# file_name = []

# for sub_folder in os.listdir("dataset\\"):
#     sub_folder_files = os.listdir(os.path.join("dataset\\", sub_folder))
#     for i, filename in enumerate(sub_folder_files):
#         img_path = os.path.join("dataset\\", sub_folder, filename)
#         img = cv.imread(img_path)
        
#         if img is None:  # Cek apakah gambar terbaca dengan benar
#             print(f"Failed to read image {img_path}")
#             continue  # Lanjutkan ke gambar berikutnya
        
#         resized_img = cv.resize(img, (256, 256))  # Resize gambar menjadi 256x256 piksel
#         data.append(resized_img)  # Tambahkan gambar yang di-resize, bukan gambar asli
#         labels.append(sub_folder)
#         paths.append(img_path)
#         name = os.path.splitext(filename)[0]
#         file_name.append(filename)
        
#         # Membuat direktori output jika belum ada
#         output_dir = os.path.join("resizeImage\\", sub_folder) 
#         if not os.path.exists(output_dir):
#             os.makedirs(output_dir)
        
#         # Menyimpan gambar yang di-resize ke direktori output
#         cv.imwrite(os.path.join(output_dir, filename), resized_img)

# print("Image processing complete.")


In [9]:
data = []
labels = []
paths = []
file_name = []
for sub_folder in os.listdir("resizeImage\\"):
    sub_folder_files = os.listdir(os.path.join("resizeImage\\", sub_folder))
    for i, filename in enumerate(sub_folder_files):
        img_path = os.path.join("resizeImage\\", sub_folder, filename)
        img = cv.imread(img_path)
        img = img.astype(np.uint8)
        # resized_img = cv.resize(img, (256, 256))  # Resize the image to 256x256 pixels
        data.append(img)
        labels.append(sub_folder)
        paths.append(img_path)
        name = os.path.splitext(filename)[0]
        file_name.append(filename)

In [10]:
print(len(data), len(labels), len(paths))

2099 2099 2099


## Data Preparation

### Data Augmentation

In [11]:
def rotate(images, angle):
	rotatedImages = []
	for i in range(len(images)):
		rows, cols = images[i].shape[:2]
		M = cv.getRotationMatrix2D((cols/2, rows/2), angle, 1)
		rotated = cv.warpAffine(images[i], M, (cols, rows))
		rotatedImages.append(rotated)
	return rotatedImages

data = np.array(data)
labels = np.array(labels)
paths = np.array(paths)

### Preprocessing

#### Preprocessing 1

In [12]:
def rgb2gray(images):
    grayImages = []
    for i in range(len(images)):
        gray = cv.cvtColor(images[i], cv.COLOR_RGB2GRAY)
        grayImages.append(gray)
    return grayImages

#### Preprocessing 2

In [13]:
def equalization(images):
	equalizedData = []
	for i in range(len(images)):
		equalized = cv.equalizeHist(images[i])
		equalizedData.append(equalized)
	return equalizedData

#### Preprocessing n

In [14]:
def remove_background(image):
    hsv = cv.cvtColor(image, cv.COLOR_BGR2HSV)

    lower_yellow = np.array([20, 100, 100])
    upper_yellow = np.array([40, 255, 255])

    lower_orange = np.array([5, 100, 100])
    upper_orange = np.array([15, 255, 255])

    lower_white = np.array([0, 0, 200])
    upper_white = np.array([40, 50, 255])

    lower_green = np.array([30, 50, 50])
    upper_green = np.array([90, 255, 255])

    lower_cream = np.array([0, 50, 150])
    upper_cream = np.array([30, 150, 220])

    lower_brown = np.array([10, 50, 50])
    upper_brown = np.array([20, 255, 255])

    mask_yellow = cv.inRange(hsv, lower_yellow, upper_yellow)
    mask_orange = cv.inRange(hsv, lower_orange, upper_orange)
    mask_white = cv.inRange(hsv, lower_white, upper_white)
    mask_green = cv.inRange(hsv, lower_green, upper_green)
    mask_cream = cv.inRange(hsv, lower_cream, upper_cream)
    mask_brown = cv.inRange(hsv, lower_brown, upper_brown)
    mask = cv.bitwise_or(mask_yellow, mask_orange)
    mask = cv.bitwise_or(mask, mask_white)
    mask = cv.bitwise_or(mask, mask_green)
    mask = cv.bitwise_or(mask, mask_cream)
    mask = cv.bitwise_or(mask, mask_brown)

    result = cv.bitwise_and(image, image, mask=mask)
    return result

In [15]:
def prewitt(image):
    kernelx = np.array([[1, 0, -1], [1, 0, -1], [1, 0, -1]], dtype=int)
    kernely = np.array([[1, 1, 1], [0, 0, 0], [-1, -1, -1]], dtype=int)
    prewittx = cv.filter2D(image, cv.CV_64F, kernelx)
    prewitty = cv.filter2D(image, cv.CV_64F, kernely)
    prewitt = np.sqrt(prewittx**2 + prewitty**2)
    prewitt = cv.convertScaleAbs(prewitt)
    return prewitt

In [16]:
def remove_background_grayImage(image):
	hsv = cv.cvtColor(image, cv.COLOR_BGR2HSV)

	lower_yellow = np.array([20, 100, 100])
	upper_yellow = np.array([40, 255, 255])

	lower_orange = np.array([5, 100, 100])
	upper_orange = np.array([15, 255, 255])

	lower_white = np.array([0, 0, 200])
	upper_white = np.array([40, 50, 255])

	lower_green = np.array([30, 50, 50])
	upper_green = np.array([90, 255, 255])

	lower_cream = np.array([0, 50, 150])
	upper_cream = np.array([30, 150, 220])

	lower_brown = np.array([10, 50, 50])
	upper_brown = np.array([20, 255, 255])

	mask_yellow = cv.inRange(hsv, lower_yellow, upper_yellow)
	mask_orange = cv.inRange(hsv, lower_orange, upper_orange)
	mask_white = cv.inRange(hsv, lower_white, upper_white)
	mask_green = cv.inRange(hsv, lower_green, upper_green)
	mask_cream = cv.inRange(hsv, lower_cream, upper_cream)
	mask_brown = cv.inRange(hsv, lower_brown, upper_brown)
	mask = cv.bitwise_or(mask_yellow, mask_orange)
	mask = cv.bitwise_or(mask, mask_white)
	mask = cv.bitwise_or(mask, mask_green)
	mask = cv.bitwise_or(mask, mask_cream)
	mask = cv.bitwise_or(mask, mask_brown)

	result = cv.bitwise_and(image, image, mask=mask)
	return result

In [17]:
def gradient(image):
	sobelx = cv.Sobel(image, cv.CV_64F, 1, 0, ksize=3)
	sobely = cv.Sobel(image, cv.CV_64F, 0, 1, ksize=3)
	gradient = np.sqrt(sobelx**2 + sobely**2)
	gradient = cv.convertScaleAbs(gradient)
	return gradient

In [18]:
def treshold(image, treshold):
	ret, tresholded = cv.threshold(image, treshold, 255, cv.THRESH_BINARY)
	return tresholded

In [19]:
def GaussBlurring(image):
	blurred = cv.GaussianBlur(image, (3, 3), 0)
	return blurred

In [20]:
dataPreprocessed = []
for i in data:
    i = remove_background(i)
    i = cv.cvtColor(i, cv.COLOR_BGR2GRAY)
    i = prewitt(i)
    i = treshold(i, 100)
    dataPreprocessed.append(i)

### Feature Extraction

In [21]:
def glcm(image, bitrange, derajat):
    levels = int(np.power(2, bitrange))
    if derajat == 0:
        angles = [0]
    elif derajat == 45:
        angles = [np.pi / 4]
    elif derajat == 90:
        angles = [np.pi / 2]
    elif derajat == 135:
        angles = [3 * np.pi / 4]
    else:
        raise ValueError("Invalid angle. It should be one of the following: 0, 45, 90, 135.")

    glcm = graycomatrix(image, [1], angles, levels=levels, symmetric=True, normed=True)
    return glcm

In [22]:
def correlation(matriks):
    return graycoprops(matriks, prop='correlation')[0][0].ravel()

In [23]:
def dissimilarity(matriks):
    dissimilarity = graycoprops(matriks, 'dissimilarity')
    return dissimilarity[0, 0]

In [24]:
def homogenity(matriks):
    homogenity = graycoprops(matriks, 'homogeneity')
    return homogenity[0, 0]

In [25]:
def contrast(matriks):
    contrast = graycoprops(matriks, 'contrast')
    return contrast[0, 0]

In [26]:
def entropy(matriks):
    matriks = matriks.flatten()
    matriks = matriks / np.sum(matriks)
    return -np.sum(matriks * np.log2(matriks))

In [27]:
def ASM(matriks):
    matriks = matriks.flatten()
    matriks = matriks / np.sum(matriks)
    return np.sum(matriks**2)

In [28]:
def energy(matriks):
    return np.sqrt(ASM(matriks))

In [29]:
Derajat0 = []
Derajat45 = []
Derajat90 = []
Derajat135 = []
for i in range(len(dataPreprocessed)):
    D0 = glcm(dataPreprocessed[i], 8, 0)
    D45 = glcm(dataPreprocessed[i], 8, 45)
    D90 = glcm(dataPreprocessed[i], 8, 90)
    D135 = glcm(dataPreprocessed[i], 8, 135)
    Derajat0.append(D0)
    Derajat45.append(D45)
    Derajat90.append(D90)
    Derajat135.append(D135)

In [30]:
Kontras0 = []
Kontras45 = []
Kontras90 = []
Kontras135 = []
dissimilarity0 = []
dissimilarity45 = []
dissimilarity90 = []
dissimilarity135 = []
homogenity0 = []
homogenity45 = []
homogenity90 = []
homogenity135 = []
entropy0 = []
entropy45 = []
entropy90 = []
entropy135 = []
ASM0 = []
ASM45 = []
ASM90 = []
ASM135 = []
energy0 = []
energy45 = []
energy90 = []
energy135 = []
correlation0 = []
correlation45 = []
correlation90 = []
correlation135 = []


In [31]:
for i in range(len(data)):
    C0 = correlation(Derajat0[i])
    correlation0.append(C0)
    C45 = correlation(Derajat45[i])
    correlation45.append(C45)
    C90 = correlation(Derajat90[i])
    correlation90.append(C90)
    C135 = correlation(Derajat135[i])
    correlation135.append(C135)

In [32]:
for i in range(len(data)):
    K0 = contrast(Derajat0[i])
    K45 = contrast(Derajat45[i])
    K90 = contrast(Derajat90[i])
    K135 = contrast(Derajat135[i])
    Kontras0.append(K0)
    Kontras45.append(K45)
    Kontras90.append(K90)
    Kontras135.append(K135)

In [33]:
for i in range(len(data)):
    Dis0 = dissimilarity(Derajat0[i])
    Dis45 = dissimilarity(Derajat45[i])
    Dis90 = dissimilarity(Derajat90[i])
    Dis135 = dissimilarity(Derajat135[i])
    dissimilarity0.append(Dis0)
    dissimilarity45.append(Dis45)
    dissimilarity90.append(Dis90)
    dissimilarity135.append(Dis135)

In [34]:
for i in range(len(data)):
    H0 = homogenity(Derajat0[i])
    H45 = homogenity(Derajat45[i])
    H90 = homogenity(Derajat90[i])
    H135 = homogenity(Derajat135[i])
    homogenity0.append(H0)
    homogenity45.append(H45)
    homogenity90.append(H90)
    homogenity135.append(H135)

In [35]:
for i in range(len(data)):  
    E0 = entropy(Derajat0[i])
    E45 = entropy(Derajat45[i])
    E90 = entropy(Derajat90[i])
    E135 = entropy(Derajat135[i])
    entropy0.append(E0)
    entropy45.append(E45)
    entropy90.append(E90)
    entropy135.append(E135)

  return -np.sum(matriks * np.log2(matriks))
  return -np.sum(matriks * np.log2(matriks))


In [36]:
for i in range(len(data)):
    A0 = ASM(Derajat0[i])
    A45 = ASM(Derajat45[i])
    A90 = ASM(Derajat90[i])
    A135 = ASM(Derajat135[i])
    ASM0.append(A0)
    ASM45.append(A45)
    ASM90.append(A90)
    ASM135.append(A135)

In [37]:
for i in range(len(data)):
    ER0 = energy(Derajat0[i])
    ER45 = energy(Derajat45[i])
    ER90 = energy(Derajat90[i])
    ER135 = energy(Derajat135[i])
    energy0.append(ER0)
    energy45.append(ER45)
    energy90.append(ER90)
    energy135.append(ER135)

In [38]:
data = {'Filename': file_name, 'Label': labels,
        'Contrast0': Kontras0, 'Contrast45': Kontras45, 'Contrast90': Kontras90, 'Contrast135': Kontras135,
        'Homogeneity0': homogenity0, 'Homogeneity45': homogenity45, 'Homogeneity90': homogenity90, 'Homogeneity135': homogenity135,
        'Dissimilarity0': dissimilarity0, 'Dissimilarity45': dissimilarity45, 'Dissimilarity90': dissimilarity90, 'Dissimilarity135': dissimilarity135,
        'Entropy0': entropy0, 'Entropy45': entropy45, 'Entropy90': entropy90, 'Entropy135': entropy135,
        'ASM0': ASM0, 'ASM45': ASM45, 'ASM90': ASM90, 'ASM135': ASM135,
        'Energy0': energy0, 'Energy45': energy45, 'Energy90': energy90, 'Energy135': energy135,
        'Correlation0': correlation0, 'Correlation45': correlation45, 'Correlation90': correlation90, 'Correlation135': correlation135,
        }
df = pd.DataFrame(data)
df.to_csv('hasil_ekstraksi.csv', index=False)

In [45]:
hasilEkstrak = pd.read_csv('hasil_ekstraksi.csv')
hasilEkstrak = hasilEkstrak.drop(columns=['Filename'])
hasilEkstrak

Unnamed: 0,Label,Contrast0,Contrast45,Contrast90,Contrast135,Homogeneity0,Homogeneity45,Homogeneity90,Homogeneity135,Dissimilarity0,...,ASM90,ASM135,Energy0,Energy45,Energy90,Energy135,Correlation0,Correlation45,Correlation90,Correlation135
0,finger_1,233.085938,306.0,233.085938,304.0,0.996415,0.995294,0.996415,0.995325,0.914062,...,0.984975,0.983849,0.992459,0.991876,0.992459,0.991892,[0.68702712],[0.59071848],[0.68702712],[0.59339352]
1,finger_1,2659.570312,3245.0,2066.894531,3491.0,0.959100,0.950097,0.968214,0.946314,10.429688,...,0.854171,0.833774,0.919564,0.914967,0.924214,0.913112,[0.64487964],[0.56763602],[0.7237268],[0.53485897]
2,finger_1,2836.875000,3858.0,2685.468750,3568.0,0.956373,0.940670,0.958702,0.945130,11.125000,...,0.797265,0.784422,0.891520,0.883443,0.892897,0.885676,[0.73311369],[0.63760236],[0.74685149],[0.66484324]
3,finger_1,13460.214844,19565.0,17371.875000,19539.0,0.793003,0.699120,0.732847,0.699520,52.785156,...,0.308166,0.293681,0.582982,0.541776,0.555127,0.541923,[0.58264369],[0.39353201],[0.4614319],[0.39433794]
4,finger_1,1542.949219,2005.0,1452.304688,2015.0,0.976272,0.969166,0.977666,0.969012,6.050781,...,0.906714,0.898694,0.951748,0.948071,0.952215,0.947995,[0.66584492],[0.56741026],[0.68741151],[0.5652527]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2094,finger_5,15098.789062,18752.0,15525.117188,18960.0,0.767804,0.711623,0.761248,0.708424,59.210938,...,0.334953,0.309874,0.581697,0.557868,0.578751,0.556663,[0.51959906],[0.40363499],[0.50598303],[0.39702002]
2095,finger_5,3268.183594,5192.0,3214.394531,4686.0,0.949740,0.920155,0.950568,0.927937,12.816406,...,0.759852,0.739799,0.871364,0.856271,0.871695,0.860116,[0.7395704],[0.58699513],[0.74407947],[0.6272456]
2096,finger_5,4723.476562,6704.0,4394.765625,6044.0,0.927360,0.896903,0.932415,0.907053,18.523438,...,0.677712,0.655609,0.820524,0.804642,0.823232,0.809697,[0.71994044],[0.60359025],[0.73932322],[0.64261627]
2097,finger_5,2745.234375,3457.0,2188.417969,3535.0,0.957783,0.946837,0.966345,0.945637,10.765625,...,0.815479,0.796077,0.898540,0.892831,0.903039,0.892231,[0.72259643],[0.65141681],[0.77858339],[0.64355176]


### Features Selection

In [47]:

from sklearn.feature_selection import SelectKBest, f_classif

# Memeriksa tipe data dari setiap kolom
print(hasilEkstrak.dtypes)

# Memeriksa apakah ada nilai non-numerik dalam DataFrame
non_numeric_columns = x.select_dtypes(exclude=['number']).columns
if len(non_numeric_columns) > 0:
    print(f"Kolom non-numerik ditemukan: {non_numeric_columns}")

# Menghapus kolom non-numerik dari DataFrame
x = x.select_dtypes(include=['number'])

# Inisialisasi SelectKBest dengan fungsi f_classif dan pilih 10 fitur terbaik
selector = SelectKBest(score_func=f_classif, k=10)

# Fit selector ke data
selector.fit(x, y)

# Dapatkan skor untuk setiap fitur
scores = selector.scores_

# Buat DataFrame dengan fitur dan skor yang sesuai
scoresFitur = pd.DataFrame({'Fitur': x.columns, 'Skor': scores})

# Cetak fitur terbaik berdasarkan nilai F
print("Fitur Terbaik berdasarkan nilai F")
print(scoresFitur.sort_values(by='Skor', ascending=False).head(10))


Label                object
Contrast0           float64
Contrast45          float64
Contrast90          float64
Contrast135         float64
Homogeneity0        float64
Homogeneity45       float64
Homogeneity90       float64
Homogeneity135      float64
Dissimilarity0      float64
Dissimilarity45     float64
Dissimilarity90     float64
Dissimilarity135    float64
Entropy0            float64
Entropy45           float64
Entropy90           float64
Entropy135          float64
ASM0                float64
ASM45               float64
ASM90               float64
ASM135              float64
Energy0             float64
Energy45            float64
Energy90            float64
Energy135           float64
Correlation0         object
Correlation45        object
Correlation90        object
Correlation135       object
dtype: object
Kolom non-numerik ditemukan: Index(['Correlation0', 'Correlation45', 'Correlation90', 'Correlation135'], dtype='object')


ValueError: Input X contains NaN.
SelectKBest does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modules/impute.html You can find a list of all estimators that handle NaN values at the following page: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values

In [46]:
from sklearn.feature_selection import SelectKBest, f_classif

# Pastikan 'hasilEkstrak' adalah DataFrame pandas yang sudah didefinisikan
# 'x' berisi semua fitur kecuali kolom 'Label'
x = hasilEkstrak.drop(columns=['Label'])

# 'y' berisi variabel target
y = hasilEkstrak['Label']

# Inisialisasi SelectKBest dengan fungsi f_classif dan pilih 10 fitur terbaik
selector = SelectKBest(score_func=f_classif, k=10)

# Fit selector ke data
selector.fit(x, y)

# Dapatkan skor untuk setiap fitur
scores = selector.scores_

# Buat DataFrame dengan fitur dan skor yang sesuai
scoresFitur = pd.DataFrame({'Fitur': x.columns, 'Skor': scores})

# Cetak fitur terbaik berdasarkan nilai F
print("Fitur Terbaik berdasarkan nilai F")
print(scoresFitur.sort_values(by='Skor', ascending=False).head(10))

ValueError: could not convert string to float: '[0.68702712]'

In [41]:
from sklearn.decomposition import PCA

# Assuming that df is your DataFrame and it's already loaded
x = df.drop(columns=['Filename', 'Label'])  # drop non-numeric columns
y = df['Label']

pca = PCA(n_components=10)  # adjust number of components
x_pca = pca.fit_transform(x)

# Convert the PCA result into a DataFrame
df_pca = pd.DataFrame(data=x_pca, columns=["PC" + str(i) for i in range(1, pca.n_components_+1)])

# Add the target column back into the DataFrame
df_pca['Label'] = y

print(df_pca.head())

ValueError: Input X contains NaN.
PCA does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modules/impute.html You can find a list of all estimators that handle NaN values at the following page: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values

### Splitting Data

In [None]:
datatrain = pd.read_csv('hasil_ekstraksi.csv')
x = datatrain[['Entropy0','Entropy45','Entropy135','ASM0','ASM45','ASM135','Entropy90','ASM90']]
y = datatrain['Label']

X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

### Normalization

## Modeling

## Evaluation