In [1]:
# Import the libraries you need
import csv
import math
import os

import cv2 as cv
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import entropy
from skimage.feature import graycomatrix, graycoprops
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split, cross_val_predict
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC


## Data Loading

In [2]:
def load_images_from_folder(folder_path):
    """Read every entry of ``folder_path`` as an image with OpenCV.

    Problems are reported on stdout instead of being raised: an entry that
    cannot be read is skipped, and a folder-level error ends the scan with
    whatever was collected so far.

    Args:
        folder_path: Directory whose entries are passed to ``cv.imread``.

    Returns:
        A ``(data, file_names)`` tuple: the images cast to ``uint8`` and, in
        the same order, each entry's name without its extension.
    """
    data, file_names = [], []

    try:
        for file in os.listdir(folder_path):
            file_path = os.path.join(folder_path, file)
            stem = os.path.splitext(file)[0]

            try:
                img = cv.imread(file_path)
                # A None result is treated as a read failure for this entry.
                if img is None:
                    raise Exception("Failed to read image")

                data.append(img.astype(np.uint8))
                file_names.append(stem)
            except cv.error as e:
                print(f"Error reading image {file}: {e}")
            except Exception as e:
                print(f"Error processing image {file}: {e}")

    except FileNotFoundError:
        print(f"Folder not found: {folder_path}")
    except Exception as e:
        print(f"Error processing folder {folder_path}: {e}")

    return data, file_names



In [3]:
def write_image(data, file_name, path):
    """Contrast-stretch, resize to 300x300 and save each image as a JPEG.

    Every image is min-max scaled to the 0..255 range, resized with
    nearest-neighbour interpolation and written to ``<path>/<name>.jpg``.

    Args:
        data: List of images (NumPy arrays). Each entry is replaced in place
            by its processed 300x300 version.
        file_name: Output base names (without extension), parallel to ``data``.
        path: Destination folder, with or without a trailing separator. It is
            created when it does not exist yet.
    """
    if path:
        os.makedirs(path, exist_ok=True)

    for i in range(len(data)):
        image = data[i].astype(np.uint8)
        min_val = int(np.min(image))
        value_range = int(np.max(image)) - min_val

        if value_range == 0:
            # A constant image has no contrast to stretch; avoid the 0/0 that
            # would otherwise produce NaN before the uint8 cast.
            int_image = np.zeros_like(image)
        else:
            new_image = (image.astype(np.float64) - min_val) / value_range
            int_image = (new_image * 255).astype(np.uint8)

        resized_image = cv.resize(int_image, (300, 300), interpolation=cv.INTER_NEAREST)
        data[i] = resized_image

        # os.path.join keeps the file inside `path` even when the caller
        # passes the folder without a trailing slash.
        save_image_path = os.path.join(path, file_name[i] + ".jpg")
        cv.imwrite(save_image_path, resized_image)

In [4]:
# Resize every finger class into 'resizeImage/finger_<n>/', the folder the
# augmentation step reads from.
for finger in range(1, 6):
    resized_dir = f'resizeImage/finger_{finger}/'
    os.makedirs(resized_dir, exist_ok=True)
    images, file_names = load_images_from_folder(f'dataset/finger_{finger}/')
    write_image(images, file_names, resized_dir)

Error processing image 05_F1D022161_1_001.Jpg: Failed to read image
Error processing image 05_F1D022161_1_002.Jpg: Failed to read image
Error processing image 05_F1D022161_1_003.Jpg: Failed to read image
Error processing image 08_F1D022052_1_1.jpg: Failed to read image
Error processing image 08_F1D022052_1_2.jpg: Failed to read image
Error processing image 08_F1D022052_1_3.jpg: Failed to read image
Error processing image 08_F1D022157_1_1.JPG: Failed to read image
Error processing image 08_F1D022157_1_2.JPG: Failed to read image
Error processing image 08_F1D022157_1_3.JPG: Failed to read image
Error processing image 10_F1D022151_1_1.jpg: Failed to read image
Error processing image 10_F1D022151_1_2.jpg: Failed to read image
Error processing image 10_F1D022151_1_3.jpg: Failed to read image
Error processing image 17_F1D022106_1_1.Jpg: Failed to read image
Error processing image 17_F1D022106_1_2.Jpg: Failed to read image
Error processing image 17_F1D022106_1_3.Jpg: Failed to read image
Erro

## Data Preparation

### Data Augmentation

In [None]:
def rotate_image(image, angle):
    """Rotate ``image`` about its centre by ``angle`` degrees, keeping its size.

    Uses inverse nearest-neighbour mapping: each output pixel ``(x, y)`` is
    filled from the source pixel found by rotating ``(x, y)`` around the image
    centre. Output pixels whose source falls outside the image stay black.
    The mapping is computed for all pixels at once with NumPy instead of a
    Python loop per pixel.

    Args:
        image: ``H x W x 3`` array, or an ``H x W`` grayscale array whose
            values are replicated over the three output channels.
        angle: Rotation angle in degrees.

    Returns:
        A ``uint8`` array of shape ``(H, W, 3)``.
    """
    height, width = image.shape[:2]
    center_x, center_y = width // 2, height // 2

    radians = np.deg2rad(angle)
    cos_val = np.cos(radians)
    sin_val = np.sin(radians)

    # Coordinates of every output pixel, relative to the centre.
    ys, xs = np.indices((height, width))
    dx = xs - center_x
    dy = ys - center_y

    # np.rint rounds half to even, like the built-in round().
    xp = np.rint(cos_val * dx - sin_val * dy + center_x).astype(np.int64)
    yp = np.rint(sin_val * dx + cos_val * dy + center_y).astype(np.int64)

    inside = (xp >= 0) & (xp < width) & (yp >= 0) & (yp < height)

    rotated_image = np.zeros((height, width, 3), dtype=np.uint8)
    source = image[yp[inside], xp[inside]]
    if source.ndim == 1:
        # Grayscale input: add a channel axis so it broadcasts over 3 channels.
        source = source[:, np.newaxis]
    rotated_image[inside] = source

    return rotated_image

def augment_images(image_dir, output_dir):
    """Write rotated copies of every image found in ``image_dir``.

    For each readable image, one JPEG per angle is saved to ``output_dir`` as
    ``<original name without extension>_rotated_<angle>.jpg``.

    Args:
        image_dir: Folder containing the source images.
        output_dir: Folder receiving the rotated images; created if missing.
    """
    os.makedirs(output_dir, exist_ok=True)

    for filename in os.listdir(image_dir):
        image = cv.imread(os.path.join(image_dir, filename))
        if image is None:
            # Not an image (or unreadable): skip it instead of crashing in
            # rotate_image on a None value.
            print(f"Skipping unreadable file: {filename}")
            continue

        # splitext handles extensions of any length (e.g. '.jpeg'), unlike
        # slicing off a fixed four characters.
        stem = os.path.splitext(filename)[0]

        # NOTE(review): 360 degrees reproduces the original orientation, so
        # that copy duplicates the source image - confirm this is intended.
        for angle in [30, 90, 120, 360]:
            rotated_image = rotate_image(image, angle)
            output_filename = f'{stem}_rotated_{angle}.jpg'
            output_path = os.path.join(output_dir, output_filename)
            cv.imwrite(output_path, rotated_image)
            
# Run the rotation augmentation for each of the five finger classes.
for finger in range(1, 6):
    augment_images(f'resizeImage/finger_{finger}', f'augment/finger_{finger}/')


KeyboardInterrupt: 

### Preprocessing

#### Preprocessing 1

In [None]:
def normalize_images_in_folder(folder_path, output_folder):
    """Convert each JPEG in ``folder_path`` to grayscale and contrast-stretch it.

    Results are written to ``output_folder`` as ``normalized_<original name>``.

    Args:
        folder_path: Folder containing the ``.jpg`` inputs (extension matched
            case-insensitively).
        output_folder: Destination folder; created if missing.
    """
    os.makedirs(output_folder, exist_ok=True)

    for filename in os.listdir(folder_path):
        if not filename.lower().endswith(".jpg"):
            continue

        img = cv.imread(os.path.join(folder_path, filename))
        if img is None:
            # Unreadable file: skip it rather than fail inside cvtColor.
            print(f"Skipping unreadable file: {filename}")
            continue

        gray_img = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
        normalized_img = normalize(gray_img)
        output_path = os.path.join(output_folder, f"normalized_{filename}")
        cv.imwrite(output_path, normalized_img)

def normalize(citra):
    """Min-max stretch an image to the full 0..255 range.

    Args:
        citra: Image as a NumPy array.

    Returns:
        A ``uint8`` array of the same shape. A constant image (max == min)
        has no range to stretch and is returned as all zeros instead of
        dividing by zero.
    """
    min_val = np.min(citra)
    max_val = np.max(citra)

    if max_val == min_val:
        return np.zeros(np.shape(citra), dtype=np.uint8)

    normalized_citra = (citra - min_val) / (max_val - min_val) * 255
    return normalized_citra.astype(np.uint8)

# Grayscale + contrast-stretch the augmented images of every finger class.
for finger in range(1, 6):
    normalize_images_in_folder(f'augment/finger_{finger}', f'normalized_images/finger_{finger}')

### Feature Extraction

In [None]:
def co_occurence(citra, rentang, derajat, distance=1):
    """Count gray-level co-occurrences of an image in one direction.

    Entry ``[a, b]`` of the result is the number of pixel pairs where the
    reference pixel has value ``a`` and its neighbour, ``distance`` pixels
    away in the direction ``derajat``, has value ``b``. Pairs whose neighbour
    falls outside the image are not counted.

    Args:
        citra: 2-D integer image with values in ``0..rentang``.
        rentang: Largest gray level; the result is ``(rentang+1, rentang+1)``.
        derajat: Direction in degrees: 0 (right), 45 (up-right), 90 (up) or
            135 (up-left).
        distance: Non-negative pixel offset between the two pixels of a pair.

    Returns:
        A float array of raw (non-symmetric, non-normalised) counts.

    Raises:
        ValueError: If ``derajat`` is not a supported direction or
            ``distance`` is negative. An unsupported direction would otherwise
            yield an all-zero matrix and a NaN GLCM after normalisation.
    """
    if distance < 0:
        raise ValueError(f"distance must be non-negative, got {distance}")

    # (row offset, column offset) from the reference pixel to its neighbour.
    offsets = {
        0: (0, distance),
        45: (-distance, distance),
        90: (-distance, 0),
        135: (-distance, -distance),
    }
    if derajat not in offsets:
        raise ValueError(f"Unsupported angle {derajat}; expected one of {sorted(offsets)}")
    d_row, d_col = offsets[derajat]

    matrixCO = np.zeros([rentang + 1, rentang + 1])
    p, l = citra.shape

    # Range of reference pixels whose neighbour lies inside the image.
    row_start, row_stop = max(0, -d_row), p - max(0, d_row)
    col_start, col_stop = max(0, -d_col), l - max(0, d_col)
    if row_start >= row_stop or col_start >= col_stop:
        # Offset larger than the image: no valid pair exists.
        return matrixCO

    reference = citra[row_start:row_stop, col_start:col_stop]
    neighbour = citra[row_start + d_row:row_stop + d_row,
                      col_start + d_col:col_stop + d_col]

    # np.add.at accumulates repeated (a, b) index pairs, unlike `m[a, b] += 1`
    # with array indices.
    np.add.at(matrixCO, (reference.ravel(), neighbour.ravel()), 1)

    return matrixCO

def getGLCM(citra, derajat, distance=1):
    """Build the symmetric, normalised GLCM of an image for one direction.

    The co-occurrence counts (gray levels 0..255) are added to their
    transpose and then divided by their total so the entries sum to 1.

    Args:
        citra: 2-D image passed on to ``co_occurence``.
        derajat: Direction in degrees, passed on to ``co_occurence``.
        distance: Pixel offset, passed on to ``co_occurence``.

    Returns:
        The normalised symmetric co-occurrence matrix.
    """
    counts = co_occurence(citra, 255, derajat, distance)
    symmetric = counts + counts.T
    return symmetric / np.sum(symmetric)

def countFeatures(array):
    """Compute texture features from a normalised GLCM.

    Args:
        array: 2-D normalised co-occurrence matrix (entries sum to 1).

    Returns:
        A list of seven floats, in this order: contrast, dissimilarity,
        homogeneity, entropy (base 2), ASM, energy (square root of ASM) and
        correlation.
    """
    glcm = np.asarray(array, dtype=float)
    rows, cols = glcm.shape
    i, j = np.indices((rows, cols))
    diff = i - j

    kontras = np.sum(glcm * diff ** 2)
    diss = np.sum(glcm * np.abs(diff))
    hg = np.sum(glcm / (1 + diff ** 2))

    # Zero entries contribute nothing to the entropy and log2(0) is undefined.
    nonzero = glcm[glcm != 0]
    ent = -np.sum(nonzero * np.log2(nonzero))

    asm = np.sum(glcm ** 2)
    eng = np.sqrt(asm)

    miux = np.sum(i * glcm)
    miuy = np.sum(j * glcm)

    # Variances are measured around the means with the row/column index
    # (i - mean, j - mean), not the constant 1.
    sigx = np.sum((i - miux) ** 2 * glcm)
    sigy = np.sum((j - miuy) ** 2 * glcm)

    denominator = np.sqrt(sigx * sigy)
    if denominator > 0:
        corr = np.sum((i - miux) * (j - miuy) * glcm) / denominator
    else:
        # Zero variance (all mass on one gray level): correlation is
        # undefined; 1.0 is used here instead of dividing by zero.
        # NOTE(review): believed to match scikit-image's graycoprops - confirm.
        corr = 1.0

    return [float(value) for value in (kontras, diss, hg, ent, asm, eng, corr)]

def ekstraksiFitur(matriks):
    """Return the feature list of one GLCM by delegating to ``countFeatures``."""
    return countFeatures(matriks)

In [None]:
def ekstraksiData(names, images, labels, distance=1):
    """Extract GLCM texture features for every image.

    For each image the GLCM is built at 0, 45, 90 and 135 degrees and the
    seven features of each direction are concatenated in that order.

    Args:
        names: Image names, used only for the progress message.
        images: 2-D grayscale images, parallel to ``names``.
        labels: Class label of each image, parallel to ``names``.
        distance: Pixel offset used for all four GLCMs.

    Returns:
        A list with one row per image: 28 feature values followed by the label.
    """
    features = []
    for name, image, label in zip(names, images, labels):
        print(f"process : [{label}] {name}")

        fitur = []
        for derajat in (0, 45, 90, 135):
            glcm = getGLCM(image, derajat, distance)
            # Keep the seven features in the order the column names expect.
            fitur.extend(ekstraksiFitur(glcm)[:7])
        fitur.append(label)

        features.append(fitur)
    return features

In [None]:
# Folder written by the normalisation step ('normalized_images/finger_<n>').
dataset_dir = "normalized_images/"

imgs = []
labels = []
names = []
# Sorted listings make the "first 100 files per class" selection deterministic;
# os.listdir returns entries in arbitrary order.
for sub_folder in sorted(os.listdir(dataset_dir)):
    sub_folder_path = os.path.join(dataset_dir, sub_folder)
    if not os.path.isdir(sub_folder_path):
        continue
    for filename in sorted(os.listdir(sub_folder_path))[:100]:
        img = cv.imread(os.path.join(sub_folder_path, filename))
        if img is None:
            # Unreadable file: skip it rather than fail inside cvtColor.
            print(f"Skipping unreadable file: {filename}")
            continue
        gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
        imgs.append(gray)
        # The class label is the name of the sub-folder the image came from.
        labels.append(sub_folder)
        names.append(filename)

imgs = np.array(imgs)
labels = np.array(labels)
names = np.array(names)

In [None]:
# Build one GLCM feature row per loaded image (default distance of 1 pixel);
# each row ends with the image's label.
features = ekstraksiData(names, imgs, labels)

In [None]:
# Column names follow the feature order produced per direction:
# seven features for 0, 45, 90 and 135 degrees, then the label.
feature_names = [
    "Kontras",
    "Dissimilarity",
    "Homogenitas",
    "Entropi",
    "ASM",
    "Energy",
    "Correlation",
]
columns = [f"{feature}{angle}" for angle in (0, 45, 90, 135) for feature in feature_names]
columns.append("label")

glcm_df = pd.DataFrame(features, columns=columns)
glcm_df.info()
# index=False: without it pandas writes the row index as an extra unnamed
# first column, which the csv.reader-based cells below would read as a feature.
glcm_df.to_csv("dataset.csv", index=False)

### Feature Selection

In [None]:
def read_csv(file_path):
    """Read a CSV file into a header row and a list of string rows.

    Args:
        file_path: Path of the CSV file; its first row is the header.

    Returns:
        A ``(headers, data)`` tuple where ``data`` is a list of rows (lists of
        strings). Blank lines are skipped.
    """
    # newline='' is what the csv module expects so that newlines embedded in
    # quoted fields are preserved.
    with open(file_path, 'r', newline='') as file:
        reader = csv.reader(file)
        headers = next(reader)
        # csv.reader yields [] for blank lines; drop them so every row has
        # the same columns as the header.
        data = [row for row in reader if row]
    return headers, data


def modify_labels(data, label_column_index):
    """Replace the ``'finger_N'`` labels by the integer ``N``, in place.

    Args:
        data: List of mutable rows.
        label_column_index: Position of the label inside each row.

    Returns:
        The same ``data`` object. Labels outside ``finger_1``..``finger_5``
        are left unchanged.
    """
    label_mapping = {f'finger_{number}': number for number in range(1, 6)}
    for row in data:
        current = row[label_column_index]
        row[label_column_index] = label_mapping.get(current, current)
    return data


def calculate_correlation_matrix(data):
    """Compute the Pearson correlation between every pair of columns.

    Args:
        data: 2-D array-like with one row per sample and one column per
            feature; values must be convertible to float.

    Returns:
        An ``(n_features, n_features)`` array of correlation coefficients.
        Columns with zero variance produce NaN entries.
    """
    data = np.array(data, dtype=float)
    # rowvar=False: columns are the variables. atleast_2d keeps the result a
    # matrix when there is only one column.
    return np.atleast_2d(np.corrcoef(data, rowvar=False))


def calculate_correlation(x, y):
    """Return the Pearson correlation coefficient of two 1-D arrays.

    Computed as the mean product of the deviations from the means, divided by
    the product of the two standard deviations.
    """
    deviation_x = x - np.mean(x)
    deviation_y = y - np.mean(y)
    covariance = np.mean(deviation_x * deviation_y)
    spread = np.std(x) * np.std(y)
    return covariance / spread


def plot_correlation_matrix(correlation_matrix, headers):
    """Display the correlation matrix as a heat map with labelled axes.

    Args:
        correlation_matrix: Square matrix of correlation coefficients.
        headers: Feature names used as tick labels on both axes.
    """
    tick_positions = range(len(headers))

    plt.figure(figsize=(12, 10))
    plt.imshow(correlation_matrix, cmap='coolwarm', interpolation='none')
    plt.colorbar()
    # Same labels on both axes; the x labels are rotated to stay readable.
    plt.xticks(tick_positions, headers, rotation=90)
    plt.yticks(tick_positions, headers)
    plt.show()


def remove_highly_correlated_features(correlation_matrix, headers, threshold=0.9):
    """List the features that remain after dropping highly correlated ones.

    For every pair ``(i, j)`` with ``i < j`` whose absolute correlation
    exceeds ``threshold``, the later feature ``j`` is dropped.

    Args:
        correlation_matrix: Square matrix of correlation coefficients.
        headers: Feature names, in matrix order.
        threshold: Absolute correlation above which a feature is dropped.

    Returns:
        The names from ``headers`` that were not dropped, in original order.
    """
    size = correlation_matrix.shape[0]
    to_drop = {
        headers[later]
        for earlier in range(size)
        for later in range(earlier + 1, size)
        if abs(correlation_matrix[earlier, later]) > threshold
    }
    return [header for header in headers if header not in to_drop]


def save_reduced_dataset(file_path, headers, data):
    """Write ``headers`` followed by every row of ``data`` to a CSV file.

    Args:
        file_path: Destination path; an existing file is overwritten.
        headers: Column names written as the first row.
        data: Iterable of rows.
    """
    with open(file_path, 'w', newline='') as handle:
        out = csv.writer(handle)
        out.writerow(headers)
        for row in data:
            out.writerow(row)

file_path = 'dataset.csv'
headers, data = read_csv(file_path)

# A CSV written by pandas with its default settings starts with an unnamed
# column holding the row index. It is not a texture feature, so drop it
# before the correlation analysis.
if headers and headers[0] == '':
    headers = headers[1:]
    data = [row[1:] for row in data]

label_column_index = -1
data = modify_labels(data, label_column_index)

data = np.array(data, dtype=float)
features = data[:, :-1]
labels = data[:, -1]
feature_headers = headers[:-1]

correlation_matrix = calculate_correlation_matrix(features)

plot_correlation_matrix(correlation_matrix, feature_headers)

reduced_headers = remove_highly_correlated_features(correlation_matrix, feature_headers, threshold=0.9)

# Column positions of the surviving features, in their original order.
kept_columns = [feature_headers.index(h) for h in reduced_headers]
reduced_features = features[:, kept_columns]
reduced_headers.append(headers[label_column_index])
reduced_data = np.hstack((reduced_features, labels.reshape(-1, 1)))

save_reduced_dataset('reduced_dataset.csv', reduced_headers, reduced_data)

print(f"Original number of features: {len(headers) - 1}")
print(f"Reduced number of features: {len(reduced_headers) - 1}")

### Splitting Data

In [None]:
def read_csv(file_path):
    """Read a CSV file into a header row and a list of string rows.

    Args:
        file_path: Path of the CSV file; its first row is the header.

    Returns:
        A ``(headers, data)`` tuple where ``data`` is a list of rows (lists of
        strings). Blank lines are skipped.
    """
    # newline='' is what the csv module expects so that newlines embedded in
    # quoted fields are preserved.
    with open(file_path, 'r', newline='') as file:
        reader = csv.reader(file)
        headers = next(reader)
        # csv.reader yields [] for blank lines; drop them so every row has
        # the same columns as the header.
        data = [row for row in reader if row]
    return headers, data

def split_data(data, test_ratio=0.3, seed=None):
    """Shuffle ``data`` in place and split it into train and test parts.

    Args:
        data: Mutable sequence (list or array) of rows; it is shuffled in place.
        test_ratio: Fraction of rows, between 0 and 1, assigned to the test set.
        seed: Optional seed for a reproducible shuffle. When omitted, NumPy's
            global random state is used.

    Returns:
        A ``(train_data, test_data)`` tuple; the test part holds the first
        ``int(len(data) * test_ratio)`` rows of the shuffled data.

    Raises:
        ValueError: If ``test_ratio`` is outside ``[0, 1]``.
    """
    if not 0 <= test_ratio <= 1:
        raise ValueError(f"test_ratio must be between 0 and 1, got {test_ratio}")

    if seed is None:
        np.random.shuffle(data)
    else:
        np.random.default_rng(seed).shuffle(data)

    test_size = int(len(data) * test_ratio)
    test_data = data[:test_size]
    train_data = data[test_size:]
    return train_data, test_data

def save_dataset(file_path, headers, data):
    """Write a header row and the data rows to ``file_path`` as CSV.

    Args:
        file_path: Destination path; an existing file is overwritten.
        headers: Column names written as the first row.
        data: Iterable of rows.
    """
    with open(file_path, 'w', newline='') as csv_file:
        csv_out = csv.writer(csv_file)
        csv_out.writerow(headers)
        for record in data:
            csv_out.writerow(record)

file_path = 'reduced_dataset.csv'
headers, data = read_csv(file_path)

# Seed NumPy's global RNG so the shuffle inside split_data - and therefore
# the train/test files and every score computed from them - is reproducible.
np.random.seed(42)

# NOTE(review): rows derived from the same source image (its rotated copies)
# can land on both sides of this split - confirm whether that is acceptable.
train_data, test_data = split_data(data, test_ratio=0.3)

save_dataset('reduced_train_glcm_dataset.csv', headers, train_data)
save_dataset('reduced_test_glcm_dataset.csv', headers, test_data)

print(f"Training set size: {len(train_data)}")
print(f"Testing set size: {len(test_data)}")


### Normalization

In [None]:
def read_csv(file_path):
    """Read a numeric CSV file into a header row and a float array.

    Args:
        file_path: Path of the CSV file; its first row is the header and
            every other value must be convertible to float.

    Returns:
        A ``(headers, data)`` tuple where ``data`` is a 2-D float array with
        one row per non-blank line.
    """
    # newline='' is what the csv module expects for files it reads.
    with open(file_path, 'r', newline='') as file:
        reader = csv.reader(file)
        headers = next(reader)
        # Skip blank lines: an empty row would make the array ragged.
        data = [[float(value) for value in row] for row in reader if row]
    return headers, np.array(data)

def min_max_normalize(data):
    """Scale every column of ``data`` to the ``[0, 1]`` range.

    Args:
        data: 2-D numeric array with one column per feature.

    Returns:
        A float array of the same shape. A constant column (max == min) is
        mapped to zeros instead of dividing by zero.
    """
    data = np.asarray(data, dtype=float)
    min_vals = np.min(data, axis=0)
    value_range = np.max(data, axis=0) - min_vals
    # Replace zero ranges by 1 so constant columns become 0 rather than NaN.
    safe_range = np.where(value_range == 0, 1.0, value_range)
    return (data - min_vals) / safe_range

def save_dataset(file_path, headers, data):
    """Store ``headers`` and the rows of ``data`` in a CSV file.

    Args:
        file_path: Destination path; an existing file is overwritten.
        headers: Column names written as the first row.
        data: Iterable of rows.
    """
    with open(file_path, 'w', newline='') as target:
        row_writer = csv.writer(target)
        row_writer.writerow(headers)
        for values in data:
            row_writer.writerow(values)

file_path = 'reduced_train_glcm_dataset.csv'
headers, data = read_csv(file_path)

# The last column is the label; everything before it is a feature.
features, labels = data[:, :-1], data[:, -1]

norm_features = min_max_normalize(features)

# Re-attach the untouched labels as the last column.
normalized_data = np.column_stack((norm_features, labels))

save_dataset('normalized_train_glcm_dataset.csv', headers, normalized_data)

print("Data normalization completed and saved to 'normalized_train_glcm_dataset.csv'")


In [None]:
file_path = 'reduced_test_glcm_dataset.csv'
headers, data = read_csv(file_path)

features = data[:, :-1]
labels = data[:, -1]

# Scale the test features with the TRAINING set's min/max so both sets share
# one feature scale; scaling the test set with its own statistics would map
# the same raw value to different normalised values in train and test.
_, _train_rows = read_csv('reduced_train_glcm_dataset.csv')
_train_feature_values = _train_rows[:, :-1]
_train_min = np.min(_train_feature_values, axis=0)
_train_range = np.max(_train_feature_values, axis=0) - _train_min
# Constant training columns: divide by 1 instead of 0.
_train_range[_train_range == 0] = 1.0

norm_features = (features - _train_min) / _train_range

normalized_data = np.hstack((norm_features, labels.reshape(-1, 1)))

save_dataset('normalized_test_glcm_dataset.csv', headers, normalized_data)

print("Data normalization completed and saved to 'normalized_test_glcm_dataset.csv'")

## Modeling

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score


def read_csv(file_path):
    """Read a numeric CSV file into a header row and a float array.

    Args:
        file_path: Path of the CSV file; its first row is the header and
            every other value must be convertible to float.

    Returns:
        A ``(headers, data)`` tuple where ``data`` is a 2-D float array with
        one row per non-blank line.
    """
    # newline='' is what the csv module expects for files it reads.
    with open(file_path, 'r', newline='') as file:
        reader = csv.reader(file)
        headers = next(reader)
        # Skip blank lines: an empty row would make the array ragged.
        data = [[float(value) for value in row] for row in reader if row]
    return headers, np.array(data)


def min_max_normalize(data):
    """Scale every column of ``data`` to the ``[0, 1]`` range.

    Args:
        data: 2-D numeric array with one column per feature.

    Returns:
        A float array of the same shape. A constant column (max == min) is
        mapped to zeros instead of dividing by zero.
    """
    data = np.asarray(data, dtype=float)
    min_vals = np.min(data, axis=0)
    value_range = np.max(data, axis=0) - min_vals
    # Replace zero ranges by 1 so constant columns become 0 rather than NaN.
    safe_range = np.where(value_range == 0, 1.0, value_range)
    return (data - min_vals) / safe_range


# The two CSVs below were already min-max scaled in the Normalization step,
# so their features are used as-is. Rescaling each file again with its own
# min/max would put train and test features on different scales.
train_file_path = 'normalized_train_glcm_dataset.csv'
train_headers, train_data = read_csv(train_file_path)
train_features = train_data[:, :-1]
train_labels = train_data[:, -1]


test_file_path = 'normalized_test_glcm_dataset.csv'
test_headers, test_data = read_csv(test_file_path)
test_features = test_data[:, :-1]
test_labels = test_data[:, -1]


knn = KNeighborsClassifier(n_neighbors=3)
svm = SVC(kernel='linear')
rf = RandomForestClassifier(n_estimators=100, random_state=42)


# Fit every classifier on the training split only.
knn.fit(train_features, train_labels)
svm.fit(train_features, train_labels)
rf.fit(train_features, train_labels)


knn_predictions = knn.predict(test_features)
svm_predictions = svm.predict(test_features)
rf_predictions = rf.predict(test_features)


knn_accuracy = accuracy_score(test_labels, knn_predictions)
svm_accuracy = accuracy_score(test_labels, svm_predictions)
rf_accuracy = accuracy_score(test_labels, rf_predictions)


print(f"KNN Accuracy: {knn_accuracy * 100:.2f}%")
print(f"SVM Accuracy: {svm_accuracy * 100:.2f}%")
print(f"Random Forest Accuracy: {rf_accuracy * 100:.2f}%")


## Evaluation

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report

# Compute the additional evaluation metrics: precision, recall and F1-score,
# each averaged over the classes weighted by class frequency.
def _weighted_scores(y_true, y_pred):
    """Return (precision, recall, f1) with ``average='weighted'``."""
    return tuple(
        metric(y_true, y_pred, average='weighted')
        for metric in (precision_score, recall_score, f1_score)
    )


knn_precision, knn_recall, knn_f1_score = _weighted_scores(test_labels, knn_predictions)
svm_precision, svm_recall, svm_f1_score = _weighted_scores(test_labels, svm_predictions)
rf_precision, rf_recall, rf_f1_score = _weighted_scores(test_labels, rf_predictions)

# One row per model, one column per metric.
results = {
    'Model': ['KNN', 'SVM', 'Random Forest'],
    'Accuracy': [knn_accuracy, svm_accuracy, rf_accuracy],
    'Precision': [knn_precision, svm_precision, rf_precision],
    'Recall': [knn_recall, svm_recall, rf_recall],
    'F1-Score': [knn_f1_score, svm_f1_score, rf_f1_score]
}

results_df = pd.DataFrame(results)

print(results_df)

In [None]:
def plot_classification_report(report, title='Classification Report', cmap='coolwarm'):
    """Draw the per-class rows of a text classification report as a heat map.

    Args:
        report: Report text, one line per class between the header and the
            trailing summary lines.
        title: Figure title.
        cmap: Matplotlib colour map name.
    """
    lines = report.split('\n')

    # Keep only the lines that split into exactly five tokens: the class name,
    # three metric values, and a trailing token that is not plotted.
    class_rows = [
        tokens
        for tokens in (line.strip().split() for line in lines[2:(len(lines) - 3)])
        if len(tokens) == 5
    ]
    classes = [tokens[0] for tokens in class_rows]
    matrix = np.array([[float(x) for x in tokens[1:-1]] for tokens in class_rows])

    fig, ax = plt.subplots(figsize=(10, len(classes) * 1))
    cax = ax.matshow(matrix, cmap=cmap)
    plt.title(title, pad=20)
    fig.colorbar(cax)

    ax.set_xticks(np.arange(len(matrix[0])))
    ax.set_yticks(np.arange(len(classes)))

    ax.set_xticklabels(['Precision', 'Recall', 'F1-Score'], rotation=45)
    ax.set_yticklabels(classes)

    # Write each value inside its cell.
    for (row_index, col_index), value in np.ndenumerate(matrix):
        ax.text(col_index, row_index, f'{value:.2f}', va='center', ha='center', color='black')

    plt.xlabel('Metrics')
    plt.ylabel('Classes')
    plt.show()

# Build the text report of each model, then plot them in the same order.
classification_report_knn = classification_report(test_labels, knn_predictions)
classification_report_svm = classification_report(test_labels, svm_predictions)
classification_report_rf = classification_report(test_labels, rf_predictions)

for report_title, report_text in (
    ('Classification Report KNN', classification_report_knn),
    ('Classification Report SVM', classification_report_svm),
    ('Classification Report Random Forest', classification_report_rf),
):
    plot_classification_report(report_text, title=report_title)

In [None]:
def plot_confusion_matrix(y_true, y_pred, title='Confusion Matrix'):
    """Plot the confusion matrix of a set of predictions as a heat map.

    Args:
        y_true: True class labels.
        y_pred: Predicted class labels, parallel to ``y_true``.
        title: Figure title.
    """
    # Sorted labels that occur in either input, used both to order the matrix
    # and to label the axes with class values instead of positions.
    class_labels = np.unique(np.concatenate([np.ravel(y_true), np.ravel(y_pred)]))
    cm = confusion_matrix(y_true, y_pred, labels=class_labels)

    plt.figure(figsize=(8, 6))
    # fmt='d' prints the counts as plain integers.
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_labels, yticklabels=class_labels)
    plt.xlabel('Predicted labels')
    plt.ylabel('True labels')
    plt.title(title)
    plt.show()

# One confusion matrix per trained model.
for model_name, model_predictions in (
    ('KNN', knn_predictions),
    ('SVM', svm_predictions),
    ('RF', rf_predictions),
):
    plot_confusion_matrix(test_labels, model_predictions, title=f'{model_name} Confusion Matrix')
