### Load Library

In [9]:
import os
import numpy as np
import pandas as pd
import cv2
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from skimage.feature import hog, local_binary_pattern

### Load Path Dataset

In [2]:
# Path dataset lokal
train_folder = 'dataset/final_dataset/train'
test_folder = 'dataset/final_dataset/test'

### Ekstrasi Fitur

In [None]:
# Fungsi untuk mendapatkan target label berdasarkan struktur folder
def load_images_and_labels(folder_path):
    data = []
    for root, _, files in os.walk(folder_path):
        for file_name in files:
            if file_name.lower().endswith(('.jpg', '.jpeg', '.png')):
                file_path = os.path.join(root, file_name)
                # Ekstrak label dari angka sebelum ekstensi
                label = int(file_name.split('_')[-1].split('.')[0])
                data.append((file_path, label))  # Path dan label
    return pd.DataFrame(data, columns=['filepath', 'label'])

# Load data train dan test
train_df = load_images_and_labels(train_folder)
test_df = load_images_and_labels(test_folder)

# Ekstrak fitur HOG
def extract_hog_features(img, size=(200, 200)):
    img_resized = cv2.resize(img, size)
    features, _ = hog(
        img_resized,
        orientations=9,
        pixels_per_cell=(8, 8),
        cells_per_block=(2, 2),
        block_norm='L2-Hys',
        visualize=True,
        multichannel=False
    )
    return features

# Ekstraksi fitur LBP
def extract_lbp_features(img, size=(200, 200)):
    img_resized = cv2.resize(img, size)
    radius = 1
    n_points = 8 * radius
    lbp = local_binary_pattern(img_resized, n_points, radius, method='uniform')
    # Hitung histogram LBP
    (hist, _) = np.histogram(
        lbp.ravel(),
        bins=np.arange(0, n_points + 3),
        range=(0, n_points + 2)
    )
    # Normalisasi histogram
    hist = hist.astype("float")
    hist /= (hist.sum() + 1e-6)
    return hist

# Fungsi untuk ekstraksi fitur dari gambar menggunakan grid 10x10
def extract_features_from_image(img_path, size=(200, 200), grid_size=10):
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    img = cv2.resize(img, size)
    
    features = []
    for y in range(0, size[0], grid_size):
        for x in range(0, size[1], grid_size):
            # Potong gambar sesuai grid
            section = img[y:y+grid_size, x:x+grid_size]
            # Ekstraksi fitur HOG untuk tiap grid
            hog_features = extract_hog_features(section, size=(grid_size, grid_size))
            features.extend(hog_features)
            # Ekstraksi fitur LBP untuk tiap grid
            lbp_features = extract_lbp_features(section, size=(grid_size, grid_size))
            features.extend(lbp_features)
    return np.array(features)

# Ekstraksi fitur dari dataset
def extract_features(data_df):
    features = []
    labels = []
    for _, row in data_df.iterrows():
        img_features = extract_features_from_image(row['filepath'])
        features.append(img_features)
        labels.append(row['label'])
    return np.array(features), np.array(labels)

# Ekstraksi fitur train dan test
X_train, y_train = extract_features(train_folder)
X_test, y_test = extract_features(test_folder)

# Standarisasi fitur
scaler = StandardScaler()
X_train_sc = scaler.fit_transform(X_train)
X_test_sc = scaler.transform(X_test)

ValueError: The input image is too small given the values of pixels_per_cell and cells_per_block. It should have at least: 16 rows and 16 cols.

### Klasifikasi

In [9]:
from xgboost import XGBClassifier
from sklearn.metrics import confusion_matrix, accuracy_score

# Membuat model XGBoost
xgb_model = XGBClassifier(n_estimators=100, max_depth=5, random_state=42)

# Melatih model
xgb_model.fit(X_train_sc, y_train)

# Evaluasi model
y_pred = xgb_model.predict(X_test_sc)

# Confusion Matrix
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)

# Akurasi Model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

Confusion Matrix:
[[459  23  11   5   3   9]
 [ 19 379  59  36   6  11]
 [ 14  61 286  95  39  15]
 [  2  35 135 215  93  30]
 [  8   8  31  95 297  71]
 [  1  10  24  39  51 385]]
Accuracy: 66.05%


### Saving Model

In [None]:
import pickle

# Saving the XGBoost model from above in a pickle file for possible use later.
xgb_pickle = f"model/xgboost_hog_lbp_model_acc_{accuracy}.pkl"
with open(xgb_pickle, 'wb') as file:
    pickle.dump(xgb_model, file)

### SVM

In [11]:
# Model SVM
svm_model = SVC(kernel='linear', random_state=42)
svm_model.fit(X_train_sc, y_train)

# Evaluasi
y_pred = svm_model.predict(X_test_sc)
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)

# Akurasi Model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

Confusion Matrix:
[[459  37   2   3   4   5]
 [ 45 376  63  16   3   7]
 [ 21  98 231 105  38  17]
 [ 19  52 138 184  87  30]
 [  5  28  60 131 222  64]
 [  8  23  25  69  84 301]]
Accuracy: 57.94%


In [None]:
import pickle

# Saving the XGBoost model from above in a pickle file for possible use later.
svm_pickle = f"model/svm_hog_lbp_model_acc_{accuracy}.pkl"
with open(svm_pickle, 'wb') as file:
    pickle.dump(svm_model, file)

### Random Forest

In [13]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score

# Membuat model Random Forest
rf_model = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)

# Melatih model dengan data training
rf_model.fit(X_train_sc, y_train)

# Prediksi menggunakan model Random Forest
y_pred = rf_model.predict(X_test_sc)

# Confusion Matrix
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)

# Akurasi Model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

Confusion Matrix:
[[449  22  12   9  10   8]
 [ 36 377  52  26   8  11]
 [ 33  79 244  73  45  36]
 [ 32  31 120 172  89  66]
 [  6  13  28  73 293  97]
 [ 16  15  28  26  64 361]]
Accuracy: 61.96%


In [None]:
import pickle

# Saving the XGBoost model from above in a pickle file for possible use later.
svm_pickle = f"model/rf_hog_lbp_model_acc_{accuracy}.pkl"
with open(svm_pickle, 'wb') as file:
    pickle.dump(svm_model, file)