### Load Library

In [1]:
import os
import numpy as np
import pandas as pd
import cv2
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix

### Load Path Dataset

In [2]:
# Local dataset locations, relative to the notebook's working directory.
# Each folder is scanned for one sub-folder per class label.
test_folder = 'assets/test'
train_folder = 'assets/train'

### Ekstraksi Fitur

In [4]:
from skimage.feature import canny

def load_images_and_labels(folder_path):
    """Index the image files under ``folder_path`` together with their labels.

    ``folder_path`` is expected to contain one sub-directory per class whose
    name is the integer class id (e.g. ``0/``, ``1/``). Every ``.jpg``,
    ``.jpeg`` or ``.png`` file (case-insensitive) directly inside such a
    sub-directory becomes one row of the result.

    Hidden sub-directories (names starting with ``.``, for example Jupyter's
    ``.ipynb_checkpoints``) are skipped so they cannot break the integer
    conversion of the label. Entries are visited in sorted order because
    ``os.listdir`` returns them in arbitrary order; sorting keeps the row
    order identical on every run and platform.

    Args:
        folder_path: Root directory of one dataset split.

    Returns:
        pandas.DataFrame with the columns ``filepath`` and ``label`` (int),
        one row per image file. Empty (but with both columns) when no image
        is found.

    Raises:
        ValueError: If a non-hidden sub-directory that contains images has a
            name that is not an integer.
    """
    data = []
    for label in sorted(os.listdir(folder_path)):
        if label.startswith('.'):
            continue  # hidden tooling folders are never class folders
        label_path = os.path.join(folder_path, label)
        if not os.path.isdir(label_path):
            continue  # only sub-directories represent classes
        for file_name in sorted(os.listdir(label_path)):
            if file_name.lower().endswith(('.jpg', '.jpeg', '.png')):
                file_path = os.path.join(label_path, file_name)
                data.append((file_path, int(label)))  # (path, class id)
    return pd.DataFrame(data, columns=['filepath', 'label'])

# Index the image files of both splits (one row per file: path + label).
train_df, test_df = (
    load_images_and_labels(train_folder),
    load_images_and_labels(test_folder),
)

# Report how many images each split contains.
print(f"Jumlah foto di dataset training: {len(train_df)}")
print(f"Jumlah foto di dataset testing: {len(test_df)}")

def extract_features_from_image(img_path, size=(200, 200), cell=10):
    """Compute per-tile edge statistics for one image.

    The image is read as grayscale, resized to ``size``, turned into an edge
    map with skimage's Canny detector (``sigma=0.9``) and split into tiles of
    ``cell`` x ``cell`` pixels, scanned row by row. For every tile the mean
    and then the standard deviation of the edge map are appended.

    Args:
        img_path: Path of the image file to read.
        size: Target ``(width, height)`` handed to ``cv2.resize``.
        cell: Side length of one tile in pixels. The default of 10 yields a
            20 x 20 grid (800 features) for the default ``size``.

    Returns:
        1-D numpy array holding ``mean, std`` for each tile in scan order.

    Raises:
        OSError: If OpenCV cannot read ``img_path`` (missing file,
            unsupported or corrupt format).
    """
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with a clear message rather than inside cv2.resize.
        raise OSError(f"Could not read image: {img_path}")
    img = cv2.resize(img, size)

    # Edge detection with skimage's Canny (sigma controls the Gaussian blur).
    edges = canny(img, sigma=0.9)

    # cv2.resize takes (width, height) but the array is (rows, cols), i.e.
    # (height, width). Iterate over the real shape so non-square sizes work.
    height, width = edges.shape

    features = []
    for y in range(0, height, cell):
        for x in range(0, width, cell):
            section = edges[y:y + cell, x:x + cell]
            features.append(np.mean(section))
            features.append(np.std(section))
    return np.array(features)

def extract_features(data_df):
    """Build the feature matrix and label vector for a dataset index.

    Args:
        data_df: DataFrame with a ``filepath`` and a ``label`` column, one
            row per image.

    Returns:
        Tuple ``(features, labels)`` of numpy arrays: the stacked result of
        ``extract_features_from_image`` for every path, and the labels in
        the same row order.
    """
    image_paths = data_df['filepath'].tolist()
    targets = data_df['label'].tolist()
    feature_rows = [extract_features_from_image(path) for path in image_paths]
    return np.array(feature_rows), np.array(targets)

# Turn every image of both splits into a feature vector.
X_train, y_train = extract_features(train_df)
X_test, y_test = extract_features(test_df)

# Standardize the features: the scaler learns mean/variance from the training
# split only, and the same transformation is then applied to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_sc = scaler.transform(X_train)
X_test_sc = scaler.transform(X_test)

Jumlah foto di dataset training: 120
Jumlah foto di dataset testing: 60


### Klasifikasi

In [6]:
!pip install xgboost



In [5]:
from xgboost import XGBClassifier
from sklearn.metrics import confusion_matrix, accuracy_score

# Build the XGBoost classifier and train it on the standardized training
# features (fit() returns the fitted estimator).
xgb_model = XGBClassifier(n_estimators=100, max_depth=5, random_state=42).fit(X_train_sc, y_train)

# Predict the test split and score the predictions.
y_pred = xgb_model.predict(X_test_sc)
cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

# Report the confusion matrix, then the accuracy as a percentage.
print("Confusion Matrix:")
print(cm)
print(f"Accuracy: {accuracy * 100:.2f}%")

Confusion Matrix:
[[5 4 1 0 0 0]
 [0 0 6 4 0 0]
 [1 1 0 3 2 3]
 [1 2 0 4 1 2]
 [1 2 2 1 3 1]
 [0 5 0 2 1 2]]
Accuracy: 23.33%


### Saving Model

In [6]:
import pickle

# Persist the trained XGBoost model with pickle for later reuse. The accuracy
# currently held in `accuracy` (rounded to two decimals) is embedded in the
# file name.
xgb_pickle = f"model/new_xgboost_canny_model_acc_{round(accuracy, 2)}.pkl"
# open() does not create missing directories, so make sure "model/" exists.
os.makedirs(os.path.dirname(xgb_pickle), exist_ok=True)
with open(xgb_pickle, 'wb') as file:
    pickle.dump(xgb_model, file)

### SVM

In [8]:
from sklearn.svm import SVC

# Linear-kernel SVM trained on the standardized training features
# (fit() returns the fitted estimator).
svm_model = SVC(kernel='linear', random_state=42).fit(X_train_sc, y_train)

# Predict the test split and score the predictions.
y_pred = svm_model.predict(X_test_sc)
cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

# Report the confusion matrix, then the accuracy as a percentage.
print("Confusion Matrix:")
print(cm)
print(f"Accuracy: {accuracy * 100:.2f}%")

Confusion Matrix:
[[4 1 0 2 1 2]
 [0 2 4 3 1 0]
 [1 0 3 2 1 3]
 [3 1 0 3 1 2]
 [1 2 1 1 3 2]
 [3 1 0 4 1 1]]
Accuracy: 26.67%


In [9]:
import pickle

# Persist the trained SVM model with pickle for later reuse. The accuracy
# currently held in `accuracy` (rounded to two decimals) is embedded in the
# file name.
svm_pickle = f"model/new_svm_canny_model_acc_{round(accuracy, 2)}.pkl"
# open() does not create missing directories, so make sure "model/" exists.
os.makedirs(os.path.dirname(svm_pickle), exist_ok=True)
with open(svm_pickle, 'wb') as file:
    pickle.dump(svm_model, file)

### Random Forest

In [10]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score

# Build the Random Forest classifier and train it on the standardized
# training features (fit() returns the fitted estimator).
rf_model = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42).fit(X_train_sc, y_train)

# Predict the test split and score the predictions.
y_pred = rf_model.predict(X_test_sc)
cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

# Report the confusion matrix, then the accuracy as a percentage.
print("Confusion Matrix:")
print(cm)
print(f"Accuracy: {accuracy * 100:.2f}%")

Confusion Matrix:
[[6 2 0 0 1 1]
 [0 0 3 4 2 1]
 [0 0 0 3 3 4]
 [3 0 1 2 2 2]
 [1 0 4 0 4 1]
 [1 3 3 1 2 0]]
Accuracy: 20.00%


In [11]:
import pickle

# Persist the trained Random Forest model with pickle for later reuse. The
# accuracy currently held in `accuracy` (rounded to two decimals) is embedded
# in the file name.
# Fix: this cell previously pickled `svm_model` into the Random Forest file
# and overwrote the `svm_pickle` path variable.
rf_pickle = f"model/new_rf_canny_model_acc_{round(accuracy, 2)}.pkl"
# open() does not create missing directories, so make sure "model/" exists.
os.makedirs(os.path.dirname(rf_pickle), exist_ok=True)
with open(rf_pickle, 'wb') as file:
    pickle.dump(rf_model, file)