### Load Library

In [26]:
import os
import numpy as np
import pandas as pd
import cv2
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix

### Load Path Dataset

In [27]:
# Path dataset lokal
train_folder = 'dataset/final_dataset/train'
test_folder = 'dataset/final_dataset/test'

### Ekstrasi Fitur

In [28]:
import cv2

def face_detection(image_path, size=(200, 200)):
    # Load Haar Cascade
    cascade_wajah = cv2.CascadeClassifier('input_output/haarcascade_frontalface_default.xml')

    # Membaca gambar
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)  # Menggunakan variabel `image_path`
    if image is None:
        raise ValueError(f"Gambar tidak ditemukan atau tidak dapat dibaca: {image_path}")

    # Deteksi wajah
    faces = cascade_wajah.detectMultiScale(image, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

    # Wajah tidak terdeteksi
    if len(faces) == 0:
        # Jika tidak ada wajah, crop 70% dari tengah gambar
        height, width = image.shape[:2]
        new_height = int(height * 0.70)
        new_width = int(width * 0.70)

        # Menghitung margin untuk cropping agar tetap di tengah
        top_margin = (height - new_height) // 2
        left_margin = (width - new_width) // 2

        # Memotong area gambar
        face_crop = image[top_margin:top_margin + new_height, left_margin:left_margin + new_width]
        face_crop = cv2.resize(face_crop, size)
        return face_crop
    else:
        # Cari wajah dengan frame terbesar
        largest_face = max(faces, key=lambda rect: rect[2] * rect[3])  # Pilih berdasarkan area (w * h)
        x, y, w, h = largest_face

        # Memotong gambar sesuai area deteksi wajah terbesar
        face_crop = image[y:y + h, x:x + w]
        face_crop = cv2.resize(face_crop, size)

        # Mengurangi area gambar sebesar 10% (90% dari ukuran asli)
        height, width = face_crop.shape[:2]
        new_height = int(height * 0.70)
        new_width = int(width * 0.70)

        # Menghitung margin untuk cropping agar tetap di tengah
        top_margin = (height - new_height) // 2
        left_margin = (width - new_width) // 2

        # Memotong area gambar
        face_crop = face_crop[top_margin:top_margin + new_height, left_margin:left_margin + new_width]
        return face_crop


In [None]:
from skimage.feature import canny

# Fungsi untuk mendapatkan target label berdasarkan struktur folder
def load_images_and_labels(folder_path):
    data = []
    for root, _, files in os.walk(folder_path):
        for file_name in files:
            if file_name.lower().endswith(('.jpg', '.jpeg', '.png')):
                file_path = os.path.join(root, file_name)
                # Ekstrak label dari angka sebelum ekstensi
                label = int(file_name.split('_')[-1].split('.')[0])
                data.append((file_path, label))  # Path dan label
    return pd.DataFrame(data, columns=['filepath', 'label'])

# Load data train dan test
train_df = load_images_and_labels(train_folder)
test_df = load_images_and_labels(test_folder)

# Fungsi untuk ekstraksi fitur dari gambar menggunakan grid 10x10
def extract_features_from_image(img_path, size=(200, 200)):
    # Tambahan Code untuk read, detect dan resize gambar
    img = face_detection(img_path, size)
    # img = cv2.resize(img, size) 
    # img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    # img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    
    # Deteksi tepi menggunakan Canny dari skimage dengan sigma
    img = canny(img, sigma=0.9)
    
    features = []
    for y in range(0, size[0], 10):
        for x in range(0, size[1], 10):
            section = img[y:y+10, x:x+10]
            features.append(np.mean(section))
            features.append(np.std(section))
    return np.array(features)

# Ekstraksi fitur dari dataset
def extract_features(data_df):
    features = []
    labels = []
    for _, row in data_df.iterrows():
        img_features = extract_features_from_image(row['filepath'])
        features.append(img_features)
        labels.append(row['label'])
    return np.array(features), np.array(labels)

# Ekstraksi fitur train dan test
X_train, y_train = extract_features(train_df)
X_test, y_test = extract_features(test_df)

# Standarisasi fitur
scaler = StandardScaler()
X_train_sc = scaler.fit_transform(X_train)
X_test_sc = scaler.transform(X_test)

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


### Klasifikasi

In [6]:
!pip install xgboost



In [22]:
from xgboost import XGBClassifier
from sklearn.metrics import confusion_matrix, accuracy_score

# Membuat model XGBoost
xgb_model = XGBClassifier(n_estimators=100, max_depth=5, random_state=42)

# Melatih model
xgb_model.fit(X_train_sc, y_train)

# Evaluasi model
y_pred = xgb_model.predict(X_test_sc)

# Confusion Matrix
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)

# Akurasi Model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

Confusion Matrix:
[[394  43  29  20   7  17]
 [ 65 260  84  51  30  20]
 [ 23  78 237  95  48  29]
 [ 35  45 139 162  79  50]
 [ 25  25  63  87 216  94]
 [ 35  31  31  61  84 268]]
Accuracy: 50.23%


### Saving Model

In [23]:
import pickle

# Saving the XGBoost model from above in a pickle file for possible use later.
xgb_pickle = f"model/xgboost_canny_model_acc_{round(accuracy, 2)}.pkl"
with open(xgb_pickle, 'wb') as file:
    pickle.dump(xgb_model, file)

### SVM

In [None]:
from sklearn.svm import SVC

# Model SVM
svm_model = SVC(kernel='linear', random_state=42)
svm_model.fit(X_train_sc, y_train)

# Evaluasi
y_pred = svm_model.predict(X_test_sc)
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)

# Akurasi Model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

ValueError: Input X contains NaN.
SVC does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modules/impute.html You can find a list of all estimators that handle NaN values at the following page: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values

In [26]:
import pickle

# Saving the XGBoost model from above in a pickle file for possible use later.
svm_pickle = f"model/svm_canny_model_acc_{round(accuracy, 2)}.pkl"
with open(svm_pickle, 'wb') as file:
    pickle.dump(svm_model, file)

### Random Forest

In [25]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score

# Membuat model Random Forest
rf_model = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)

# Melatih model dengan data training
rf_model.fit(X_train_sc, y_train)

# Prediksi menggunakan model Random Forest
y_pred = rf_model.predict(X_test_sc)

# Confusion Matrix
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)

# Akurasi Model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

Confusion Matrix:
[[408  32  34  15   9  12]
 [ 52 305  77  30  30  16]
 [ 49  46 257  69  54  35]
 [ 31  31 151 142 108  47]
 [ 21  32  63  46 276  72]
 [ 20  23  34  32  86 315]]
Accuracy: 55.65%


In [27]:
import pickle

# Saving the XGBoost model from above in a pickle file for possible use later.
svm_pickle = f"model/rf_canny_model_acc_{round(accuracy, 2)}.pkl"
with open(svm_pickle, 'wb') as file:
    pickle.dump(svm_model, file)