### Import Library

In [None]:
import os
import pickle

import cv2
import numpy as np
import pandas as pd
from skimage.feature import local_binary_pattern, hog
from skimage.filters import gabor
from skimage.transform import resize
from skimage.util import img_as_ubyte
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from xgboost import XGBClassifier

### Load Data

In [2]:
# Read the dataset file paths and their labels
def load_images_and_labels(folder_path):
    """Collect image paths under ``folder_path`` together with their labels.

    The folder is walked recursively. Every file whose name ends in ``.jpg``,
    ``.jpeg`` or ``.png`` (case-insensitive) is kept. Its label is the integer
    found between the last ``_`` of the file name and the first ``.`` that
    follows it, e.g. ``face_0012_3.jpg`` -> ``3``.

    Directories and files are visited in sorted order so that the row order
    (and everything derived from it) does not depend on the file system.

    Args:
        folder_path: Root directory to scan.

    Returns:
        pandas.DataFrame with columns ``filepath`` and ``label``, one row per
        image file. The frame is empty when no image file is found.

    Raises:
        ValueError: If an image file name does not carry an integer label in
            the expected position. The message names the offending file.
    """
    records = []
    for root, dirs, files in os.walk(folder_path):
        dirs.sort()  # sorted in place so os.walk descends deterministically
        for file_name in sorted(files):
            if not file_name.lower().endswith(('.jpg', '.jpeg', '.png')):
                continue
            file_path = os.path.join(root, file_name)
            # Label = text after the last '_' up to the first '.'
            label_text = file_name.split('_')[-1].split('.')[0]
            try:
                label = int(label_text)
            except ValueError:
                raise ValueError(
                    f"Cannot parse an integer label from file name: {file_path!r}"
                ) from None
            records.append((file_path, label))
    return pd.DataFrame(records, columns=['filepath', 'label'])

### Ekstraksi Fitur

In [3]:
# HOG feature extraction
def extract_hog_features(img, size=(200, 200)):
    """Return the HOG descriptor of a single-channel image.

    The image is first resized to ``size`` with anti-aliasing, then described
    with 9 orientation bins, 8x8-pixel cells and 2x2-cell blocks using
    ``L2-Hys`` block normalisation.

    Args:
        img: Single-channel image array (``channel_axis=None`` is passed to
            ``hog``, so no channel dimension is expected).
        size: Target shape handed to ``skimage.transform.resize``.

    Returns:
        The HOG feature vector returned by ``skimage.feature.hog``.
    """
    img_resized = resize(img, size, anti_aliasing=True)
    # visualize=False: only the descriptor is needed, so skip rendering the
    # HOG image that would otherwise be computed and thrown away.
    return hog(
        img_resized,
        orientations=9,
        pixels_per_cell=(8, 8),
        cells_per_block=(2, 2),
        block_norm='L2-Hys',
        visualize=False,
        channel_axis=None,
    )

# LBP feature extraction
def extract_lbp_features(img, size=(200, 200)):
    """Return a normalised histogram of uniform local binary patterns.

    The image is resized to ``size``, converted to unsigned 8-bit, and encoded
    with uniform LBP (radius 1, 8 sampling points). The codes are binned into
    ``n_points + 2`` bins and the histogram is normalised to sum to ~1.

    Args:
        img: Single-channel image array.
        size: Target shape as ``(rows, cols)``, the same convention used by
            the HOG and Gabor extractors in this notebook.

    Returns:
        1-D float ``numpy.ndarray`` with ``n_points + 2`` entries (10 for the
        radius used here).
    """
    # cv2.resize expects (width, height) whereas the sibling extractors pass
    # `size` to skimage's resize as (rows, cols); swap so that a non-square
    # size produces the same image shape in all three extractors.
    rows, cols = size
    img_resized = cv2.resize(img, (cols, rows))
    img_resized = img_as_ubyte(img_resized)

    radius = 1
    n_points = 8 * radius
    lbp = local_binary_pattern(img_resized, n_points, radius, method='uniform')

    # One bin per integer code 0 .. n_points + 1. Explicit bin edges already
    # fix the binning, so no separate `range` argument is needed.
    n_bins = n_points + 2
    hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, n_bins + 1))
    hist = hist.astype("float")
    hist /= (hist.sum() + 1e-6)  # epsilon guards against division by zero
    return hist

# Gabor feature extraction
def extract_gabor_features(img, size=(200, 200)):
    """Summarise Gabor filter responses of an image as mean/std pairs.

    The image is resized to ``size`` with anti-aliasing and filtered with a
    bank of 4 orientations x 3 frequencies. For each filter the mean and the
    standard deviation of the real response are recorded, in that order.

    Args:
        img: Image array to describe.
        size: Target shape handed to ``skimage.transform.resize``.

    Returns:
        List of 24 values: ``[mean, std]`` for every (orientation, frequency)
        pair, with orientation as the outer loop.
    """
    orientations = (0, np.pi / 4, np.pi / 2, 3 * np.pi / 4)
    frequencies = (0.1, 0.2, 0.3)

    scaled = resize(img, size, anti_aliasing=True)
    stats = []
    for angle in orientations:
        for freq in frequencies:
            # gabor() returns (real, imaginary); only the real part is used
            response = gabor(scaled, frequency=freq, theta=angle)[0]
            stats.extend((np.mean(response), np.std(response)))
    return stats

In [4]:
# Combine all features
def extract_features_from_image(img_path, size=(200, 200)):
    """Build the combined HOG + LBP + Gabor feature vector for one image file.

    Args:
        img_path: Path of the image; it is read as grayscale.
        size: Resize target forwarded to each individual extractor.

    Returns:
        1-D ``numpy.ndarray`` with the HOG, LBP and Gabor features
        concatenated in that order, or an empty array when the image cannot
        be read.
    """
    gray = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if gray is None:
        # cv2.imread signals an unreadable file with None; report "no features"
        return np.array([])

    extractors = (
        extract_hog_features,
        extract_lbp_features,
        extract_gabor_features,
    )
    return np.hstack([extract(gray, size) for extract in extractors])

In [None]:
# Feature extraction over a whole dataset
def extract_features(data_df):
    """Extract feature vectors and labels for every row of ``data_df``.

    Rows whose image yields no features (an empty vector) are skipped, so the
    result can have fewer entries than ``data_df``.

    Args:
        data_df: DataFrame with ``filepath`` and ``label`` columns.

    Returns:
        Tuple ``(features, labels)`` of ``numpy.ndarray`` objects, aligned
        row by row.
    """
    kept = []
    for path, label in zip(data_df['filepath'], data_df['label']):
        vector = extract_features_from_image(path)
        if vector.size == 0:
            continue  # image without features: leave it out
        kept.append((vector, label))

    features = [vector for vector, _ in kept]
    labels = [label for _, label in kept]
    return np.array(features), np.array(labels)

In [6]:
# Dataset folders (relative paths, resolved against the current working directory)
train_folder = 'dataset/final_dataset/train'
test_folder = 'dataset/final_dataset/test'

# Build the (filepath, label) tables for the train and test splits
train_df = load_images_and_labels(train_folder)
test_df = load_images_and_labels(test_folder)

In [7]:
# Extract the training feature matrix and label vector.
# Images that yield no features are skipped by extract_features, so the result
# may contain fewer rows than train_df.
X_train, y_train = extract_features(train_df)

In [8]:
# Extract the test feature matrix and label vector.
# Images that yield no features are skipped by extract_features, so the result
# may contain fewer rows than test_df.
X_test, y_test = extract_features(test_df)

In [None]:
# Standardise the features. The scaler is fitted on the training set only and
# the same fitted transform is then applied to both splits.
# NOTE: this cell overwrites X_train / X_test, so running it twice rescales
# already-scaled data; re-run the feature-extraction cells first.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

### Klasifikasi

In [13]:
# Build the XGBoost classifier (fixed random_state for reproducible training)
xgb_model = XGBClassifier(n_estimators=100, max_depth=5, random_state=42)

# Train the model on the training data
xgb_model.fit(X_train, y_train)

# Predict the age-range class of every test sample
y_pred = xgb_model.predict(X_test)

# Evaluate the model
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Overall accuracy; kept in `accuracy` because the model-saving cell reuses it
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

Confusion Matrix:
[[453  25   9  15   3   5]
 [ 20 384  63  22   8  13]
 [  5  46 294 119  34  12]
 [  5  15 154 211  93  32]
 [  2   8  31 100 295  74]
 [  1   5  10  46  59 389]]

Classification Report:
              precision    recall  f1-score   support

           0       0.93      0.89      0.91       510
           1       0.80      0.75      0.77       510
           2       0.52      0.58      0.55       510
           3       0.41      0.41      0.41       510
           4       0.60      0.58      0.59       510
           5       0.74      0.76      0.75       510

    accuracy                           0.66      3060
   macro avg       0.67      0.66      0.66      3060
weighted avg       0.67      0.66      0.66      3060

Accuracy: 66.21%


In [None]:
# Save the trained XGBoost model to a pickle file for possible later use.
# The accuracy is embedded in the file name to tell saved runs apart.
# NOTE: only unpickle files from a trusted source; pickle.load can execute
# arbitrary code.
xgb_pickle = f"model/xgboost_hog_lbp_model_acc_{accuracy}.pkl"

# open() does not create missing directories, so make sure the target exists
os.makedirs(os.path.dirname(xgb_pickle), exist_ok=True)
with open(xgb_pickle, 'wb') as file:
    pickle.dump(xgb_model, file)

: 