In [1]:
# %% [Sel 1] - Import Libraries
import time
import numpy as np
import os
from skimage.io import imread
from skimage.transform import resize
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier, VotingClassifier, StackingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.utils import shuffle
from imgaug import augmenters as iaa
import joblib
from scipy.stats import randint, uniform
import warnings
import matplotlib.pyplot as plt
warnings.filterwarnings('ignore')

print("[INFO] Semua library berhasil diimpor")

[INFO] Semua library berhasil diimpor


In [2]:
# %% [Sel 2] - Memuat Data dan Preprocessing
def load_data(data_dir, target_size=(64, 64)):
    start_time = time.time()
    images = []
    labels = []
    label_map = {'neutral': 0, 'happy': 1, 'sad': 2, 'angry': 3}
    
    print("[INFO] Memulai proses loading data...")
    for person_dir in os.listdir(data_dir):
        person_path = os.path.join(data_dir, person_dir)
        if not os.path.isdir(person_path):
            continue
            
        for img_file in os.listdir(person_path):
            if img_file.endswith('.png'):
                parts = img_file.split('_')
                if len(parts) >= 4:  # Memastikan ada ekspresi dalam nama file
                    expression = parts[2]
                    if expression in label_map:
                        img_path = os.path.join(person_path, img_file)
                        img = imread(img_path, as_gray=True)
                        img = resize(img, target_size, anti_aliasing=True)
                        img = (img * 255).astype(np.uint8)  # Skala ke 0-255
                        images.append(img)
                        labels.append(label_map[expression])
    
    elapsed = time.time() - start_time
    print(f"[INFO] Loading data selesai. Waktu eksekusi: {elapsed:.2f} detik")
    return np.array(images), np.array(labels)

# Path ke dataset
data_dir = 'faces-png'  # Update path ini sesuai lokasi dataset Anda
start_time = time.time()
images, labels = load_data(data_dir)

# Flatten images untuk model ML tradisional
n_samples = images.shape[0]
X = images.reshape((n_samples, -1))  # Flatten setiap gambar
y = labels

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)
elapsed = time.time() - start_time
print(f"[INFO] Preprocessing data selesai. Waktu eksekusi: {elapsed:.2f} detik")
print(f"[INFO] Jumlah data latih: {len(X_train)}, Jumlah data test: {len(X_test)}")

[INFO] Memulai proses loading data...
[INFO] Loading data selesai. Waktu eksekusi: 6.86 detik
[INFO] Preprocessing data selesai. Waktu eksekusi: 6.88 detik
[INFO] Jumlah data latih: 1497, Jumlah data test: 375


In [3]:
def load_data(data_dir, target_size=(64, 64)):
    start_time = time.time()
    images = []
    labels = []
    label_map = {'neutral': 0, 'happy': 1, 'sad': 2, 'angry': 3}
    
    print("[INFO] Memulai proses loading data...")
    for person_dir in os.listdir(data_dir):
        person_path = os.path.join(data_dir, person_dir)
        if not os.path.isdir(person_path):
            continue
            
        for img_file in os.listdir(person_path):
            if img_file.endswith('.png'):
                parts = img_file.split('_')
                if len(parts) >= 4:  # Memastikan ada ekspresi dalam nama file
                    expression = parts[2]
                    if expression in label_map:
                        img_path = os.path.join(person_path, img_file)
                        img = imread(img_path, as_gray=True)
                        img = resize(img, target_size, anti_aliasing=True)
                        img = (img * 255).astype(np.uint8)  # Skala ke 0-255
                        images.append(img)
                        labels.append(label_map[expression])
    
    elapsed = time.time() - start_time
    print(f"[INFO] Loading data selesai. Waktu eksekusi: {elapsed:.2f} detik")
    return np.array(images), np.array(labels)

# Path ke dataset
data_dir = 'faces-png'  # Update path ini sesuai lokasi dataset Anda
start_time = time.time()
images, labels = load_data(data_dir)

# Flatten images for model ML tradisional
n_samples = images.shape[0]
X = images.reshape((n_samples, -1))  # Flatten setiap gambar (64x64 = 4096)
y = labels

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

# Define augmentation pipeline
aug = iaa.Sequential([
    iaa.Sometimes(0.5, iaa.Affine(
        rotate=(-20, 20),  # Rotate between -20 and 20 degrees
        translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)},  # Shift by up to 20%
        scale=(0.8, 1.2)  # Zoom between 80% and 120%
    )),
    iaa.Sometimes(0.5, iaa.Fliplr(1.0)),  # Horizontal flip with 50% probability
])

# Augment training data
n_augmentations = 5  # Number of augmented images per original training image
augmented_images = []
augmented_labels = []

print("[INFO] Memulai augmentasi data...")
aug_start_time = time.time()

# Only augment training data
train_images = images[y_train.argsort()]  # Reconstruct training images based on y_train
for i in range(len(X_train)):
    img = train_images[i].reshape(64, 64)  # Reshape to 2D for augmentation
    label = y_train[i]
    # Generate n_augmentations augmented images
    for _ in range(n_augmentations):
        img_aug = aug.augment_image(img)
        img_aug = (img_aug * 255).astype(np.uint8)  # Ensure 0-255 scale
        augmented_images.append(img_aug.reshape(-1))  # Flatten to 4096
        augmented_labels.append(label)

# Convert to numpy arrays
augmented_images = np.array(augmented_images)
augmented_labels = np.array(augmented_labels)

# Combine original and augmented training data
X_train = np.concatenate([X_train, augmented_images])
y_train = np.concatenate([y_train, augmented_labels])

elapsed = time.time() - aug_start_time
print(f"[INFO] Augmentasi data selesai. Waktu eksekusi: {elapsed:.2f} detik")
print(f"[INFO] Jumlah data latih setelah augmentasi: {len(X_train)}, Jumlah data test: {len(X_test)}")

[INFO] Memulai proses loading data...
[INFO] Loading data selesai. Waktu eksekusi: 6.77 detik
[INFO] Memulai augmentasi data...
[INFO] Augmentasi data selesai. Waktu eksekusi: 8.24 detik
[INFO] Jumlah data latih setelah augmentasi: 8982, Jumlah data test: 375


In [7]:
from collections import Counter
print("Distribusi kelas:", Counter(y_train))

Distribusi kelas: Counter({0: 2274, 2: 2244, 1: 2232, 3: 2232})


In [9]:
# %% [Sel 4] - PCA dan Preprocessing
start_time = time.time()
print("[INFO] Memulai proses scaling dan PCA...")

# Pertama scaling data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Terapkan PCA
pca = PCA(n_components=0.95, random_state=42)  # Pertahankan 95% variance
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

elapsed = time.time() - start_time
print(f"[INFO] PCA selesai. Waktu eksekusi: {elapsed:.2f} detik")
print(f"Dimensi asli: {X_train.shape[1]}")
print(f"Dimensi setelah PCA: {X_train_pca.shape[1]}")

[INFO] Memulai proses scaling dan PCA...
[INFO] PCA selesai. Waktu eksekusi: 44.34 detik
Dimensi asli: 4096
Dimensi setelah PCA: 634


In [12]:
# %% [Sel 5] - Pelatihan Model Dasar dengan Tuning Hyperparameter
def train_and_evaluate_model(model, params, name, X_train, y_train, X_test, y_test):
    start_time = time.time()
    print(f"\n[INFO] Memulai training model {name}...")
    
    # Gunakan RandomizedSearchCV untuk tuning hyperparameter
    rs = RandomizedSearchCV(model, params, n_iter=20, cv=3, 
                          n_jobs=-1, scoring='accuracy', random_state=42)
    rs.fit(X_train, y_train)
    best_model = rs.best_estimator_
    
    # Evaluasi model
    y_pred = best_model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    
    elapsed = time.time() - start_time
    print(f"[INFO] Training model {name} selesai. Waktu eksekusi: {elapsed:.2f} detik")
    print(f"Parameter terbaik {name}: {rs.best_params_}")
    print(f"Akurasi {name}: {acc:.4f}")
    
    return best_model, acc

# SVM Model
svm_params = {
    'C': uniform(0.1, 10),
    'gamma': ['scale', 'auto'] + list(np.logspace(-3, 1, 5)),
    'kernel': ['rbf', 'poly', 'sigmoid']
}
best_svm, svm_acc = train_and_evaluate_model(
    SVC(probability=True, random_state=42), 
    svm_params, 
    'SVM', 
    X_train_pca, y_train, 
    X_test_pca, y_test
)

# Random Forest
rf_params = {
    'n_estimators': randint(50, 300),
    'max_depth': [None] + list(np.arange(5, 30, 5)),
    'min_samples_split': randint(2, 20),
    'min_samples_leaf': randint(1, 10),
    'bootstrap': [True, False]
}
best_rf, rf_acc = train_and_evaluate_model(
    RandomForestClassifier(random_state=42), 
    rf_params, 
    'Random Forest', 
    X_train_pca, y_train, 
    X_test_pca, y_test
)

# KNN
knn_params = {
    'n_neighbors': randint(1, 20),
    'weights': ['uniform', 'distance'],
    'p': [1, 2]
}
best_knn, knn_acc = train_and_evaluate_model(
    KNeighborsClassifier(), 
    knn_params, 
    'KNN', 
    X_train_pca, y_train, 
    X_test_pca, y_test
)


[INFO] Memulai training model SVM...
[INFO] Training model SVM selesai. Waktu eksekusi: 3103.44 detik
Parameter terbaik SVM: {'C': 6.274815096277165, 'gamma': 'auto', 'kernel': 'poly'}
Akurasi SVM: 0.7467

[INFO] Memulai training model Random Forest...
[INFO] Training model Random Forest selesai. Waktu eksekusi: 658.56 detik
Parameter terbaik Random Forest: {'bootstrap': False, 'max_depth': 25, 'min_samples_leaf': 6, 'min_samples_split': 3, 'n_estimators': 241}
Akurasi Random Forest: 0.6800

[INFO] Memulai training model KNN...
[INFO] Training model KNN selesai. Waktu eksekusi: 76.06 detik
Parameter terbaik KNN: {'n_neighbors': 9, 'p': 2, 'weights': 'uniform'}
Akurasi KNN: 0.1333


In [14]:
# %% [Sel 6] - Ensemble Learning dengan Voting Classifier
start_time = time.time()
print("\n[INFO] Membuat Voting Classifier...")

voting_clf = VotingClassifier(
    estimators=[
        ('svm', best_svm),
        ('rf', best_rf),
        ('knn', best_knn)
    ],
    voting='hard'  # Gunakan soft voting untuk weighting berbasis probabilitas
)

voting_clf.fit(X_train_pca, y_train)
y_pred_voting = voting_clf.predict(X_test_pca)
voting_acc = accuracy_score(y_test, y_pred_voting)

elapsed = time.time() - start_time
print(f"[INFO] Voting Classifier selesai. Waktu eksekusi: {elapsed:.2f} detik")
print(f"Akurasi Voting Classifier: {voting_acc:.4f}")


[INFO] Membuat Voting Classifier...
[INFO] Voting Classifier selesai. Waktu eksekusi: 573.21 detik
Akurasi Voting Classifier: 0.6587


In [15]:
# %% [Sel 8] - Evaluasi Final dan Pemilihan Model Terbaik
print("\n[INFO] Membandingkan performa semua model...")
model_accuracies = {
    'SVM': svm_acc,
    'Random Forest': rf_acc,
    'KNN': knn_acc,
    'Voting Classifier': voting_acc
}

# Temukan model dengan akurasi tertinggi
best_model_name = max(model_accuracies, key=model_accuracies.get)
best_model = {
    'SVM': best_svm,
    'Random Forest': best_rf,
    'KNN': best_knn,
    'Voting Classifier': voting_clf
}[best_model_name]

print("\nPerforma semua model:")
for name, acc in model_accuracies.items():
    print(f"{name}: {acc:.4f}")

print(f"\nModel terbaik adalah {best_model_name} dengan akurasi {model_accuracies[best_model_name]:.4f}")

# Tampilkan classification report untuk model terbaik
y_pred_best = best_model.predict(X_test_pca)
print("\nClassification Report Model Terbaik:")
print(classification_report(y_test, y_pred_best, 
                          target_names=['neutral', 'happy', 'sad', 'angry']))


[INFO] Membandingkan performa semua model...

Performa semua model:
SVM: 0.7467
Random Forest: 0.6800
KNN: 0.1333
Voting Classifier: 0.6587

Model terbaik adalah SVM dengan akurasi 0.7467

Classification Report Model Terbaik:
              precision    recall  f1-score   support

     neutral       0.82      0.79      0.80        95
       happy       0.67      0.67      0.67        93
         sad       0.74      0.73      0.74        94
       angry       0.76      0.80      0.78        93

    accuracy                           0.75       375
   macro avg       0.75      0.75      0.75       375
weighted avg       0.75      0.75      0.75       375



In [16]:
# %% [Sel 9] - Menyimpan Model Terbaik
start_time = time.time()
print(f"\n[INFO] Menyimpan model terbaik ({best_model_name}) ke file...")

# Buat pipeline lengkap termasuk preprocessing
pipeline = Pipeline([
    ('scaler', scaler),
    ('pca', pca),
    ('classifier', best_model)
])

# Simpan pipeline ke file
model_filename = 'best_facial_expression_model.pkl'
joblib.dump(pipeline, model_filename)

elapsed = time.time() - start_time
print(f"[INFO] Model berhasil disimpan sebagai {model_filename}. Waktu eksekusi: {elapsed:.2f} detik")


[INFO] Menyimpan model terbaik (SVM) ke file...
[INFO] Model berhasil disimpan sebagai best_facial_expression_model.pkl. Waktu eksekusi: 0.09 detik


In [17]:
# %% [Memuat Model yang Telah Disimpan]
def load_model(model_path):
    print("[INFO] Memuat model dari file...")
    model = joblib.load(model_path)
    print("[INFO] Model berhasil dimuat")
    return model

# Path ke model Anda
MODEL_PATH = 'best_facial_expression_model.pkl'  # Sesuaikan dengan lokasi file model

# Memuat model
pipeline = load_model(MODEL_PATH)

[INFO] Memuat model dari file...
[INFO] Model berhasil dimuat


In [32]:
# %% [Fungsi Preprocessing Gambar]
def preprocess_image(image_path):
    print(f"[INFO] Memproses gambar: {image_path}")
    # Baca gambar sebagai grayscale
    img = imread(image_path, as_gray=True)
    # Resize ke 64x64 (sesuai dengan training)
    img = resize(img, (64, 64), anti_aliasing=True)
    # Scale ke 0-255
    img = (img * 255).astype(np.uint8)
    # Flatten gambar
    img_flat = img.reshape(1, -1)  # Bentuk (1, 4096) untuk 64x64
    return img, img_flat  # Kembalikan gambar asli dan yang sudah diproses

# %% [Fungsi Prediksi dan Visualisasi]
def predict_and_visualize(model, image_path):
    # Preprocess gambar
    img_original, img_processed = preprocess_image(image_path)
    
    # Lakukan prediksi probabilitas
    proba = model.predict_proba(img_processed)[0]
    
    # Tentukan prediksi berdasarkan probabilitas tertinggi
    pred = np.argmax(proba)  # Kelas dengan probabilitas tertinggi
    
    # Map kelas ke label
    label_map = {0: 'neutral', 1: 'happy', 2: 'sad', 3: 'angry'}
    expression = label_map[pred]
    
    # Buat figure untuk menampilkan gambar dan hasil prediksi
    plt.figure(figsize=(10, 5))
    
    # Tampilkan gambar
    plt.subplot(1, 2, 1)
    plt.imshow(img_original, cmap='gray')
    plt.title('Input Image')
    plt.axis('off')
    
    # Tampilkan hasil prediksi
    plt.subplot(1, 2, 2)
    bars = plt.bar(label_map.values(), proba*100, color='skyblue')
    
    # Beri warna berbeda untuk prediksi teratas
    for i, bar in enumerate(bars):
        if i == pred:
            bar.set_color('salmon')
    
    plt.ylabel('Probability (%)')
    plt.title(f'Prediction: {expression}\n(Confidence: {proba[pred]*100:.1f}%)')
    plt.ylim(0, 100)
    
    # Tampilkan nilai probabilitas di atas setiap bar
    for i, (label, prob) in enumerate(zip(label_map.values(), proba)):
        plt.text(i, prob*100 + 2, f'{prob*100:.1f}%', 
                 ha='center', va='bottom', fontsize=10)
    
    plt.tight_layout()
    plt.show()
    
    # Tampilkan hasil di console juga
    print("\nHasil Prediksi untuk", os.path.basename(image_path))
    print(f"Ekspresi: {expression}")
    print("Probabilitas:")
    for i, (label, prob) in enumerate(zip(label_map.values(), proba)):
        print(f"- {label}: {prob*100:.2f}%")
    
    return expression, proba

# %% [Proses Semua Gambar dalam Folder Test]
if __name__ == "__main__":
    # Path ke folder test
    TEST_FOLDER = 'test/'  # Ganti dengan path folder test Anda
    
    # Dapatkan semua file gambar dalam folder
    image_files = [f for f in os.listdir(TEST_FOLDER) 
                  if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
    
    if not image_files:
        print(f"[ERROR] Tidak ditemukan gambar dalam folder {TEST_FOLDER}")
    else:
        print(f"[INFO] Memproses {len(image_files)} gambar dari folder {TEST_FOLDER}")
        
        # Proses setiap gambar
        for img_file in image_files:
            img_path = os.path.join(TEST_FOLDER, img_file)
            try:
                expression, probabilities = predict_and_visualize(pipeline, img_path)
            except Exception as e:
                print(f"[ERROR] Gagal memproses {img_file}: {str(e)}")

[ERROR] Tidak ditemukan gambar dalam folder test/
