# Pipeline Rekognisi Wajah v2: Perbaikan Efisiensi dan Evaluasi


### Langkah 1: Instalasi Pustaka yang Diperlukan

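Sel ini berisi perintah instalasi untuk semua pustaka yang dibutuhkan, termasuk `pybrisque` dan `pyiqa` untuk analisis kualitas gambar. Perintah tersebut dikomentari secara bawaan; hapus tanda `#` di awal baris lalu jalankan sel ini jika pustaka belum terinstal.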


In [None]:
# %pip install gfpgan deepface facexlib tqdm pandas matplotlib scikit-learn seaborn pybrisque pyiqa

### Langkah 2: Impor Pustaka & Konfigurasi Path

Mengimpor semua modul yang diperlukan dan mendefinisikan path utama untuk data, hasil, dan cache. Menggunakan `sys.executable` untuk memastikan portabilitas.


In [None]:
import os
import sys
import glob
import cv2
import numpy as np
import json
import time
import pickle
import torch
from collections import defaultdict
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import pandas as pd
from deepface import DeepFace
from gfpgan import GFPGANer
from brisque import BRISQUE
import pyiqa
from scipy.spatial.distance import cosine
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns

# --- Path configuration ---
# Every location is derived from the directory the notebook is started in.
BASE_DIR = os.path.abspath('.')
PROJECT_ROOT = os.path.abspath(os.path.join(BASE_DIR, '..'))

# Input images: reference gallery and the probe (test) images.
_DATA_DIR = os.path.join(BASE_DIR, 'data')
GALLERY_PATH = os.path.join(_DATA_DIR, 'gallery')
PROBES_PATH = os.path.join(_DATA_DIR, 'probes')

# Output locations for this version of the pipeline.
RESULTS_PATH = os.path.join(BASE_DIR, 'results_v2')
CACHE_PATH = os.path.join(BASE_DIR, 'cache')
EMBEDDING_CACHE_FILE = os.path.join(CACHE_PATH, 'embedding_cache.pkl')

# Interpreter that is running this notebook.
VENV_PYTHON_PATH = sys.executable

# Create the output folders up front; existing folders are left untouched.
for _output_dir in (RESULTS_PATH, CACHE_PATH):
    os.makedirs(_output_dir, exist_ok=True)

print(f"Notebook berjalan di: {BASE_DIR}")
print(f"Galeri Referensi: {GALLERY_PATH}")
print(f"Citra Uji (Probes): {PROBES_PATH}")
print(f"Hasil akan disimpan di: {RESULTS_PATH}")
print(f"Cache akan disimpan di: {CACHE_PATH}")

### Langkah 3: Inisialisasi Model-Model Utama

Semua model (GFPGAN, DeepFace, BRISQUE, NIQE) diinisialisasi satu kali di sini untuk efisiensi maksimal. Ini menghindari pemuatan ulang model di dalam loop.


In [None]:
# Use CUDA when torch reports it as available, otherwise run on the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Menggunakan device: {DEVICE}')

# --- GFPGAN face restorer ---
print("Memuat model GFPGAN v1.4...")
# The weights file is looked up under the parent of the notebook directory.
_gfpgan_weights = os.path.join(PROJECT_ROOT, 'model_gfpgan', 'gfpgan', 'weights', 'GFPGANv1.4.pth')
_gfpgan_options = dict(
    upscale=2,
    arch='clean',
    channel_multiplier=2,
    bg_upsampler=None,
    device=DEVICE,
)
gfpgan_restorer = GFPGANer(model_path=_gfpgan_weights, **_gfpgan_options)
print("Model GFPGAN siap.")

# --- BRISQUE quality assessor (constructed with its default arguments) ---
print("Memuat model BRISQUE...")
brisque_assessor = BRISQUE()
print("Model BRISQUE siap.")

# --- NIQE quality assessor, created on the same device as the other models ---
print("Memuat model NIQE...")
niqe_assessor = pyiqa.create_metric('niqe', device=DEVICE)
print("Model NIQE siap.")

# --- DeepFace warm-up ---
# Run one throw-away ArcFace call on an all-zero image so the model is loaded
# before the main loop. A failure here is reported but does not stop the cell.
print("Warm-up model DeepFace (ArcFace)...")
try:
    _blank_image = np.zeros((112, 112, 3))
    _ = DeepFace.represent(_blank_image, model_name='ArcFace', enforce_detection=False)
except Exception as e:
    print(f"Gagal warm-up DeepFace: {e}")
else:
    print("Model DeepFace siap.")

### Langkah 4: Fungsi-Fungsi Utilitas dan Pipeline

Fungsi-fungsi bantuan didefinisikan di sini. Mekanisme caching embedding tidak berada di dalam `get_embedding`, melainkan diterapkan pada loop utama di Langkah 6.


In [None]:
def parse_filename(filename):
    """Extract subject, distance and height metadata from a probe file name.

    The base name (without extension) is split on underscores; fields 0, 2
    and 4 are read as the subject id, height id and distance id.

    Args:
        filename: Path or bare name of the image file.

    Returns:
        A dict with keys ``subject_id``, ``distance_m``, ``distance_category``,
        ``height_id`` and ``height_category``, or ``None`` when the name has
        fewer than five fields, either id is ``'na'``, the distance id is not
        an integer, or the height id is neither ``'3'`` nor ``'5'``.
    """
    stem = os.path.splitext(os.path.basename(filename))[0]
    fields = stem.split('_')
    if len(fields) < 5:
        return None

    subject_id, height_id, distance_id = fields[0], fields[2], fields[4]
    if 'na' in (height_id, distance_id):
        return None

    try:
        distance_code = int(distance_id)
    except ValueError:
        return None

    height_category = {'3': 'rendah', '5': 'tinggi'}.get(height_id)
    if not height_category:
        return None

    # Larger distance codes mean the subject is closer to the camera.
    if distance_code > 24:
        distance_category = 'dekat'
    elif distance_code >= 14:
        distance_category = 'menengah'
    else:
        distance_category = 'jauh'

    return {
        'subject_id': subject_id,
        'distance_m': 17 - (distance_code / 2),
        'distance_category': distance_category,
        'height_id': height_id,
        'height_category': height_category,
    }

def get_embedding(image_path_or_array, model_name='ArcFace', detector_backend='retinaface'):
    """Return the embedding DeepFace reports for the first result of an image.

    Args:
        image_path_or_array: Value forwarded to ``DeepFace.represent`` as
            ``img_path`` (callers in this notebook pass a file path or an
            image array).
        model_name: Recognition model passed to DeepFace.
        detector_backend: Face detector passed to DeepFace.

    Returns:
        The ``'embedding'`` entry of the first element returned by
        ``DeepFace.represent``, or ``None`` if the call or the lookup raises
        ``ValueError``, ``AttributeError``, ``IndexError`` or ``TypeError``.
    """
    represent_kwargs = {
        'img_path': image_path_or_array,
        'model_name': model_name,
        # Detection is not enforced, so a missing face does not raise here.
        'enforce_detection': False,
        'detector_backend': detector_backend,
    }
    try:
        first_result = DeepFace.represent(**represent_kwargs)[0]
        return first_result['embedding']
    except (ValueError, AttributeError, IndexError, TypeError):
        return None

def find_best_match(probe_embedding, gallery_embeddings):
    """Find the gallery subject whose embedding is closest to the probe.

    Closeness is the cosine distance from ``scipy.spatial.distance.cosine``;
    the first subject with the strictly smallest distance wins.

    Args:
        probe_embedding: Embedding of the probe image, or ``None``.
        gallery_embeddings: Mapping of subject id to reference embedding;
            entries whose embedding is ``None`` are skipped.

    Returns:
        ``(subject_id, distance)`` for the best match, or
        ``(None, float('inf'))`` when there is no probe embedding, the gallery
        is empty, or no gallery entry has an embedding.
    """
    best = (None, float('inf'))
    if probe_embedding is None or not gallery_embeddings:
        return best

    for candidate_id, reference in gallery_embeddings.items():
        if reference is None:
            continue
        distance = cosine(probe_embedding, reference)
        if distance < best[1]:
            best = (candidate_id, distance)
    return best

def get_niqe_score(image_array, assessor):
    """Score an OpenCV image with the given quality assessor.

    Args:
        image_array: Image to score; it is converted with ``COLOR_BGR2RGB``,
            so it is presumably a BGR array as returned by OpenCV.
        assessor: Callable that takes the prepared tensor and returns a value
            exposing ``.item()``.

    Returns:
        The score as returned by ``.item()``, or ``None`` if any step raises.
    """
    try:
        rgb = cv2.cvtColor(image_array, cv2.COLOR_BGR2RGB)
        # Move channels first (H,W,C -> C,H,W), add a batch axis, scale by 1/255.
        channels_first = torch.from_numpy(rgb).permute(2, 0, 1)
        batch = channels_first.unsqueeze(0) / 255.0
        return assessor(batch.to(DEVICE)).item()
    except Exception:
        # Best effort: a failed quality measurement is reported as "no score".
        return None

# Confirmation message printed once the helper definitions in this cell have run.
print("Fungsi utilitas siap digunakan.")

### Langkah 5: Pembuatan Galeri Referensi

Membuat database fitur dari gambar referensi berkualitas tinggi. Hasilnya disimpan dalam memori.


In [None]:
# Collect the reference images and compute one embedding per subject.
gallery_files = glob.glob(os.path.join(GALLERY_PATH, '*.jpg'))
gallery_embeddings = {}

print(f"Membuat database fitur dari {len(gallery_files)} gambar di galeri...")
for g_file in tqdm(gallery_files):
    embedding = get_embedding(g_file)
    if embedding is None:
        # Images for which no embedding could be extracted are left out.
        continue
    # The subject id is the part of the file name before the first underscore.
    # If several files share a subject id, the last one processed is kept.
    subject_id = os.path.basename(g_file).split('_')[0]
    gallery_embeddings[subject_id] = embedding

print(f"Database fitur galeri berhasil dibuat untuk subjek: {list(gallery_embeddings)}")

### Langkah 6: Eksekusi Pipeline Utama (Versi 2)

Ini adalah inti dari eksperimen. Loop ini akan memproses semua gambar uji, menjalankan kedua jalur, menghitung metrik kualitas, dan menyimpan semua hasil secara terstruktur.
Kegagalan pada tahap mana pun (restorasi, ekstraksi fitur) akan dicatat dan dianggap sebagai prediksi yang salah untuk evaluasi yang adil.


In [None]:
# Load the probe-embedding cache left behind by a previous run, if any.
# NOTE: pickle can execute arbitrary code while loading; only use cache files
# that were written by this notebook.
try:
    with open(EMBEDDING_CACHE_FILE, 'rb') as f:
        embedding_cache = pickle.load(f)
    print(f"Berhasil memuat {len(embedding_cache)} embedding dari cache.")
except FileNotFoundError:
    embedding_cache = {}
    print("Cache embedding tidak ditemukan, akan membuat baru.")
except (EOFError, pickle.UnpicklingError) as e:
    # A truncated or corrupt cache (e.g. from an interrupted run) must not
    # abort the experiment: start over with an empty cache instead.
    embedding_cache = {}
    print(f"Cache embedding rusak dan diabaikan ({e}), akan membuat baru.")

probe_files = glob.glob(os.path.join(PROBES_PATH, '*.JPG'))
results_v2 = []
print(f"Memulai pemrosesan {len(probe_files)} citra uji...")
start_time = time.time()

for probe_path in tqdm(probe_files):
    # Probes whose file name cannot be parsed are skipped entirely.
    metadata = parse_filename(probe_path)
    if not metadata:
        continue
    probe_filename = os.path.basename(probe_path)
    ground_truth = metadata['subject_id']

    # --- Per-probe defaults: every failure path leaves these untouched ---
    prediction_A, prediction_B = None, None
    is_correct_A, is_correct_B = False, False
    restoration_succeeded = False
    brisque_original, brisque_restored = None, None
    niqe_original, niqe_restored = None, None

    # --- Path A (no restoration) ---
    # Embeddings of the original probes are cached by file name; a failed
    # extraction (None) is cached as well.
    if probe_filename in embedding_cache:
        embedding_A = embedding_cache[probe_filename]
    else:
        embedding_A = get_embedding(probe_path)
        embedding_cache[probe_filename] = embedding_A
    if embedding_A is not None:
        prediction_A, _ = find_best_match(embedding_A, gallery_embeddings)
        if prediction_A == ground_truth:
            is_correct_A = True

    # --- Path B (with restoration) ---
    try:
        img_original = cv2.imread(probe_path, cv2.IMREAD_COLOR)
        if img_original is not None:
            # Quality metrics for the original image.
            # NOTE(review): img_original is in OpenCV's BGR channel order;
            # confirm the BRISQUE implementation in use expects that order.
            brisque_original = brisque_assessor.score(img_original)
            niqe_original = get_niqe_score(img_original, niqe_assessor)

            # Restore the image; only the third returned value is used.
            _, _, restored_img = gfpgan_restorer.enhance(
                img_original,
                has_aligned=False,
                only_center_face=True,
                paste_to_face=True,
            )
            if restored_img is not None:
                restoration_succeeded = True
                # Quality metrics for the restored image.
                brisque_restored = brisque_assessor.score(restored_img)
                niqe_restored = get_niqe_score(restored_img, niqe_assessor)
                # Embedding of the restored image (not cached).
                embedding_B = get_embedding(restored_img)
                if embedding_B is not None:
                    prediction_B, _ = find_best_match(embedding_B, gallery_embeddings)
                    if prediction_B == ground_truth:
                        is_correct_B = True
    except Exception as e:
        # Any error anywhere in Path B counts as a failed prediction; the
        # message is printed to make debugging possible.
        print(f"Error processing {probe_filename}: {e}")
        is_correct_B = False
        restoration_succeeded = False

    results_v2.append({
        'file': probe_filename,
        'metadata': metadata,
        'ground_truth': ground_truth,
        'prediction_A': prediction_A,
        'is_correct_A': is_correct_A,
        'prediction_B': prediction_B,
        'is_correct_B': is_correct_B,
        'restoration_succeeded': restoration_succeeded,
        'brisque_original': brisque_original,
        'brisque_restored': brisque_restored,
        'niqe_original': niqe_original,
        'niqe_restored': niqe_restored
    })

# --- Save the results and the cache ---
end_time = time.time()
print(f"Selesai memproses {len(results_v2)} citra uji dalam {end_time - start_time:.2f} detik.")
results_file_path = os.path.join(RESULTS_PATH, 'pipeline_results_v2.json')
with open(results_file_path, 'w') as f:
    json.dump(results_v2, f, indent=4)
print(f"Variabel 'results_v2' berhasil disimpan ke: {results_file_path}")
with open(EMBEDDING_CACHE_FILE, 'wb') as f:
    pickle.dump(embedding_cache, f)
print(f"Cache embedding berhasil disimpan ke: {EMBEDDING_CACHE_FILE}")

### Langkah 7: Analisis Hasil dan Visualisasi

Bagian ini memuat kembali hasil yang baru saja disimpan, lalu menghitung metrik evaluasi dan membuat visualisasi.


In [None]:
# Read the results back from the JSON file written by the pipeline cell.
results_file_path = os.path.join(RESULTS_PATH, 'pipeline_results_v2.json')
with open(results_file_path, 'r') as f:
    results_data = json.load(f)

df = pd.DataFrame(results_data)

# Expand the per-row metadata dict into its own columns and swap those columns
# in for the original 'metadata' column.
df_meta = pd.json_normalize(df['metadata'])
_df_without_metadata = df.drop(columns=['metadata'])
df = pd.concat([_df_without_metadata, df_meta], axis=1)

print(f"Data hasil berhasil dimuat. Total {len(df)} baris.")
display(df.head())

#### 7.1 Analisis Performa Keseluruhan


In [None]:
def create_summary_table(df, group_by_col):
    """Build a per-category accuracy table for both pipeline paths.

    Args:
        df: Results frame with ``is_correct_A`` and ``is_correct_B`` columns
            and the column named by ``group_by_col``.
        group_by_col: Column whose distinct values define the categories.

    Returns:
        A DataFrame indexed by ``'Kategori'`` with the sample count, the
        formatted accuracy of each path and the relative accuracy change of
        path B over path A (``'N/A'`` when path A's accuracy is not positive).
    """

    def _summarise(category):
        # One table row for a single category value.
        rows = df[df[group_by_col] == category]
        n_rows = len(rows)
        if n_rows > 0:
            acc_a = rows['is_correct_A'].sum() / n_rows
            acc_b = rows['is_correct_B'].sum() / n_rows
        else:
            acc_a = acc_b = 0

        # Relative change is undefined when the baseline accuracy is zero.
        if acc_a > 0:
            gain_text = f'{(acc_b - acc_a) / acc_a * 100:+.2f}%'
        else:
            gain_text = 'N/A'

        return {
            'Kategori': category,
            'Jumlah Sampel': n_rows,
            'Akurasi (Jalur A)': f'{acc_a:.2%}',
            'Akurasi (Jalur B)': f'{acc_b:.2%}',
            'Peningkatan Akurasi': gain_text,
        }

    records = [_summarise(category) for category in df[group_by_col].unique()]
    return pd.DataFrame(records).set_index('Kategori')

# Accuracy broken down by distance category, displayed from near to far.
summary_distance = create_summary_table(df, 'distance_category')
_distance_order = ['dekat', 'menengah', 'jauh']
print("--- Analisis Berdasarkan Jarak ---")
display(summary_distance.reindex(_distance_order))

# Accuracy broken down by height category, displayed from low to high.
summary_height = create_summary_table(df, 'height_category')
_height_order = ['rendah', 'tinggi']
print("\n--- Analisis Berdasarkan Ketinggian ---")
display(summary_height.reindex(_height_order))

#### 7.2 Analisis Kualitas Gambar (BRISQUE & NIQE)


In [None]:
# Keep only rows for which all four quality scores are present.
_iqa_columns = ['brisque_original', 'brisque_restored', 'niqe_original', 'niqe_restored']
iqa_df = df[_iqa_columns].dropna()

# Mean of each score column, in the same order as _iqa_columns.
(
    avg_brisque_original,
    avg_brisque_restored,
    avg_niqe_original,
    avg_niqe_restored,
) = (iqa_df[_column].mean() for _column in _iqa_columns)

print("--- Analisis Kualitas Gambar (BRISQUE & NIQE) ---")
print(f"Skor rata-rata BRISQUE (Asli): {avg_brisque_original:.2f} (Lebih tinggi = kualitas lebih rendah)")
print(f"Skor rata-rata BRISQUE (Restorasi): {avg_brisque_restored:.2f} (Lebih rendah = kualitas lebih baik)")
print('---')
print(f"Skor rata-rata NIQE (Asli): {avg_niqe_original:.2f} (Lebih tinggi = kualitas lebih rendah)")
print(f"Skor rata-rata NIQE (Restorasi): {avg_niqe_restored:.2f} (Lebih rendah = kualitas lebih baik)")

# Box plot with one box per score column; the figure is saved, then shown.
_box_colours = ['skyblue', 'lightgreen', 'salmon', 'lightcoral']
plt.figure(figsize=(12, 7))
sns.boxplot(data=iqa_df, palette=_box_colours)
plt.title('Perbandingan Skor Kualitas Gambar Sebelum dan Sesudah Restorasi')
plt.ylabel('Skor IQA')
plt.grid(axis='y', linestyle='--', alpha=0.7)

iqa_chart_path = os.path.join(RESULTS_PATH, 'iqa_comparison_v2.png')
plt.savefig(iqa_chart_path)
plt.show()
print(f"Grafik IQA disimpan di {iqa_chart_path}")

#### 7.3 Confusion Matrix


In [None]:
def plot_confusion_matrix_v2(df, pipeline_type, labels):
    """Plot, save and show the confusion matrix of one pipeline path.

    Args:
        df: Results frame with ``ground_truth`` and the prediction columns.
        pipeline_type: ``'A'`` selects path A; any other value selects path B.
            The value is also used in the output file name.
        labels: Class labels, used for the matrix order and the axis ticks.

    Rows with a missing ground truth or prediction are excluded. The figure
    is written to ``RESULTS_PATH`` as ``confusion_matrix_<type>_v2.png``.
    """
    is_path_a = pipeline_type == 'A'
    pred_key = 'prediction_A' if is_path_a else 'prediction_B'
    if is_path_a:
        title = 'Confusion Matrix - Jalur A (Tanpa Restorasi)'
    else:
        title = 'Confusion Matrix - Jalur B (Dengan Restorasi)'

    # Only rows that have both a true label and a prediction enter the matrix.
    valid_rows = df.dropna(subset=['ground_truth', pred_key])
    cm = confusion_matrix(valid_rows['ground_truth'], valid_rows[pred_key], labels=labels)

    plt.figure(figsize=(10, 8))
    heatmap_style = dict(annot=True, fmt='d', cmap='Blues', linewidths=.5)
    sns.heatmap(cm, xticklabels=labels, yticklabels=labels, **heatmap_style)
    plt.title(title, fontsize=15)
    plt.ylabel('Label Sebenarnya (True Label)')
    plt.xlabel('Label Prediksi (Predicted Label)')

    file_path = os.path.join(RESULTS_PATH, f'confusion_matrix_{pipeline_type}_v2.png')
    plt.savefig(file_path, bbox_inches='tight')
    print(f"Confusion Matrix untuk Jalur {pipeline_type} disimpan di: {file_path}")
    plt.show()

# Sorted list of every subject id that occurs as ground truth; used as the
# label order for the confusion matrices of both paths.
unique_labels = sorted(df['ground_truth'].unique())
for _pipeline_type in ('A', 'B'):
    plot_confusion_matrix_v2(df, _pipeline_type, unique_labels)

#### 7.4 Laporan Klasifikasi per Subjek


In [None]:
# Per-subject report for path A; rows without a prediction are left out.
print("--- Laporan Klasifikasi Jalur A (Tanpa Restorasi) ---")
report_A_df = df.dropna(subset=['ground_truth', 'prediction_A'])
_true_A, _pred_A = report_A_df['ground_truth'], report_A_df['prediction_A']
print(classification_report(_true_A, _pred_A, labels=unique_labels))

# Per-subject report for path B, built the same way.
print("\n--- Laporan Klasifikasi Jalur B (Dengan Restorasi) ---")
report_B_df = df.dropna(subset=['ground_truth', 'prediction_B'])
_true_B, _pred_B = report_B_df['ground_truth'], report_B_df['prediction_B']
print(classification_report(_true_B, _pred_B, labels=unique_labels))