In [1]:
!pip install deepface mtcnn tensorflow kaggle --quiet

# --- IMPORT LIBRARY ---
import itertools
import os
import pandas as pd
import time
from deepface import DeepFace
from collections import defaultdict
import random

# --- KONFIGURASI EKSPERIMEN ---
DATASET_FOLDER = "dataset_foto_tes"
IDENTITY_FILE = "identity_CelebA.txt"
OUTPUT_CSV = "100photo_comparison_results.csv"

MODELS = ["Facenet", "OpenFace", "ArcFace"]
BACKENDS = ["opencv", "ssd", "mtcnn", "retinaface"]
METRICS = ["cosine", "euclidean"]

# --- FUNGSI HELPER ---
def get_result_type(is_same_person, verified):
    if is_same_person and verified: return "TP"
    elif not is_same_person and not verified: return "TN"
    elif not is_same_person and verified: return "FP"
    elif is_same_person and not verified: return "FN"
    return "UNKNOWN"

# --- LANGKAH 1: MEMBACA ULANG IDENTITAS FOTO ---
all_files_in_dataset = os.listdir(DATASET_FOLDER)
filename_to_id = {}
with open(IDENTITY_FILE, 'r') as f:
    for line in f:
        filename, person_id = line.strip().split()
        if filename in all_files_in_dataset:
            filename_to_id[filename] = person_id

# --- LANGKAH 2: MEMBUAT PASANGAN POSITIF & NEGATIF ---
positive_pairs = []
negative_pairs = []
grouped_by_id = defaultdict(list)
for filename, person_id in filename_to_id.items():
    grouped_by_id[person_id].append(filename)

# Pasangan positif: kombinasi dari foto dalam satu ID
for person_id, photos in grouped_by_id.items():
    positive_pairs.extend(list(itertools.combinations(photos, 2)))

# Pasangan negatif: kombinasi satu foto dari tiap ID
representative_photos = [photos[0] for photos in grouped_by_id.values()]
negative_pairs.extend(list(itertools.combinations(representative_photos, 2)))

all_pairs = positive_pairs + negative_pairs
random.shuffle(all_pairs)
print(f"Total pasangan yang akan diuji: {len(all_pairs)} ({len(positive_pairs)} Positif, {len(negative_pairs)} Negatif).")

# --- LANGKAH 3: PROSES VERIFIKASI UTAMA ---
results = []
total_tasks = len(all_pairs) * len(MODELS) * len(BACKENDS) * len(METRICS)
current_task = 0
start_time = time.time()

print(f"\nMemulai proses verifikasi untuk {total_tasks} tugas...")

for img1_name, img2_name in all_pairs:
    img1_path = os.path.join(DATASET_FOLDER, img1_name)
    img2_path = os.path.join(DATASET_FOLDER, img2_name)

    id1 = filename_to_id.get(img1_name)
    id2 = filename_to_id.get(img2_name)
    is_same_person = (id1 == id2)

    for model in MODELS:
        for backend in BACKENDS:
            for metric in METRICS:
                current_task += 1
                elapsed_time = time.time() - start_time
                avg_time_per_task = elapsed_time / current_task
                remaining_tasks = total_tasks - current_task
                eta_seconds = int(avg_time_per_task * remaining_tasks)
                eta_minutes = eta_seconds // 60
                eta_secs = eta_seconds % 60
                progress = (current_task / total_tasks) * 100

                print(f"[{current_task}/{total_tasks} | {progress:.2f}%] {img1_name} vs {img2_name} | {model} | {backend} | Metric: {metric} | ETA: {eta_minutes}m {eta_secs}s")

                try:
                    result = DeepFace.verify(
                        img1_path=img1_path, img2_path=img2_path, model_name=model,
                        detector_backend=backend, distance_metric=metric, enforce_detection=True
                    )

                    distance = result.get("distance", 1.0)
                    similarity = "N/A"
                    if metric == 'cosine':
                        similarity = round((1 - min(1.0, distance)) * 100, 2)

                    verified_status = result.get("verified", False)
                    result_type = get_result_type(is_same_person, verified_status)

                    results.append({
                        "img1": img1_name,
                        "img2": img2_name,
                        "is_same_person": is_same_person,
                        "model": model,
                        "backend": backend,
                        "similarity_metric": metric,
                        "distance": distance,
                        "threshold": result.get("threshold"),
                        "similarity": similarity,
                        "verified": verified_status,
                        "result_type": result_type
                    })
                except Exception as e:
                    print(f"  [x] GAGAL: {e}")
                    # Kalau mau log error detail, bisa ditambahkan log to file

# --- LANGKAH 4: SIMPAN HASIL ---
df = pd.DataFrame(results)
df.to_csv(OUTPUT_CSV, index=False)
print(f"\n✅ Proses Selesai. Hasil disimpan ke: {OUTPUT_CSV}")


Total pasangan yang akan diuji: 495 (450 Positif, 45 Negatif).

Memulai proses verifikasi untuk 11880 tugas...
[1/11880 | 0.01%] 019015.jpg vs 029414.jpg | Facenet | opencv | Metric: cosine | ETA: 0m 0s
  [x] GAGAL: Exception while processing img1_path
[2/11880 | 0.02%] 019015.jpg vs 029414.jpg | Facenet | opencv | Metric: euclidean | ETA: 309m 23s
  [x] GAGAL: Exception while processing img1_path
[3/11880 | 0.03%] 019015.jpg vs 029414.jpg | Facenet | ssd | Metric: cosine | ETA: 207m 4s
[4/11880 | 0.03%] 019015.jpg vs 029414.jpg | Facenet | ssd | Metric: euclidean | ETA: 186m 15s
[5/11880 | 0.04%] 019015.jpg vs 029414.jpg | Facenet | mtcnn | Metric: cosine | ETA: 164m 6s
[6/11880 | 0.05%] 019015.jpg vs 029414.jpg | Facenet | mtcnn | Metric: euclidean | ETA: 167m 17s
[7/11880 | 0.06%] 019015.jpg vs 029414.jpg | Facenet | retinaface | Metric: cosine | ETA: 157m 18s
[8/11880 | 0.07%] 019015.jpg vs 029414.jpg | Facenet | retinaface | Metric: euclidean | ETA: 392m 21s
[9/11880 | 0.08%] 019

In [4]:
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# --- Nama file input dan output ---
INPUT_CSV = "100photo_comparison_results.csv"
SUMMARY_CSV = "performance_summary.csv"

# --- LANGKAH 1: MEMBACA DAN MEMBERSIHKAN DATA ---
try:
    df = pd.read_csv(INPUT_CSV)
    print(f"Berhasil membaca {len(df)} baris dari {INPUT_CSV}")
except FileNotFoundError:
    print(f"ERROR: File '{INPUT_CSV}' tidak ditemukan. Pastikan skrip Tahap 1 sudah selesai dijalankan.")
    exit()

# Membersihkan data dari baris yang error saat eksekusi
df.dropna(inplace=True)
df = df[df['result_type'] != 'ERROR'].copy() # Menggunakan .copy() untuk menghindari SettingWithCopyWarning

# Konversi kolom boolean jika perlu
df['verified'] = df['verified'].astype(bool)
df['is_same_person'] = df['is_same_person'].astype(bool)

print(f"Jumlah data setelah dibersihkan: {len(df)} baris.")

# --- LANGKAH 2: MENGHITUNG METRIK UNTUK SETIAP KONFIGURASI ---

# Kelompokkan data berdasarkan model, backend, dan metrik
grouped = df.groupby(['model', 'backend', 'similarity_metric'])

summary_data = []

# Loop untuk setiap grup konfigurasi
for name, group in grouped:
    model_name, backend_name, metric_name = name
    
    # y_true adalah label asli (apakah orangnya benar-benar sama)
    # y_pred adalah prediksi model (apakah diverifikasi sebagai sama)
    y_true = group['is_same_person']
    y_pred = group['verified']
    
    # Jika tidak ada sampel positif, metrik recall/precision/f1 tidak terdefinisi
    if y_true.sum() == 0:
        print(f"Peringatan: Tidak ada sampel positif untuk grup {name}. Metrik tidak bisa dihitung.")
        continue

    # Hitung metrik utama
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, zero_division=0)
    recall = recall_score(y_true, y_pred, zero_division=0)
    f1 = f1_score(y_true, y_pred, zero_division=0)
    
    # Hitung TP, TN, FP, FN secara manual dari confusion matrix untuk validasi
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[False, True]).ravel()

    summary_data.append({
        "Model": model_name,
        "Backend": backend_name,
        "Metric": metric_name,
        "Accuracy": round(accuracy, 4),
        "Precision": round(precision, 4),
        "Recall (Sensitivity)": round(recall, 4), # Recall juga disebut Sensitivity atau True Positive Rate
        "F1-Score": round(f1, 4),
        "TP": tp,
        "TN": tn,
        "FP": fp,
        "FN": fn
    })

# --- LANGKAH 3: TAMPILKAN DAN SIMPAN HASIL AKHIR ---
if summary_data:
    df_summary = pd.DataFrame(summary_data)
    
    # Urutkan berdasarkan F1-Score (metrik terbaik untuk keseimbangan precision-recall)
    df_summary_sorted = df_summary.sort_values(by="F1-Score", ascending=False)
    
    print("\n" + "="*80)
    print("           RINGKASAN PERFORMA PIPELINE VERIFIKASI WAJAH")
    print("="*80)
    # Gunakan to_string() agar semua kolom ditampilkan
    print(df_summary_sorted.to_string())
    
    # Simpan tabel ringkasan ke file CSV
    df_summary_sorted.to_csv(SUMMARY_CSV, index=False)
    print(f"\n✅ Ringkasan performa disimpan ke '{SUMMARY_CSV}'")
else:
    print("\nTidak ada data valid untuk dianalisis.")

Berhasil membaca 11364 baris dari 100photo_comparison_results.csv
Jumlah data setelah dibersihkan: 5682 baris.

           RINGKASAN PERFORMA PIPELINE VERIFIKASI WAJAH
       Model     Backend  Metric  Accuracy  Precision  Recall (Sensitivity)  F1-Score   TP  TN  FP   FN
1    ArcFace      opencv  cosine    0.8924     1.0000                0.8845    0.9387  337  28   0   44
0    ArcFace       mtcnn  cosine    0.8465     0.9973                0.8333    0.9080  375  44   1   75
2    ArcFace  retinaface  cosine    0.8303     0.9973                0.8156    0.8973  367  44   1   83
3    ArcFace         ssd  cosine    0.8081     0.9972                0.7911    0.8823  356  44   1   94
5    Facenet      opencv  cosine    0.6161     1.0000                0.5879    0.7405  224  28   0  157
6    Facenet  retinaface  cosine    0.6202     1.0000                0.5822    0.7360  262  45   0  188
7    Facenet         ssd  cosine    0.5980     1.0000                0.5578    0.7161  251  45   0  199
