svm_model = SVC(kernel='rbf', C=10.0, gamma='scale', probability=True)
knn_model = KNeighborsClassifier(n_neighbors=3, weights='distance', metric='manhattan')


In [1]:
# ============== LANGKAH 1: INISIALISASI & SETUP =============
import os
import sys
import glob
import cv2
import numpy as np
import json
import time
import pickle
import torch
from collections import defaultdict
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import pandas as pd
from deepface import DeepFace # Diperlukan untuk get_embedding gallery
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, recall_score, f1_score
import seaborn as sns

print("Semua pustaka berhasil diimpor.")

# Definisikan kelas Encoder kustom (jika diperlukan untuk menyimpan hasil baru)
class NumpyJSONEncoder(json.JSONEncoder):
    """ Custom encoder for numpy data types """
    def default(self, obj):
        if isinstance(obj, np.integer): return int(obj)
        elif isinstance(obj, np.floating): return float(obj)
        elif isinstance(obj, np.ndarray): return obj.tolist()
        elif isinstance(obj, np.bool_): return bool(obj)
        else: return super(NumpyJSONEncoder, self).default(obj)

# --- Konfigurasi Path ---
BASE_DIR = os.path.abspath('.')
GALLERY_PATH = os.path.join(BASE_DIR, 'data', 'gallery6.2') # Path galeri tetap diperlukan
RESULTS_PATH = os.path.join(BASE_DIR, 'results_v6.4.3_recognition') # Path output analisis
CACHE_PATH = os.path.join(BASE_DIR, 'cache_v6.4.3_recognition') # Path cache jika diperlukan
# >>>>> PERUBAHAN PATH MODEL BARU <<<<<
NEW_MODELS_PATH = os.path.join(BASE_DIR, 'models_v6.4.3_tuned') # Path BARU untuk menyimpan model hasil tuning
PROBE_FEATURES_PATH = os.path.join(BASE_DIR, 'features_v6.4') # Path fitur probe dari v6.4

# Pastikan semua direktori ada
for path in [RESULTS_PATH, CACHE_PATH, NEW_MODELS_PATH]: # Tambahkan NEW_MODELS_PATH
    os.makedirs(path, exist_ok=True)
print("Direktori telah disiapkan.")

# --- Setup Device ---
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Menggunakan device: {DEVICE}')


Semua pustaka berhasil diimpor.
Direktori telah disiapkan.
Menggunakan device: cuda


In [2]:
# ============== LANGKAH 2: DEFINISI FUNGSI UTILITAS =============

# Fungsi get_embedding diperlukan untuk melatih classifier dari galeri
def get_embedding(image_path_or_array, model_name='ArcFace', detector_backend='retinaface') -> list | None:
    """Mengekstrak embedding wajah dari path gambar atau array (dengan deteksi)."""
    try:
        embedding_objs = DeepFace.represent(
            img_path=image_path_or_array, model_name=model_name,
            enforce_detection=True, detector_backend=detector_backend
        )
        if embedding_objs and isinstance(embedding_objs, list):
            return embedding_objs[0]['embedding']
        return None
    except Exception as e:
        # print(f"Error saat get_embedding: {e}") # Uncomment untuk debug
        return None

# Fungsi Cosine Similarity diperlukan untuk perbandingan
def cosine_similarity_prediction(query_embedding, gallery_embeddings, gallery_labels, threshold=0.5):
    """Prediksi menggunakan cosine similarity."""
    if query_embedding is None or not gallery_embeddings:
        return "unknown", 0.0, False

    query_embedding = np.array(query_embedding)
    query_norm = np.linalg.norm(query_embedding)
    if query_norm == 0:
        return "unknown", 0.0, False
    query_embedding = query_embedding / query_norm

    similarities = []
    valid_gallery_labels = []
    for gallery_emb, label in zip(gallery_embeddings, gallery_labels):
        gallery_emb = np.array(gallery_emb)
        gallery_norm = np.linalg.norm(gallery_emb)
        if gallery_norm > 0:
            gallery_emb = gallery_emb / gallery_norm
            similarity = np.dot(query_embedding, gallery_emb)
            similarities.append(similarity)
            valid_gallery_labels.append(label)

    if not similarities:
        return "unknown", 0.0, False

    max_similarity = np.max(similarities)
    max_idx = np.argmax(similarities)
    predicted_label = valid_gallery_labels[max_idx]
    is_recognized = max_similarity > threshold

    if np.isnan(max_similarity):
         return "unknown", 0.0, False

    return predicted_label, float(max_similarity), bool(is_recognized)

def cosine_similarity_top_n(query_embedding, gallery_embeddings, gallery_labels, top_n=5):
    """
    Mengembalikan Top-N label beserta skor similarity-nya.
    """
    if query_embedding is None or not gallery_embeddings:
        return [], False

    query_embedding = np.array(query_embedding)
    query_norm = np.linalg.norm(query_embedding)
    if query_norm == 0:
        return [], False
    query_embedding = query_embedding / query_norm

    # Hitung similarity untuk semua kandidat di galeri
    scores = []
    for gallery_emb, label in zip(gallery_embeddings, gallery_labels):
        gallery_emb = np.array(gallery_emb)
        gallery_norm = np.linalg.norm(gallery_emb)
        if gallery_norm > 0:
            gallery_emb = gallery_emb / gallery_norm
            sim = np.dot(query_embedding, gallery_emb)
            scores.append((label, sim))

    # Urutkan dari similarity terbesar ke terkecil
    scores.sort(key=lambda x: x[1], reverse=True)
    
    # Ambil Top-N
    # Catatan: Ini logic "Nearest Neighbor". Jika ada banyak foto 'subject_a' di galeri,
    # Top-5 bisa jadi ['a', 'a', 'a', 'b', 'c'].
    # Jika ground_truth 'a' ada di list ini, maka hitungannya benar.
    top_results = scores[:top_n]
    
    return top_results


print("Fungsi-fungsi utilitas siap digunakan.")

Fungsi-fungsi utilitas siap digunakan.


In [3]:
# ============== LANGKAH 3: LATIH CLASSIFIER & PERSIAPAN GALERI =============

# --- Selalu Latih Classifier dari Galeri ---
print("Mempersiapkan data latih dari galeri...")
X_train = []
y_train_labels = []
gallery_files = glob.glob(os.path.join(GALLERY_PATH, '*.jpg')) # Sesuaikan ekstensi

if not gallery_files:
     print(f"ERROR: Tidak ada file ditemukan di {GALLERY_PATH}. Pelatihan dibatalkan.")
     sys.exit()

for g_file in tqdm(gallery_files, desc="Membangun Dataset Latih"):
    subject_id = os.path.basename(g_file).split('_')[0]
    embedding = get_embedding(g_file) # Gunakan fungsi get_embedding
    if embedding is not None:
        X_train.append(embedding)
        y_train_labels.append(subject_id)

if not X_train:
     print("ERROR: Tidak ada embedding yang berhasil diekstrak dari galeri. Pelatihan dibatalkan.")
     sys.exit()

le = LabelEncoder()
y_train = le.fit_transform(y_train_labels)
labels = le.classes_
print(f"\nDataset latih siap dengan {len(X_train)} sampel.")
print(f"Subjek yang ditemukan: {labels}")

# Latih KNN
print("\nMelatih model K-Nearest Neighbors (KNN)...")
knn_model = KNeighborsClassifier(n_neighbors=1, weights='distance', metric='euclidean')
knn_model.fit(X_train, y_train)
print("Model KNN selesai dilatih.")

# Latih SVM
print("Melatih model Support Vector Machine (SVM)...")
svm_model = SVC(kernel='linear', probability=True, C=1000.0)
svm_model.fit(X_train, y_train)
print("Model SVM selesai dilatih.")

# >>>>> PERUBAHAN PATH PENYIMPANAN <<<<<
knn_model_path_new = os.path.join(NEW_MODELS_PATH, 'knn_model_tuned.pkl')
svm_model_path_new = os.path.join(NEW_MODELS_PATH, 'svm_model_tuned.pkl')
le_path_new = os.path.join(NEW_MODELS_PATH, 'label_encoder.pkl')

with open(knn_model_path_new, 'wb') as f: pickle.dump(knn_model, f)
with open(svm_model_path_new, 'wb') as f: pickle.dump(svm_model, f)
with open(le_path_new, 'wb') as f: pickle.dump(le, f)
print(f"Model KNN, SVM, dan LabelEncoder baru berhasil disimpan di: {NEW_MODELS_PATH}")


# ============== PERSIAPAN GALLERY EMBEDDINGS UNTUK COSINE SIMILARITY =============
print("\nMempersiapkan gallery embeddings untuk cosine similarity...")
gallery_embeddings = []
gallery_labels = []
gallery_files_cosine = glob.glob(os.path.join(GALLERY_PATH, '*.jpg')) # Sesuaikan ekstensi

if not gallery_files_cosine:
     print(f"ERROR: Tidak ada file ditemukan di {GALLERY_PATH} untuk cosine similarity.")
     sys.exit()

# Coba muat cache embedding galeri jika ada untuk mempercepat
gallery_cache_path = os.path.join(CACHE_PATH, 'gallery_embeddings_cache.pkl')
try:
    with open(gallery_cache_path, 'rb') as f:
        gallery_embeddings, gallery_labels = pickle.load(f)
    print(f"Cache gallery embeddings dimuat ({len(gallery_embeddings)} sampel).")
except FileNotFoundError:
    print("Cache gallery embeddings tidak ditemukan, membuat ulang...")
    for g_file in tqdm(gallery_files_cosine, desc="Membangun Gallery Embeddings"):
        subject_id = os.path.basename(g_file).split('_')[0]
        embedding = get_embedding(g_file)
        if embedding is not None:
            gallery_embeddings.append(embedding)
            gallery_labels.append(subject_id)
    # Simpan ke cache
    try:
        with open(gallery_cache_path, 'wb') as f:
            pickle.dump((gallery_embeddings, gallery_labels), f)
        print("Cache gallery embeddings disimpan.")
    except Exception as e:
        print(f"Gagal menyimpan cache gallery embeddings: {e}")

if not gallery_embeddings:
     print("ERROR: Tidak ada embedding galeri yang berhasil dibuat untuk cosine similarity.")
     sys.exit()
else:
     print(f"Gallery embeddings siap dengan {len(gallery_embeddings)} sampel.")

Mempersiapkan data latih dari galeri...


Membangun Dataset Latih:   0%|          | 0/55 [00:00<?, ?it/s]


Dataset latih siap dengan 55 sampel.
Subjek yang ditemukan: ['a' 'b' 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j' 'k']

Melatih model K-Nearest Neighbors (KNN)...
Model KNN selesai dilatih.
Melatih model Support Vector Machine (SVM)...
Model SVM selesai dilatih.
Model KNN, SVM, dan LabelEncoder baru berhasil disimpan di: d:\UNSRI_DATA\_SKRIPSI\PROGRAM\v1\pipeline_skripsi\models_v6.4.3_tuned

Mempersiapkan gallery embeddings untuk cosine similarity...
Cache gallery embeddings dimuat (55 sampel).
Gallery embeddings siap dengan 55 sampel.


In [4]:
# ============== LANGKAH 4 (REVISI LENGKAP & FIXED): MUAT FITUR PROBE & LAKUKAN REKOGNISI =============
# Versi ini menganalisis KEDUA embedding: original dan restored

features_file_path = os.path.join(PROBE_FEATURES_PATH, 'probe_features_v6.4.json')
try:
    with open(features_file_path, 'r', encoding='utf-8') as f:
        probe_features = json.load(f)
    print(f"\nBerhasil memuat {len(probe_features)} fitur probe dari {features_file_path}")
except FileNotFoundError:
    print(f"ERROR: File fitur probe tidak ditemukan di {features_file_path}.")
    probe_features = []
except json.JSONDecodeError:
    print(f"ERROR: Gagal mem-parsing file JSON fitur probe: {features_file_path}.")
    probe_features = []

# >>>>> DEBUG: Memeriksa struktur data pertama <<<<<
if probe_features:
    print("\n=== DEBUG: Memeriksa struktur data pertama ===")
    first_entry = probe_features[0]
    # print(f"Keys dalam entry pertama: {first_entry.keys()}")
    
    emb_orig = first_entry.get('embedding_original')
    emb_rest = first_entry.get('embedding_restored')
    
    print(f"\nTipe embedding_original: {type(emb_orig)}")
    if emb_orig and isinstance(emb_orig, list):
        print(f"Panjang embedding_original: {len(emb_orig)}")
    
    print(f"\nTipe embedding_restored: {type(emb_rest)}")
    if emb_rest and isinstance(emb_rest, list):
        print(f"Panjang embedding_restored: {len(emb_rest)}")
    print("=" * 50)

recognition_results = []

# Counters untuk statistik
stats = {
    'total': 0,
    'original_valid': 0,
    'restored_valid': 0,
    'both_valid': 0,
    'restoration_succeeded': 0
}

if not probe_features:
    print("Tidak ada fitur probe untuk diproses.")
else:
    print("\nMemulai proses rekognisi untuk ORIGINAL dan RESTORED...")
    
    for idx, feature_entry in enumerate(tqdm(probe_features, desc="Melakukan Rekognisi")):
        stats['total'] += 1
        
        # >>>>> AMBIL KEDUA EMBEDDING <<<<<
        probe_embedding_original = feature_entry.get('embedding_original')
        probe_embedding_restored = feature_entry.get('embedding_restored')
        
        ground_truth = feature_entry.get('ground_truth', 'unknown')
        filename = feature_entry.get('file', 'unknown')
        metadata = feature_entry.get('metadata', {})
        restoration_succeeded = feature_entry.get('restoration_succeeded', False)
        
        if restoration_succeeded:
            stats['restoration_succeeded'] += 1
        
        # >>>>> STRUKTUR RESULT ENTRY <<<<<
        result_entry = {
            'file': filename,
            'ground_truth': ground_truth,
            'metadata': metadata,
            'restoration_succeeded': restoration_succeeded,
            
            # Default values (akan diupdate fungsi helper)
            'embedding_original_found': False,
            'embedding_restored_found': False
        }
        
        # >>>>> FUNGSI HELPER UNTUK PREDIKSI (TOP-N) <<<<<
        def predict_all_models(embedding, suffix=''):
            results = {}
            
            # --- Validasi Embedding ---
            has_valid = False
            if isinstance(embedding, list) and len(embedding) > 0: has_valid = True
            elif isinstance(embedding, np.ndarray) and embedding.size > 0: has_valid = True
            
            if not has_valid:
                for k in range(1, 6):
                    results[f'knn_top{k}{suffix}'] = False
                    results[f'svm_top{k}{suffix}'] = False
                    results[f'cosine_top{k}{suffix}'] = False
                results[f'embedding{suffix}_found'] = False
                # Tetap isi prediction single value agar tidak error saat akses key nanti
                results[f'prediction_knn{suffix}'] = 'unknown'
                results[f'prediction_svm{suffix}'] = 'unknown'
                results[f'prediction_cosine{suffix}'] = 'unknown'
                return results

            results[f'embedding{suffix}_found'] = True
            embedding_np = np.array(embedding).reshape(1, -1)
            
            # 1. PREDIKSI KNN
            try:
                knn_probs = knn_model.predict_proba(embedding_np)[0]
                top_k_indices = np.argsort(knn_probs)[::-1][:5]
                top_k_labels = le.inverse_transform(top_k_indices)
                for k in range(1, 6):
                    is_correct = ground_truth in top_k_labels[:k]
                    results[f'knn_top{k}{suffix}'] = is_correct
                results[f'prediction_knn{suffix}'] = top_k_labels[0]
            except Exception:
                for k in range(1, 6): results[f'knn_top{k}{suffix}'] = False
                results[f'prediction_knn{suffix}'] = 'unknown'

            # 2. PREDIKSI SVM
            try:
                svm_probs = svm_model.predict_proba(embedding_np)[0]
                top_k_indices = np.argsort(svm_probs)[::-1][:5]
                top_k_labels = le.inverse_transform(top_k_indices)
                for k in range(1, 6):
                    is_correct = ground_truth in top_k_labels[:k]
                    results[f'svm_top{k}{suffix}'] = is_correct
                results[f'prediction_svm{suffix}'] = top_k_labels[0]
            except Exception:
                for k in range(1, 6): results[f'svm_top{k}{suffix}'] = False
                results[f'prediction_svm{suffix}'] = 'unknown'

            # 3. PREDIKSI COSINE
            try:
                top_n_results = cosine_similarity_top_n(embedding, gallery_embeddings, gallery_labels, top_n=5)
                top_k_labels = [label for label, score in top_n_results]
                for k in range(1, 6):
                    if len(top_k_labels) >= k: is_correct = ground_truth in top_k_labels[:k]
                    else: is_correct = ground_truth in top_k_labels
                    results[f'cosine_top{k}{suffix}'] = is_correct

                if top_n_results:
                    results[f'prediction_cosine{suffix}'] = top_n_results[0][0]
                    results[f'similarity_cosine{suffix}'] = top_n_results[0][1]
                else:
                    results[f'prediction_cosine{suffix}'] = 'unknown'
            except Exception:
                for k in range(1, 6): results[f'cosine_top{k}{suffix}'] = False
                results[f'prediction_cosine{suffix}'] = 'unknown'

            return results
        
        # ======================================================================
        # >>>>> BAGIAN YANG HILANG (EKSEKUSI FUNGSI) <<<<<
        # ======================================================================
        
        # 1. Proses Original
        original_results = predict_all_models(probe_embedding_original, '_original')
        result_entry.update(original_results)
        if original_results.get('embedding_original_found'):
            stats['original_valid'] += 1
            
        # 2. Proses Restored
        restored_results = predict_all_models(probe_embedding_restored, '_restored')
        result_entry.update(restored_results)
        if restored_results.get('embedding_restored_found'):
            stats['restored_valid'] += 1
            
        # 3. Hitung Both Valid
        if original_results.get('embedding_original_found') and restored_results.get('embedding_restored_found'):
            stats['both_valid'] += 1
            
        recognition_results.append(result_entry)
    
    # >>>>> TAMPILKAN STATISTIK <<<<<
    print(f"\n{'='*60}")
    print("STATISTIK EMBEDDING".center(60))
    print(f"{'='*60}")
    print(f"Total data probe               : {stats['total']}")
    print(f"Restorasi berhasil             : {stats['restoration_succeeded']} ({stats['restoration_succeeded']/stats['total']*100:.1f}%)")
    print(f"\nEmbedding ORIGINAL valid       : {stats['original_valid']} ({stats['original_valid']/stats['total']*100:.1f}%)")
    print(f"Embedding RESTORED valid       : {stats['restored_valid']} ({stats['restored_valid']/stats['total']*100:.1f}%)")
    print(f"Kedua embedding valid          : {stats['both_valid']} ({stats['both_valid']/stats['total']*100:.1f}%)")
    print(f"{'='*60}\n")
    
    # --- Simpan Hasil Rekognisi ---
    results_file_path = os.path.join(RESULTS_PATH, 'recognition_results_v6.4.3_comparison.json')
    try:
        with open(results_file_path, 'w', encoding='utf-8') as f:
            json.dump(recognition_results, f, indent=4, cls=NumpyJSONEncoder)
        print(f"Hasil rekognisi berhasil disimpan ke: {results_file_path}")
    except Exception as e:
        print(f"ERROR saat menyimpan hasil rekognisi: {e}")


Berhasil memuat 1364 fitur probe dari d:\UNSRI_DATA\_SKRIPSI\PROGRAM\v1\pipeline_skripsi\features_v6.4\probe_features_v6.4.json

=== DEBUG: Memeriksa struktur data pertama ===

Tipe embedding_original: <class 'list'>
Panjang embedding_original: 512

Tipe embedding_restored: <class 'list'>
Panjang embedding_restored: 512

Memulai proses rekognisi untuk ORIGINAL dan RESTORED...


Melakukan Rekognisi:   0%|          | 0/1364 [00:00<?, ?it/s]


                    STATISTIK EMBEDDING                     
Total data probe               : 1364
Restorasi berhasil             : 1364 (100.0%)

Embedding ORIGINAL valid       : 1351 (99.0%)
Embedding RESTORED valid       : 1364 (100.0%)
Kedua embedding valid          : 1351 (99.0%)

Hasil rekognisi berhasil disimpan ke: d:\UNSRI_DATA\_SKRIPSI\PROGRAM\v1\pipeline_skripsi\results_v6.4.3_recognition\recognition_results_v6.4.3_comparison.json


In [5]:
if not recognition_results:
    print("\nTidak ada hasil rekognisi untuk dianalisis.")
else:
    df_results = pd.DataFrame(recognition_results)
    
    print(f"\n{'='*100}")
    print("MEMULAI ANALISIS PERBANDINGAN ORIGINAL VS RESTORED (TOP-1 s/d TOP-5)".center(100))
    print(f"{'='*100}\n")
    
    # >>>>> FILTER DATASET <<<<<
    df_original = df_results[df_results['embedding_original_found'] == True].copy()
    df_restored = df_results[df_results['embedding_restored_found'] == True].copy()
    
    if df_original.empty and df_restored.empty:
        print("Tidak ada hasil rekognisi yang memiliki embedding untuk dianalisis.")
    else:
        # Prepare metadata columns
        for df in [df_original, df_restored]:
            if not df.empty:
                df['distance_m'] = df['metadata'].apply(lambda x: x.get('distance_m') if isinstance(x, dict) else None)
                df['height_m'] = df['metadata'].apply(lambda x: x.get('height_m') if isinstance(x, dict) else None)
        
        print(f"Data dengan embedding ORIGINAL valid: {len(df_original)}")
        print(f"Data dengan embedding RESTORED valid: {len(df_restored)}")

        # >>>>> DEFINISI SKENARIO <<<<<
        scenarios = [
            {"name": "Jarak Dekat (< 7m)", "filter_key": "distance_m", "condition": lambda x: x < 7},
            {"name": "Jarak Menengah (7-12m)", "filter_key": "distance_m", "condition": lambda x: 7 <= x < 12},
            {"name": "Jarak Jauh (>= 12m)", "filter_key": "distance_m", "condition": lambda x: x >= 12},
            {"name": "Ketinggian 1.5m", "filter_key": "height_m", "condition": lambda x: x == 1.5},
            {"name": "Ketinggian 3m", "filter_key": "height_m", "condition": lambda x: x == 3},
            {"name": "Ketinggian 4m", "filter_key": "height_m", "condition": lambda x: x == 4},
            {"name": "Ketinggian 5m", "filter_key": "height_m", "condition": lambda x: x == 5},
            {"name": "Semua Data Uji", "filter_key": None, "condition": None}
        ]
        
        models = ['knn', 'svm', 'cosine']
        model_names = {
            'knn': 'K-Nearest Neighbors (KNN)',
            'svm': 'Support Vector Machine (SVM)',
            'cosine': 'Cosine Similarity'
        }

        # >>>>> FUNGSI HITUNG TOP-N <<<<<
        def get_top_n_accuracy(df, model, suffix, n=5):
            """Menghasilkan list akurasi [Top-1, Top-2, ..., Top-N]"""
            accuracies = []
            for k in range(1, n + 1):
                # Nama kolom harus sesuai dengan output Langkah 4 (misal: knn_top1_original)
                col_name = f'{model}_top{k}{suffix}'
                
                # Fallback jika kolom top-n tidak ditemukan (misal: pakai is_correct biasa untuk top-1)
                fallback_col = f'is_correct_{model}{suffix}'
                
                if col_name in df.columns:
                    acc = df[col_name].mean() * 100
                    accuracies.append(f"{acc:.2f}%")
                elif k == 1 and fallback_col in df.columns:
                    acc = df[fallback_col].mean() * 100
                    accuracies.append(f"{acc:.2f}%")
                else:
                    accuracies.append("N/A")
            return accuracies

        # >>>>> GENERATE TABEL TOP-N <<<<<
        for model in models:
            table_data = []
            
            for scenario in scenarios:
                scenario_name = scenario["name"]
                
                # Filter Data
                if scenario["filter_key"] is None:
                    filt_orig = df_original
                    filt_rest = df_restored
                else:
                    key = scenario["filter_key"]
                    cond = scenario["condition"]
                    filt_orig = df_original[df_original[key].apply(cond)] if not df_original.empty else pd.DataFrame()
                    filt_rest = df_restored[df_restored[key].apply(cond)] if not df_restored.empty else pd.DataFrame()
                
                # --- Row untuk Original ---
                if not filt_orig.empty:
                    accs_orig = get_top_n_accuracy(filt_orig, model, '_original')
                    row_orig = [scenario_name, 'Original'] + accs_orig
                    table_data.append(row_orig)
                else:
                    table_data.append([scenario_name, 'Original'] + ['N/A']*5)
                
                # --- Row untuk Restored ---
                if not filt_rest.empty:
                    accs_rest = get_top_n_accuracy(filt_rest, model, '_restored')
                    row_rest = [scenario_name, 'Restored'] + accs_rest
                    table_data.append(row_rest)
                else:
                    table_data.append([scenario_name, 'Restored'] + ['N/A']*5)
            
            # --- Buat DataFrame & Tampilkan ---
            columns_top_n = ['Skenario', 'Kondisi', 'Top-1', 'Top-2', 'Top-3', 'Top-4', 'Top-5']
            df_table = pd.DataFrame(table_data, columns=columns_top_n)
            
            print(f"\n{'='*120}")
            print(f"TABEL AKURASI TOP-N: {model_names[model]}".center(120))
            print(f"{'='*120}")
            print(df_table.to_string(index=False))
            print(f"{'='*120}\n")
            
            # Simpan CSV
            save_path = os.path.join(RESULTS_PATH, f'top_n_accuracy_{model}.csv')
            df_table.to_csv(save_path, index=False)
            print(f"Tabel disimpan ke: {save_path}")

    print("\n" + "="*80)
    print("ANALISIS SELESAI".center(80))
    print("="*80)


                MEMULAI ANALISIS PERBANDINGAN ORIGINAL VS RESTORED (TOP-1 s/d TOP-5)                

Data dengan embedding ORIGINAL valid: 1351
Data dengan embedding RESTORED valid: 1364

                                     TABEL AKURASI TOP-N: K-Nearest Neighbors (KNN)                                     
              Skenario  Kondisi  Top-1  Top-2  Top-3  Top-4  Top-5
    Jarak Dekat (< 7m) Original 84.10% 84.33% 84.33% 87.10% 90.78%
    Jarak Dekat (< 7m) Restored 77.27% 77.73% 78.18% 82.05% 86.14%
Jarak Menengah (7-12m) Original 56.04% 57.18% 58.54% 64.46% 69.93%
Jarak Menengah (7-12m) Restored 72.95% 74.09% 74.77% 79.77% 84.77%
   Jarak Jauh (>= 12m) Original 11.30% 19.46% 28.45% 37.24% 46.03%
   Jarak Jauh (>= 12m) Restored 41.53% 46.07% 52.27% 60.54% 66.94%
       Ketinggian 1.5m Original 58.82% 61.76% 65.29% 70.59% 75.29%
       Ketinggian 1.5m Restored 65.69% 67.74% 70.97% 75.95% 81.23%
         Ketinggian 3m Original 57.23% 60.47% 64.31% 69.03% 73.16%
         Ketinggian

# ============== LANGKAH 5: ANALISIS HASIL REKOGNISI (ORIGINAL VS RESTORED) =============

if not recognition_results:

    print("\nTidak ada hasil rekognisi untuk dianalisis.")

else:

    df_results = pd.DataFrame(recognition_results)



    print(f"\n{'='*80}")

    print("MEMULAI ANALISIS PERBANDINGAN ORIGINAL VS RESTORED".center(80))

    print(f"{'='*80}\n")



    # >>>>> ANALISIS UNTUK ORIGINAL <<<<<

    df_original = df_results[df_results['embedding_original_found'] == True].copy()



    # >>>>> ANALISIS UNTUK RESTORED <<<<<

    df_restored = df_results[df_results['embedding_restored_found'] == True].copy()



    if df_original.empty and df_restored.empty:

        print("Tidak ada hasil rekognisi yang memiliki embedding untuk dianalisis.")

    else:

        # Prepare metadata columns untuk kedua dataframe

        for df in [df_original, df_restored]:

            if not df.empty:

                df['distance_m'] = df['metadata'].apply(lambda x: x.get('distance_m') if isinstance(x, dict) else None)

                df['height_m'] = df['metadata'].apply(lambda x: x.get('height_m') if isinstance(x, dict) else None)



        print(f"Data dengan embedding ORIGINAL valid: {len(df_original)}")

        print(f"Data dengan embedding RESTORED valid: {len(df_restored)}")



        # >>>>> FUNGSI HELPER UNTUK ANALISIS <<<<<

        def print_classification_report(title, y_true, y_pred, labels_list, save_cm=True):

            """Cetak classification report dan confusion matrix"""

            print(f"\n{'='*80}")

            print(f"{title}".center(80))

            print(f"{'='*80}")

            print(classification_report(y_true, y_pred, labels=labels_list, zero_division=0))



            if save_cm:

                try:

                    cm = confusion_matrix(y_true, y_pred, labels=labels_list)

                    plt.figure(figsize=(10, 8))

                    sns.heatmap(cm, annot=True, fmt='d', cmap='viridis',

                                xticklabels=labels_list, yticklabels=labels_list)

                    plt.title(title)

                    plt.ylabel('Label Sebenarnya (Ground Truth)')

                    plt.xlabel('Label Prediksi')

                    plot_filename = f"{title.replace(' ', '_').replace('(', '').replace(')', '').lower()}.png"

                    plot_path = os.path.join(RESULTS_PATH, plot_filename)

                    plt.savefig(plot_path, dpi=100, bbox_inches='tight')

                    plt.close()

                    print(f"Confusion matrix disimpan ke: {plot_filename}")

                except Exception as e:

                    print(f"Gagal membuat confusion matrix: {e}")

            print("="*80)



        def calculate_metrics(df, suffix, labels_list):

            """Hitung metrik untuk satu set data"""

            metrics = {}

            for model in ['knn', 'svm', 'cosine']:

                is_correct_col = f'is_correct_{model}{suffix}'

                prediction_col = f'prediction_{model}{suffix}'



                if is_correct_col in df.columns:

                    y_true = df['ground_truth']

                    y_pred = df[prediction_col]



                    accuracy = df[is_correct_col].mean()

                    recall = recall_score(y_true, y_pred, average='macro', zero_division=0, labels=labels_list)

                    f1 = f1_score(y_true, y_pred, average='macro', zero_division=0, labels=labels_list)



                    metrics[model] = {

                        'accuracy': accuracy,

                        'recall': recall,

                        'f1': f1

                    }

            return metrics



        # >>>>> ANALISIS OVERALL (CONFUSION MATRIX & REPORT) <<<<<



        # Get labels yang ada di data

        labels_original = sorted(df_original['ground_truth'].unique()) if not df_original.empty else []

        labels_restored = sorted(df_restored['ground_truth'].unique()) if not df_restored.empty else []



        print("\n" + "="*80)

        print("BAGIAN 1: CLASSIFICATION REPORT & CONFUSION MATRIX".center(80))

        print("="*80)



        # --- ORIGINAL ---

        if not df_original.empty:

            print("\n>>> EMBEDDING ORIGINAL <<<\n")

            print_classification_report(

                "KNN - Original Embedding",

                df_original['ground_truth'],

                df_original['prediction_knn_original'],

                labels_original

            )

            print_classification_report(

                "SVM - Original Embedding",

                df_original['ground_truth'],

                df_original['prediction_svm_original'],

                labels_original

            )

            print_classification_report(

                "Cosine Similarity - Original Embedding",

                df_original['ground_truth'],

                df_original['prediction_cosine_original'],

                labels_original

            )



        # --- RESTORED ---

        if not df_restored.empty:

            print("\n>>> EMBEDDING RESTORED <<<\n")

            print_classification_report(

                "KNN - Restored Embedding",

                df_restored['ground_truth'],

                df_restored['prediction_knn_restored'],

                labels_restored

            )

            print_classification_report(

                "SVM - Restored Embedding",

                df_restored['ground_truth'],

                df_restored['prediction_svm_restored'],

                labels_restored

            )

            print_classification_report(

                "Cosine Similarity - Restored Embedding",

                df_restored['ground_truth'],

                df_restored['prediction_cosine_restored'],

                labels_restored

            )



        # >>>>> BAGIAN 2: TABEL PERBANDINGAN BERDASARKAN SKENARIO <<<<<



        print("\n" + "="*80)

        print("BAGIAN 2: TABEL PERBANDINGAN KINERJA BERDASARKAN SKENARIO".center(80))

        print("="*80 + "\n")



        # Define scenarios

        scenarios = [

            {"name": "Jarak Dekat (< 7m)", "filter_key": "distance_m", "condition": lambda x: x < 7},

            {"name": "Jarak Menengah (7-12m)", "filter_key": "distance_m", "condition": lambda x: 7 <= x < 12},

            {"name": "Jarak Jauh (>= 12m)", "filter_key": "distance_m", "condition": lambda x: x >= 12},

            {"name": "Ketinggian 1.5m", "filter_key": "height_m", "condition": lambda x: x == 1.5},

            {"name": "Ketinggian 3m", "filter_key": "height_m", "condition": lambda x: x == 3},

            {"name": "Ketinggian 4m", "filter_key": "height_m", "condition": lambda x: x == 4},

            {"name": "Ketinggian 5m", "filter_key": "height_m", "condition": lambda x: x == 5},

            {"name": "Semua Data Uji", "filter_key": None, "condition": None}

        ]



        # Create separate tables for each model

        models = ['knn', 'svm', 'cosine']

        model_names = {

            'knn': 'K-Nearest Neighbors (KNN)',

            'svm': 'Support Vector Machine (SVM)',

            'cosine': 'Cosine Similarity'

        }



        all_tables = {}



        for model in models:

            comparison_data = []



            for scenario in scenarios:

                scenario_name = scenario["name"]



                # Filter data untuk scenario

                if scenario["filter_key"] is None:

                    # Semua data

                    filtered_orig = df_original

                    filtered_rest = df_restored

                else:

                    filter_key = scenario["filter_key"]

                    condition = scenario["condition"]

                    filtered_orig = df_original[

                        df_original[filter_key].notna() &

                        df_original[filter_key].apply(condition)

                    ] if not df_original.empty else pd.DataFrame()

                    filtered_rest = df_restored[

                        df_restored[filter_key].notna() &

                        df_restored[filter_key].apply(condition)

                    ] if not df_restored.empty else pd.DataFrame()



                # Calculate metrics untuk ORIGINAL

                if not filtered_orig.empty:

                    labels_scenario = sorted(filtered_orig['ground_truth'].unique())

                    metrics_orig = calculate_metrics(filtered_orig, '_original', labels_scenario)



                    if model in metrics_orig:

                        acc_orig = metrics_orig[model]['accuracy']

                        rec_orig = metrics_orig[model]['recall']

                        f1_orig = metrics_orig[model]['f1']

                        comparison_data.append([

                            scenario_name,

                            'Tanpa Restorasi',

                            f'{acc_orig:.2%}',

                            f'{rec_orig:.2f}',

                            f'{f1_orig:.2f}'

                        ])

                else:

                    comparison_data.append([

                        scenario_name,

                        'Tanpa Restorasi',

                        'N/A',

                        'N/A',

                        'N/A'

                    ])



                # Calculate metrics untuk RESTORED

                if not filtered_rest.empty:

                    labels_scenario = sorted(filtered_rest['ground_truth'].unique())

                    metrics_rest = calculate_metrics(filtered_rest, '_restored', labels_scenario)



                    if model in metrics_rest:

                        acc_rest = metrics_rest[model]['accuracy']

                        rec_rest = metrics_rest[model]['recall']

                        f1_rest = metrics_rest[model]['f1']



                        # Calculate improvement

                        if not filtered_orig.empty and model in metrics_orig:

                            acc_orig = metrics_orig[model]['accuracy']

                            improvement = ((acc_rest - acc_orig) / acc_orig * 100) if acc_orig > 0 else 0

                            improvement_str = f'{improvement:+.1f}%'

                        else:

                            improvement_str = 'N/A'



                        comparison_data.append([

                            scenario_name,

                            'Dengan Restorasi',

                            f'{acc_rest:.2%}',

                            f'{rec_rest:.2f}',

                            f'{f1_rest:.2f}',

                            improvement_str

                        ])

                else:

                    comparison_data.append([

                        scenario_name,

                        'Dengan Restorasi',

                        'N/A',

                        'N/A',

                        'N/A',

                        'N/A'

                    ])



            all_tables[model] = comparison_data



        # >>>>> TAMPILKAN TABEL <<<<<

        columns = ['Skenario', 'Metode', 'Akurasi (Top-1)', 'Recall', 'F1-Score', 'Peningkatan']



        for model in models:

            if all_tables[model]:

                table_df = pd.DataFrame(all_tables[model], columns=columns)

                print(f"\n{'='*120}")

                print(f"Tabel Perbandingan Kinerja - {model_names[model]}".center(120))

                print(f"{'='*120}")

                print(table_df.to_string(index=False))

                print(f"{'='*120}\n")



                # Save to CSV

                try:

                    csv_path = os.path.join(RESULTS_PATH, f'comparison_table_{model}_v6.4.3.csv')

                    table_df.to_csv(csv_path, index=False)

                    print(f"Tabel {model_names[model]} disimpan ke: {csv_path}")

                except Exception as e:

                    print(f"Gagal menyimpan tabel {model}: {e}")



        # >>>>> BAGIAN 3: SUMMARY STATISTIK <<<<<

        print(f"\n{'='*80}")

        print("BAGIAN 3: SUMMARY PENINGKATAN KINERJA".center(80))

        print(f"{'='*80}\n")



        # Calculate overall improvement

        if not df_original.empty and not df_restored.empty:

            # Get common ground truth labels

            common_labels = sorted(set(df_original['ground_truth'].unique()) &

                                 set(df_restored['ground_truth'].unique()))



            if common_labels:

                overall_orig = calculate_metrics(df_original, '_original', common_labels)

                overall_rest = calculate_metrics(df_restored, '_restored', common_labels)



                summary_data = []

                for model in models:

                    if model in overall_orig and model in overall_rest:

                        acc_orig = overall_orig[model]['accuracy']

                        acc_rest = overall_rest[model]['accuracy']

                        improvement = ((acc_rest - acc_orig) / acc_orig * 100) if acc_orig > 0 else 0



                        summary_data.append([

                            model_names[model],

                            f'{acc_orig:.2%}',

                            f'{acc_rest:.2%}',

                            f'{improvement:+.1f}%'

                        ])



                if summary_data:

                    summary_df = pd.DataFrame(summary_data,

                                            columns=['Model', 'Akurasi Original', 'Akurasi Restored', 'Peningkatan'])

                    print(summary_df.to_string(index=False))

                    print(f"\n{'='*80}\n")



                    # Save summary

                    try:

                        summary_path = os.path.join(RESULTS_PATH, 'summary_improvement_v3.csv')

                        summary_df.to_csv(summary_path, index=False)

                        print(f"Summary peningkatan disimpan ke: {summary_path}")

                    except Exception as e:

                        print(f"Gagal menyimpan summary: {e}")

print("\n" + "="\*80)

print("ANALISIS SELESAI".center(80))

print("="\*80)
