# In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import os
from matplotlib.patches import Patch
import matplotlib.gridspec as gridspec

# --- CONFIGURATION ---
# Replace this path with the actual location of your Excel/CSV file
file_path = r"E:\rsa-lenovo\komparasi\hasil analisa citra.xlsx"


def load_and_preprocess_data(path):
    """
    Read the results table and derive "Body Part" and "Brand" from "Filename".

    Filenames are assumed to look like "<Body Part>_<brand>_<parameter>...";
    the first two underscore-separated fields become the new columns.

    Args:
        path: Location of the data file (string or path-like). A ".csv"
            extension in any letter case is read with pandas.read_csv;
            anything else is handed to pandas.read_excel.

    Returns:
        The loaded DataFrame with two added columns: "Body Part" (taken
        verbatim from the filename) and "Brand" (lower-cased and stripped).
        Rows whose filename cannot be split get "Unknown" / "unknown".

    Raises:
        KeyError: If the table has no "Filename" column.
    """
    # Compare the extension case-insensitively so "DATA.CSV" is not sent to
    # the Excel reader; str() also lets pathlib.Path objects through.
    if str(path).lower().endswith(".csv"):
        df = pd.read_csv(path)
    else:
        df = pd.read_excel(path)

    def extract_info(filename):
        """Return (body_part, brand) parsed from one filename."""
        try:
            parts = filename.split("_")
            return parts[0], parts[1]
        except (AttributeError, IndexError):
            # AttributeError: the cell is not a string (e.g. NaN for a blank cell).
            # IndexError: no underscore, so there is no brand field.
            return "Unknown", "Unknown"

    pairs = [extract_info(name) for name in df["Filename"]]

    # Build both columns explicitly (object dtype, original index) so that an
    # empty table still receives them and the .str accessor below stays valid.
    df["Body Part"] = pd.Series(
        [body_part for body_part, _ in pairs], index=df.index, dtype="object"
    )
    brand = pd.Series([b for _, b in pairs], index=df.index, dtype="object")

    # Normalise brand names so differently-cased spellings group together.
    df["Brand"] = brand.str.lower().str.strip()

    return df


def validate_data(df):
    """
    Check, per body part, whether the data is complete enough to be plotted.

    A body part is recorded as complete when it has at least two brands and
    no missing value in any of the metrics SNR, CNR, MTF50, FWHM and Score.
    A human-readable report is printed along the way.

    Args:
        df: DataFrame with "Body Part" and "Brand" columns plus the metric
            columns. A metric column that is absent altogether is reported
            as missing for every row of the body part instead of raising
            KeyError.

    Returns:
        dict with the keys:
            "complete": body parts that passed every check.
            "incomplete": body parts with fewer than two brands.
            "missing_brand": one dict per incomplete body part holding
                "body_part", "brands" (present for that part) and "missing"
                (brands found elsewhere in df but not for that part).
            "missing_metrics": maps a body part to a list of
                {"metric": name, "count": int} entries, one per metric
                that has missing values.
    """
    print("\n" + "=" * 80)
    print("üîç VALIDASI DATA SEBELUM PLOTTING")
    print("=" * 80 + "\n")

    metrics = ["SNR", "CNR", "MTF50", "FWHM", "Score"]
    body_parts = df["Body Part"].unique()
    brands = df["Brand"].unique()

    validation_report = {
        "complete": [],
        "incomplete": [],
        "missing_brand": [],
        "missing_metrics": {},
    }

    all_brand_names = ', '.join([b.capitalize() for b in brands])
    print(f"üìä Total Body Parts: {len(body_parts)}")
    print(f"üè∑Ô∏è  Brands Found: {all_brand_names}")
    print(f"üìà Metrics: {', '.join(metrics)}\n")

    print("-" * 80)

    for part in body_parts:
        subset = df[df["Body Part"] == part]
        brands_in_part = subset["Brand"].unique()
        part_brand_names = ', '.join([b.capitalize() for b in brands_in_part])

        print(f"\nüì¶ {part}:")
        print(f"   Brands: {part_brand_names} ({len(brands_in_part)} brand(s))")

        # A comparison needs at least two brands for the same body part.
        if len(brands_in_part) < 2:
            # Computed once and reused for both the report and the message.
            missing_brands = [b for b in brands if b not in brands_in_part]
            validation_report["incomplete"].append(part)
            validation_report["missing_brand"].append({
                "body_part": part,
                "brands": list(brands_in_part),
                "missing": missing_brands,
            })
            print(f"   ‚ö†Ô∏è  WARNING: Hanya ada {len(brands_in_part)} brand, butuh minimal 2 untuk perbandingan!")
            print(f"   Missing: {', '.join([b.capitalize() for b in missing_brands])}")
            continue

        # Count missing values for every metric.
        missing_info = []
        for metric in metrics:
            if metric in subset.columns:
                # int() keeps the report free of numpy scalar types.
                missing_count = int(subset[metric].isna().sum())
            else:
                # Column not present at all: every row lacks this metric.
                missing_count = len(subset)

            if missing_count > 0:
                missing_info.append(f"{metric}: {missing_count} missing")
                validation_report["missing_metrics"].setdefault(part, []).append({
                    "metric": metric,
                    "count": missing_count,
                })

        if missing_info:
            print(f"   ‚ö†Ô∏è  Missing values: {', '.join(missing_info)}")
        else:
            print(f"   ‚úÖ Data lengkap!")
            validation_report["complete"].append(part)

        # Show the number of samples per brand.
        for brand in brands_in_part:
            count = len(subset[subset["Brand"] == brand])
            print(f"      ‚Ä¢ {brand.capitalize()}: {count} samples")

    print("\n" + "=" * 80)
    print("üìã RINGKASAN VALIDASI:")
    print("=" * 80)
    print(f"‚úÖ Body parts dengan data lengkap: {len(validation_report['complete'])}")
    if validation_report['complete']:
        print(f"   {', '.join(validation_report['complete'])}")

    print(f"\n‚ö†Ô∏è  Body parts dengan data tidak lengkap: {len(validation_report['incomplete'])}")
    if validation_report['incomplete']:
        print(f"   {', '.join(validation_report['incomplete'])}")

    print("\n" + "=" * 80 + "\n")

    # Only a warning is printed here; the user is not prompted and the
    # caller decides what to do with the report.
    if validation_report['incomplete'] or validation_report['missing_metrics']:
        print("‚ö†Ô∏è  PERHATIAN: Ada data yang tidak lengkap!")
        print("Plotting akan melewati body parts yang tidak lengkap.")
        print("\nSilakan cek data Anda jika ingin melengkapi sebelum plotting.")
        print("\n" + "=" * 80 + "\n")

    return validation_report


def _brand_stats(subset, metric, brands):
    """Return a DataFrame with one Brand/Mean/Std/Count row per brand for one metric."""
    rows = []
    for brand in brands:
        values = subset.loc[subset["Brand"] == brand, metric]
        rows.append({
            "Brand": brand.capitalize(),
            "Mean": values.mean(),
            # The standard deviation of a single sample is undefined; draw no error bar.
            "Std": values.std() if len(values) > 1 else 0,
            "Count": len(values),
        })
    return pd.DataFrame(rows)


def _compare_two_brands(stats_df, higher_wins):
    """Return (percent difference, winner label) for a two-row stats table."""
    first, second = stats_df["Mean"].iloc[0], stats_df["Mean"].iloc[1]
    midpoint = (first + second) / 2
    # abs() on the denominator keeps the percentage non-negative when the
    # means are negative.
    diff_pct = abs(first - second) / abs(midpoint) * 100 if midpoint != 0 else 0

    if first == second:
        # Equal means have no winner; do not silently favour the second brand.
        return diff_pct, "Tie"

    first_wins = first > second if higher_wins else first < second
    winner = stats_df["Brand"].iloc[0] if first_wins else stats_df["Brand"].iloc[1]
    return diff_pct, winner


def _plot_file_name(part, fallback):
    """Return '<part>_comparison.png' with only letters, digits and spaces kept from part."""
    cleaned = "".join(
        [c for c in part if c.isalpha() or c.isdigit() or c == " "]
    ).rstrip()
    # If nothing survives the filter, use the fallback so the name is not just a suffix.
    return f"{cleaned or fallback}_comparison.png"


def generate_comparison_plots(df, output_folder, validation_report):
    """
    Save one bar-chart comparison figure per complete body part.

    Each figure is a 2x3 grid: five bar charts (SNR, CNR, MTF50, FWHM,
    Score) showing the per-brand mean with standard-deviation error bars,
    and a sixth cell holding the brand legend and explanatory notes. When a
    body part has exactly two brands, each chart is annotated with the
    percent difference between the means and the winning brand (lower wins
    for FWHM, higher for the others; equal means are labelled "Tie").

    Args:
        df: DataFrame with "Body Part", "Brand" and the metric columns.
        output_folder: Directory that receives the PNG files; it is created
            if it does not exist.
        validation_report: Dict whose "complete" entry lists the body parts
            to plot; every other body part is skipped.

    Returns:
        None. Progress is printed and files are written as a side effect.
    """
    # Metrics to compare, in the order they fill the grid.
    metrics = ["SNR", "CNR", "MTF50", "FWHM", "Score"]

    # Direction of "better" per metric.
    higher_is_better = {
        "SNR": True,
        "CNR": True,
        "MTF50": True,
        "FWHM": False,  # a smaller FWHM is usually better (sharper)
        "Score": True,
    }

    # Fixed colour per known brand; unknown brands fall back to grey.
    palette = {
        "canon": "#2E86AB",
        "madeena": "#A23B72",
    }

    # Grid cells for the five metric charts; cell (1, 2) is left for the legend.
    bar_positions = [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1)]

    print(f"üìÇ Hasil plot akan disimpan di: {output_folder}\n")
    print("üé® Memulai proses plotting...\n")

    # Only body parts that passed validation are plotted.
    body_parts_to_plot = validation_report['complete']

    if not body_parts_to_plot:
        print("‚ùå Tidak ada body part dengan data lengkap untuk di-plot!")
        return

    # Global side effects are deferred until something will actually be drawn.
    sns.set_theme(style="whitegrid")
    os.makedirs(output_folder, exist_ok=True)

    for idx, part in enumerate(body_parts_to_plot, 1):
        print(f"[{idx}/{len(body_parts_to_plot)}] Memproses: {part}...", end=" ")

        subset = df[df["Body Part"] == part]
        brands_available = subset["Brand"].unique()

        file_name = _plot_file_name(part, fallback=f"part{idx}")
        save_path = os.path.join(output_folder, file_name)

        fig = plt.figure(figsize=(18, 10))
        try:
            gs = gridspec.GridSpec(2, 3, figure=fig, hspace=0.3, wspace=0.3)

            fig.suptitle(
                f"Analisis Perbandingan Citra: {part}",
                fontsize=24,
                fontweight="bold",
                y=0.98
            )

            # === BAR CHARTS (five metrics over two rows) ===
            for (row, col), metric in zip(bar_positions, metrics):
                ax = fig.add_subplot(gs[row, col])
                stats_df = _brand_stats(subset, metric, brands_available)

                bars = ax.bar(
                    stats_df["Brand"],
                    stats_df["Mean"],
                    yerr=stats_df["Std"],
                    color=[palette.get(b.lower(), "#888888") for b in stats_df["Brand"]],
                    capsize=8,
                    alpha=0.85,
                    edgecolor="black",
                    linewidth=1.5
                )

                ax.set_title(metric, fontsize=14, fontweight="bold", pad=8)
                ax.set_xlabel("Brand", fontsize=10)
                ax.set_ylabel("Value", fontsize=10)
                ax.grid(axis='y', alpha=0.3, linestyle='--')

                # Value label above each bar, placed past the error bar.
                for bar, mean, std in zip(bars, stats_df["Mean"], stats_df["Std"]):
                    height = bar.get_height()
                    if std > 0:
                        label_text = f'{mean:.2f}\n¬±{std:.2f}'
                    else:
                        label_text = f'{mean:.2f}'

                    ax.text(
                        bar.get_x() + bar.get_width()/2.,
                        height + std,
                        label_text,
                        ha='center',
                        va='bottom',
                        fontsize=9,
                        fontweight='bold'
                    )

                # The head-to-head annotation is only drawn for exactly two brands.
                if len(stats_df) == 2:
                    diff_pct, winner = _compare_two_brands(
                        stats_df, higher_is_better.get(metric, True)
                    )
                    ax.text(
                        0.5, 0.95,
                        f'Œî: {diff_pct:.1f}% | {winner}',
                        transform=ax.transAxes,
                        ha='center',
                        va='top',
                        fontsize=8,
                        bbox=dict(boxstyle='round,pad=0.4', facecolor='yellow', alpha=0.3)
                    )

            # === LEGEND AND NOTES ===
            ax_legend = fig.add_subplot(gs[1, 2])
            ax_legend.axis('off')

            legend_elements = [
                Patch(facecolor=palette.get(b.lower(), "#888888"),
                      edgecolor='black',
                      label=b.capitalize())
                for b in brands_available
            ]
            ax_legend.legend(
                handles=legend_elements,
                loc='center',
                fontsize=12,
                title="Brand",
                title_fontsize=14,
                frameon=True,
                fancybox=True,
                shadow=True
            )

            note_text = (
                "üìä Notes:\n"
                "‚Ä¢ Error bars = std deviation\n"
                "‚Ä¢ Œî = % difference\n"
                "‚Ä¢ Winner by metric preference\n"
                "‚Ä¢ Higher is better (except FWHM)"
            )
            ax_legend.text(
                0.5, 0.3,
                note_text,
                transform=ax_legend.transAxes,
                ha='center',
                va='top',
                fontsize=9,
                bbox=dict(boxstyle='round,pad=0.6', facecolor='lightblue', alpha=0.3),
                family='monospace'
            )

            fig.savefig(save_path, bbox_inches="tight", dpi=300)
        finally:
            # Always release the figure, even if drawing or saving failed, so
            # figures do not accumulate and nothing is shown in notebook output.
            plt.close(fig)

        print(f"‚úÖ Tersimpan: {file_name}")

    print("\n" + "=" * 80)
    print(f"üéâ Selesai! Total {len(body_parts_to_plot)} gambar telah disimpan.")
    print("=" * 80 + "\n")


# --- PROGRAM EXECUTION ---
# Load the table, print a preview, validate it, then plot every complete
# body part into a "hasil_plot_perbandingan" folder next to the input file.
try:
    print("\n" + "=" * 80)
    print("üöÄ MEMULAI PROGRAM ANALISIS PERBANDINGAN CITRA")
    print("=" * 80)
    
    print("\nüìÇ Membaca data dari file...")
    df_result = load_and_preprocess_data(file_path)
    print(f"‚úÖ Data berhasil dibaca: {len(df_result)} baris")

    # Show a preview of the data
    print("\nüìã Preview Data:")
    print("-" * 80)
    preview = df_result[["Filename", "Body Part", "Brand", "Score"]].head(10)
    try:
        show_preview = display  # only defined under IPython/Jupyter
    except NameError:
        # As a plain script there is no display(); print the table instead
        # of aborting the whole run.
        show_preview = print
    show_preview(preview)
    
    # Validate the data before anything is plotted
    validation_report = validate_data(df_result)
    
    # Continue to plotting only if at least one body part is complete
    if validation_report['complete']:
        # The output folder lives next to the input file
        base_dir = os.path.dirname(os.path.abspath(file_path))
        output_dir = os.path.join(base_dir, "hasil_plot_perbandingan")
        os.makedirs(output_dir, exist_ok=True)
        
        # Run the visualisation
        generate_comparison_plots(df_result, output_dir, validation_report)
        
        print(f"\nüìÅ Semua gambar telah disimpan di:\n   {output_dir}")
    else:
        print("\n‚ùå Tidak dapat melanjutkan plotting karena tidak ada data yang lengkap.")
        print("Silakan perbaiki data Anda terlebih dahulu.\n")

except FileNotFoundError:
    print(f"\n‚ùå Error: File tidak ditemukan di path: {file_path}")
    print("Pastikan nama file dan path sudah benar.\n")
except Exception as e:
    # Top-level boundary: report the error with its traceback instead of
    # letting the exception propagate.
    print(f"\n‚ùå Terjadi kesalahan: {e}")
    import traceback
    traceback.print_exc()
