In [22]:
import pandas as pd
import numpy as np

# --- 1. File name configuration ---
# Absolute paths to the two inputs compared below: the ground-truth table (CSV)
# and the prediction table (Excel workbook). Both are machine-specific paths.
FILE_GROUND_TRUTH = '/Users/reithvxz/Documents/UNAIR/Semester 5/NLP/UAS/REVISI UAS/Ground Truth Putusan (Model).csv'
FILE_PREDIKSI = '/Users/reithvxz/Documents/UNAIR/Semester 5/NLP/UAS/REVISI UAS/HASIL_FINAL_PIPELINE_B.xlsx'

# --- 2. Metric computation (Precision, Recall, F1) ---
def _normalize_cell(value):
    """Return *value* as lower-cased, stripped text; missing values become ''."""
    # Detect missing values BEFORE stringifying: None, float NaN, pd.NA and
    # pd.NaT would otherwise turn into bogus tokens ('none', '<na>', 'nat').
    if value is None or (pd.api.types.is_scalar(value) and pd.isna(value)):
        return ''
    text = str(value).lower().strip()
    # A NaN that was already stringified upstream arrives as the literal 'nan'.
    return '' if text == 'nan' else text


def calculate_metrics_token_based(gt_text, pred_text):
    """
    Score a prediction against the ground truth by unique-word overlap.

    Both inputs are lower-cased, stripped and split on whitespace into sets
    of unique tokens, so word order and repetitions are ignored.

    Args:
        gt_text: Ground-truth cell value (any type; converted with ``str``).
        pred_text: Predicted cell value (any type; converted with ``str``).

    Returns:
        A ``(precision, recall, f1)`` tuple of floats in ``[0.0, 1.0]``.
        Two empty/missing values count as a perfect match ``(1.0, 1.0, 1.0)``;
        exactly one empty/missing value scores ``(0.0, 0.0, 0.0)``.
    """
    gt_text = _normalize_cell(gt_text)
    pred_text = _normalize_cell(pred_text)

    # Both empty: nothing to extract and nothing extracted -> perfect match.
    if not gt_text and not pred_text:
        return 1.0, 1.0, 1.0

    # Exactly one side empty: spurious prediction or missed entity.
    if not gt_text or not pred_text:
        return 0.0, 0.0, 0.0

    # Tokenise into sets of unique words.
    gt_tokens = set(gt_text.split())
    pred_tokens = set(pred_text.split())

    tp = len(gt_tokens & pred_tokens)  # words present in both
    fp = len(pred_tokens - gt_tokens)  # predicted words absent from the ground truth
    fn = len(gt_tokens - pred_tokens)  # ground-truth words the prediction missed

    # Both texts are non-empty after stripping, so both token sets are
    # non-empty and neither denominator can be zero.
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)

    if (precision + recall) > 0:
        f1 = 2 * (precision * recall) / (precision + recall)
    else:
        f1 = 0.0

    return precision, recall, f1

# --- 3. Load data ---
# Read the ground-truth CSV and the prediction workbook into DataFrames.
print("Membaca file...")
try:
    df_gt = pd.read_csv(FILE_GROUND_TRUTH)
    df_pred = pd.read_excel(FILE_PREDIKSI)
except FileNotFoundError as e:
    print(f"Error: {e}")
    # Stop here with a non-zero status. The bare `exit()` helper is injected
    # by `site`/IPython for interactive use and is not a dependable way to
    # halt a program; raising SystemExit always aborts the remaining code.
    raise SystemExit(1) from e

# Normalise the join key on both sides to stripped strings so the merge on
# NAMA_FILE is not defeated by dtype differences or stray whitespace.
df_gt['NAMA_FILE'] = df_gt['NAMA_FILE'].astype(str).str.strip()
df_pred['NAMA_FILE'] = df_pred['NAMA_FILE'].astype(str).str.strip()

# --- 4. Combine the data (merge) ---
# Inner join on the file name: only files present in BOTH datasets are compared.
# Columns shared by both frames get the '_GT' / '_PRED' suffixes.
merged_df = pd.merge(
    left=df_gt,
    right=df_pred,
    how='inner',
    on='NAMA_FILE',
    suffixes=('_GT', '_PRED'),
)

# Entity columns to score: every ground-truth column (except the join key)
# that the prediction table also provides, in ground-truth column order.
target_columns = []
for col in df_gt.columns:
    if col == 'NAMA_FILE':
        continue
    if col in df_pred.columns:
        target_columns.append(col)

# --- 5. Per-entity scoring ---
# For each entity column, score every merged row and average the row scores.
report_data = []

print("\nMenghitung skor per entitas...")
for col in target_columns:
    gt_key = col + '_GT'
    pred_key = col + '_PRED'

    # One (precision, recall, f1) triple per merged row.
    row_scores = [
        calculate_metrics_token_based(row[gt_key], row[pred_key])
        for _, row in merged_df.iterrows()
    ]

    # Column-level result = unweighted mean of the per-row scores.
    report_data.append({
        'Entitas': col,
        'Precision': np.mean([score[0] for score in row_scores]),
        'Recall': np.mean([score[1] for score in row_scores]),
        'F1 Score': np.mean([score[2] for score in row_scores])
    })

# --- 6. Show the results ---
results_df = pd.DataFrame(report_data)

# Render floats with four decimal places (process-wide pandas display option).
pd.set_option('display.float_format', '{:.4f}'.format)

metric_columns = ['Precision', 'Recall', 'F1 Score']

print("\n=== HASIL EVALUASI PIPELINE ===")
print(results_df)

# Macro average: plain mean of the per-entity scores, each entity weighted equally.
print("\n=== RATA-RATA GLOBAL (MACRO AVERAGE) ===")
print(results_df[metric_columns].mean())

# Optional: save the report to Excel/CSV
# results_df.to_csv('laporan_evaluasi.csv', index=False)

Membaca file...

Menghitung skor per entitas...

=== HASIL EVALUASI PIPELINE ===
                   Entitas  Precision  Recall  F1 Score
0            NOMOR_PUTUSAN     0.9680  0.9680    0.9680
1        LOKASI_PENGADILAN     0.9160  0.9120    0.9132
2               NAMA_HAKIM     0.9680  0.9680    0.9680
3          TANGGAL_PUTUSAN     0.6880  0.6867    0.6872
4            NAMA_TERDAKWA     0.9520  0.9520    0.9520
5   TANGGAL_LAHIR_TERDAKWA     0.8985  0.8950    0.8964
6           AGAMA_TERDAKWA     0.9940  0.9933    0.9936
7            VONIS_PENJARA     0.9440  0.9408    0.9420
8            PASAL_DAKWAAN     0.9754  0.9704    0.9720
9                   KORBAN     0.9600  0.9576    0.9586
10          STATUS_PUTUSAN     0.8720  0.8689    0.8699
11    AKIBAT_TINDAK_PIDANA     0.9360  0.9335    0.9341
12     MODUS_TINDAK_PIDANA     0.8557  0.8508    0.8525

=== RATA-RATA GLOBAL (MACRO AVERAGE) ===
Precision   0.9175
Recall      0.9151
F1 Score    0.9160
dtype: float64
