In [51]:
import pandas as pd
from pathlib import Path
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns

In [52]:
# Project directories. NOTE: BASE is an absolute, machine-specific location;
# adjust it when running the notebook on another machine.
BASE = Path(r"D:/SEMESTER 6/PROJECT CBR")
EVAL = BASE.joinpath("data/eval")      # evaluation inputs and generated metrics
RES = BASE.joinpath("data/results")    # model outputs

# Input files consumed by the evaluation cells below.
retrieval_fp = EVAL.joinpath("retrieval_eval_triple.csv")
prediction_fp = RES.joinpath("predictions.csv")

In [54]:
# I. Retrieval evaluation (TF-IDF, SVM, BERT)
# ─────────────────────────────────────────────
# Load the retrieval results from retrieval_fp.
df_ret = pd.read_csv(retrieval_fp)

# Average every @5 metric within each model, rounded to three decimals.
mean_metrics = (
    df_ret
    .groupby("model")[["precision@5", "recall@5", "f1@5", "accuracy@5"]]
    .mean()
    .round(3)
)

In [55]:
# Persist the per-model averages to CSV and echo them in the cell output.
mean_metrics.to_csv(EVAL / "retrieval_metrics.csv")
print("📊 Rata-rata Metrik Retrieval:")
print(mean_metrics)

# Bar chart of the averaged metrics.
# The Axes is created explicitly and handed to pandas: DataFrame.plot()
# without `ax` opens a figure of its own, which previously left the 10x6
# figure empty (and never closed) while the saved PNG used the default size.
fig, ax = plt.subplots(figsize=(10, 6))
mean_metrics.plot(kind="bar", ax=ax, rot=0)  # rot=0 keeps model names horizontal
ax.set_title("Performa Retrieval per Model")
ax.set_ylabel("Score")
ax.grid(axis="y", linestyle="--")
fig.tight_layout()
fig.savefig(EVAL / "retrieval_barplot.png")
plt.close(fig)  # close exactly the figure we created so nothing leaks

📊 Rata-rata Metrik Retrieval:
            precision@5  recall@5   f1@5  accuracy@5
model                                               
IndoBERT           0.04       0.2  0.067         0.2
SVM-TF-IDF         0.00       0.0  0.000         0.0
TF-IDF             0.00       0.0  0.000         0.0


<Figure size 1000x600 with 0 Axes>

In [56]:
# II. Solution-prediction evaluation: load the predictions from prediction_fp.
df_pred = pd.read_csv(filepath_or_buffer=prediction_fp)

# Score by simple exact string comparison.
def match(a, b):
    """Return 1 if the two solutions are the same text, otherwise 0.

    Both values are converted with ``str`` and stripped of surrounding
    whitespace before an exact, case-sensitive comparison.

    A missing value (``None``, ``NaN``, ``pd.NA``) on either side never
    matches. Without this guard ``str(NaN)`` is ``"nan"`` on both sides, so
    a missing prediction compared against a missing ground truth would be
    counted as correct.

    Parameters
    ----------
    a, b : scalar
        The predicted solution and the reference solution.

    Returns
    -------
    int
        1 for a match, 0 otherwise.
    """
    if pd.isna(a) or pd.isna(b):
        return 0
    return int(str(a).strip() == str(b).strip())

# Hand-written reference answers keyed by query id (simulated ground truth).
manual_ground_truth = {
    1: "Dipidana 2 tahun",
    2: "Mengembalikan uang koperasi",
    3: "Dijatuhi pidana penjara 5 tahun",
    4: "Gugatan tidak diterima",
    5: "Dihukum 1 tahun 6 bulan",
}
df_pred["true_solution"] = df_pred["query_id"].map(manual_ground_truth)

# Flag each row as 1/0 by comparing the prediction with its reference answer.
df_pred["correct"] = df_pred.apply(
    lambda row: match(row["predicted_solution"], row["true_solution"]),
    axis=1,
)

# Accuracy per method = number of hits / number of rows, rounded to 3 decimals.
hits = df_pred.groupby("method")["correct"]
df_score = (hits.sum() / hits.count()).round(3).to_frame("accuracy")

# Save the table and echo it in the cell output.
df_score.to_csv(EVAL / "prediction_metrics.csv")
print("\n✅ Akurasi Prediksi Solusi:")
print(df_score)



✅ Akurasi Prediksi Solusi:
          accuracy
method            
majority       0.0
weighted       0.0
