In [1]:
# Mount Google Drive inside the Colab runtime; the rest of the notebook reads
# and writes files below this mount point.
from google.colab import drive

drive.mount(mountpoint="/content/drive")

Mounted at /content/drive


In [2]:
# Project root on the mounted Drive, and the "models" folder directly below it.
BASE_DIR = "/content/drive/MyDrive/SKRIPSI"
OUT_DIR = "/".join((BASE_DIR, "models"))

# Eval

In [12]:
import os
import json
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from matplotlib.colors import LinearSegmentedColormap


# Root folder scanned for per-model run directories. This is the same path
# that OUT_DIR resolves to in the configuration cell.
BASE_MODELS_DIR = "/content/drive/MyDrive/SKRIPSI/models"

# Soft pink gradient (light -> dark) used by the styled tables and by the
# confusion-matrix plots. Defined exactly once.
pink_cmap = LinearSegmentedColormap.from_list(
    "soft_pink_dark",
    ["#FCE4EC", "#F8BBD0", "#EC407A"]
)

def safe_get(d, keys, default=None):
    """Follow ``keys`` through nested dictionaries and return the value found.

    Args:
        d: Outermost object to start from (normally a dict).
        keys: Iterable of keys, applied one nesting level at a time.
        default: Value returned when a level is not a dict or lacks the key.

    Returns:
        The value at the end of the key path, or ``default`` when the path
        cannot be followed all the way. An empty ``keys`` returns ``d`` itself.
    """
    node = d
    for key in keys:
        if isinstance(node, dict) and key in node:
            node = node[key]
        else:
            return default
    return node

def _iter_run_dirs_for_split(split_type, split_dir):
    """Yield ``(scheme_name, fold_name, run_dir)`` for each candidate run in ``split_dir``.

    Directory layout handled:
        holdout/<HO_xxx>/           -> ``fold_name`` is None, ``run_dir`` is the scheme folder
        kfold/k<k>/fold<fold>/      -> ``scheme_name`` is the ``k<k>`` folder name

    Entries are visited in sorted name order. Non-directory scheme entries are
    skipped here; fold entries are yielded unchecked and validated by the caller.
    """
    for scheme_name in sorted(os.listdir(split_dir)):
        scheme_dir = os.path.join(split_dir, scheme_name)
        if not os.path.isdir(scheme_dir):
            continue
        if split_type == "holdout":
            yield scheme_name, None, scheme_dir
        else:
            for fold_name in sorted(os.listdir(scheme_dir)):
                yield scheme_name, fold_name, os.path.join(scheme_dir, fold_name)


def _load_summary_row(model_name, split_type, scheme_name, fold_name, run_dir):
    """Build one table row from ``<run_dir>/summary.json``.

    Returns:
        A dict with the table columns, or None when ``run_dir`` is not a
        directory or contains no ``summary.json``.
    """
    summary_path = os.path.join(run_dir, "summary.json")
    if not os.path.isdir(run_dir) or not os.path.exists(summary_path):
        return None

    with open(summary_path, "r", encoding="utf-8") as f:
        summary = json.load(f)

    train_time_sec = safe_get(summary, ["train_time_sec"], None)

    # The last epoch is looked up in train_metrics first, then in eval_metrics.
    final_epoch = safe_get(summary, ["train_metrics", "epoch"], None)
    if final_epoch is None:
        final_epoch = safe_get(summary, ["eval_metrics", "epoch"], None)

    return {
        "Model": model_name,
        "Split": split_type,
        "Scheme": scheme_name,            # e.g. HO_80_10_10 or k2
        "Fold": fold_name,                # e.g. fold1; None for holdout runs
        "Training Time (Minutes)": (train_time_sec / 60.0) if train_time_sec is not None else None,
        "Final Epoch": final_epoch,
        "Best F1-Macro": safe_get(summary, ["eval_metrics", "eval_f1_macro"], None),
        "Run Path": run_dir
    }


rows = []

# Expected structure:
# models/<ModelTag>/holdout/<HO_xxx>/summary.json
# models/<ModelTag>/kfold/k<k>/fold<fold>/summary.json
for model_name in os.listdir(BASE_MODELS_DIR):
    model_dir = os.path.join(BASE_MODELS_DIR, model_name)
    # Plain files and entries whose name starts with "_" are not model folders.
    if not os.path.isdir(model_dir) or model_name.startswith("_"):
        continue

    for split_type in ["holdout", "kfold"]:
        split_dir = os.path.join(model_dir, split_type)
        if not os.path.isdir(split_dir):
            continue

        for scheme_name, fold_name, run_dir in _iter_run_dirs_for_split(split_type, split_dir):
            row = _load_summary_row(model_name, split_type, scheme_name, fold_name, run_dir)
            if row is not None:
                rows.append(row)

# Turn the collected run records into one table.
df_time = pd.DataFrame(rows)

if not df_time.empty:
    sort_keys = ["Model", "Split", "Scheme", "Fold"]
    df_time = df_time.sort_values(sort_keys, na_position="last")
    df_time = df_time.reset_index(drop=True)

    shown_columns = [
        "Split", "Scheme", "Fold",
        "Training Time (Minutes)", "Final Epoch", "Best F1-Macro",
    ]
    # Missing values are rendered as "-" instead of "nan".
    cell_formats = {
        "Training Time (Minutes)": lambda value: "-" if pd.isna(value) else f"{value:.2f}",
        "Best F1-Macro": lambda value: "-" if pd.isna(value) else f"{value:.4f}",
    }

    # Show one styled table per model.
    for model in df_time["Model"].unique():
        print(f"\n===== TRAINING TIME TABLE | {model} =====")
        view = df_time.loc[df_time["Model"] == model, shown_columns]
        styler = view.style.background_gradient(cmap=pink_cmap)
        display(styler.format(cell_formats))

    # Save the full table as CSV next to the model folders.
    csv_path = os.path.join(BASE_MODELS_DIR, "summary_training_time_per_model.csv")
    df_time.to_csv(csv_path, index=False)
    print("\nSaved:", csv_path)
else:
    print("Tidak ada summary.json yang ditemukan. Pastikan training sudah menyimpan summary.json di tiap run folder.")


===== TRAINING TIME TABLE | IndoBERT =====


Unnamed: 0,Split,Scheme,Fold,Training Time (Minutes),Final Epoch,Best F1-Macro
0,holdout,HO_70_15_15,,4.06,8.0,0.7929
1,holdout,HO_80_10_10,,3.29,6.0,0.7815
2,holdout,HO_90_05_05,,4.07,8.0,0.7937
3,kfold,k2,fold1,2.25,5.0,0.7529
4,kfold,k2,fold2,1.83,4.0,0.7434
5,kfold,k3,fold1,2.08,4.0,0.7687
6,kfold,k3,fold2,3.09,6.0,0.7653
7,kfold,k3,fold3,3.1,6.0,0.7611
8,kfold,k4,fold1,2.22,4.0,0.7591
9,kfold,k4,fold2,1.65,3.0,0.7642



===== TRAINING TIME TABLE | IndoBERTweet =====


Unnamed: 0,Split,Scheme,Fold,Training Time (Minutes),Final Epoch,Best F1-Macro
23,holdout,HO_70_15_15,,2.89,6.0,0.7772
24,holdout,HO_80_10_10,,3.66,7.0,0.7823
25,holdout,HO_90_05_05,,2.88,6.0,0.7772
26,kfold,k2,fold1,2.55,6.0,0.7675
27,kfold,k2,fold2,2.54,6.0,0.7616
28,kfold,k3,fold1,3.37,7.0,0.7866
29,kfold,k3,fold2,2.41,5.0,0.7692
30,kfold,k3,fold3,2.91,6.0,0.7649
31,kfold,k4,fold1,3.12,6.0,0.7848
32,kfold,k4,fold2,5.68,11.0,0.7908



===== TRAINING TIME TABLE | IndoRoBERTa =====


Unnamed: 0,Split,Scheme,Fold,Training Time (Minutes),Final Epoch,Best F1-Macro
46,holdout,HO_70_15_15,,3.06,6.0,0.729
47,holdout,HO_80_10_10,,2.22,4.0,0.7432
48,holdout,HO_90_05_05,,3.05,6.0,0.729
49,kfold,k2,fold1,3.64,8.0,0.7126
50,kfold,k2,fold2,3.15,7.0,0.7075
51,kfold,k3,fold1,4.13,8.0,0.7236
52,kfold,k3,fold2,3.09,6.0,0.7244
53,kfold,k3,fold3,2.61,5.0,0.7152
54,kfold,k4,fold1,2.76,5.0,0.7178
55,kfold,k4,fold2,3.32,6.0,0.7207



Saved: /content/drive/MyDrive/SKRIPSI/models/summary_training_time_per_model.csv


In [22]:
# Columns shown in the summary table, in display order.
cols_show = [
    "Model", "Split", "Scheme", "Fold",
    "Training Time (Minutes)", "Final Epoch", "Best F1-Macro"
]

# NOTE(review): the only df_13 construction visible in this notebook yields
# split_norm / scheme_norm / fold_norm / eval_* columns, not the capitalised
# names selected here - confirm which frame this cell is meant to receive.

# Header and body cell borders/colours.
table_css = [
    {
        "selector": "th",
        "props": [
            ("background-color", "#EC407A"),
            ("color", "white"),
            ("text-align", "center"),
            ("border", "1px solid #AD1457"),
        ],
    },
    {
        "selector": "td",
        "props": [("border", "1px solid #F06292")],
    },
]

# Every column except the F1 score gets a flat light-pink background.
flat_columns = [name for name in cols_show if name != "Best F1-Macro"]
flat_css = {
    "background-color": "#FCE4EC",
    "color": "black",
    "text-align": "center",
}

# The F1 column keeps a gradient so better scores stand out.
f1_cmap = LinearSegmentedColormap.from_list(
    "pink_grad",
    ["#FCE4EC", "#F48FB1", "#C2185B"]
)

styled = df_13[cols_show].style
styled = styled.set_table_styles(table_css)
styled = styled.set_properties(subset=flat_columns, **flat_css)
styled = styled.background_gradient(subset=["Best F1-Macro"], cmap=f1_cmap)
styled = styled.format({
    "Training Time (Minutes)": lambda value: "-" if pd.isna(value) else f"{value:.2f}",
    "Best F1-Macro": lambda value: "-" if pd.isna(value) else f"{value:.4f}",
})

display(styled)

Unnamed: 0,Model,Split,Scheme,Fold,Training Time (Minutes),Final Epoch,Best F1-Macro
0,IndoBERT,holdout,HO_70_15_15,,4.06,8.0,0.7929
1,IndoBERT,holdout,HO_80_10_10,,3.29,6.0,0.7815
2,IndoBERT,holdout,HO_90_05_05,,4.07,8.0,0.7937
3,IndoBERTweet,holdout,HO_70_15_15,,2.89,6.0,0.7772
4,IndoBERTweet,holdout,HO_80_10_10,,3.66,7.0,0.7823
5,IndoBERTweet,holdout,HO_90_05_05,,2.88,6.0,0.7772
6,IndoRoBERTa,holdout,HO_70_15_15,,3.06,6.0,0.729
7,IndoRoBERTa,holdout,HO_80_10_10,,2.22,4.0,0.7432
8,IndoRoBERTa,holdout,HO_90_05_05,,3.05,6.0,0.729
9,IndoBERTweet,kfold,k5,fold2,7.51,14.0,0.8099


In [25]:
import pandas as pd
from matplotlib.colors import LinearSegmentedColormap

# Result tables written by the holdout and k-fold experiments.
HOLDOUT_CSV = "/content/drive/MyDrive/SKRIPSI/results/results_holdout_final.csv"
KFOLD_CSV   = "/content/drive/MyDrive/SKRIPSI/results/results_kfold_progress.csv"

df_ho = pd.read_csv(HOLDOUT_CSV)
df_kf = pd.read_csv(KFOLD_CSV)

# Clean the column names: trim whitespace and lower-case them.
for df in (df_ho, df_kf):
    df.columns = df.columns.str.strip().str.lower()

# Holdout results may lack a fold column; add it so both frames line up.
if "fold" not in df_ho:
    df_ho["fold"] = None

# Stack holdout and k-fold rows into one frame.
df_all = pd.concat([df_ho, df_kf], ignore_index=True)

# Tidy the text columns: all are cast to str and trimmed; every one except
# the model name is lower-cased as well.
for column in ("split_type", "scheme", "fold", "model"):
    trimmed = df_all[column].astype(str).str.strip()
    df_all[column] = trimmed if column == "model" else trimmed.str.lower()

print("Kolom tersedia:", df_all.columns.tolist())

Kolom tersedia: ['split_type', 'scheme', 'model', 'train_time_sec', 'eval_loss', 'eval_accuracy', 'eval_precision_macro', 'eval_recall_macro', 'eval_f1_macro', 'saved_to', 'fold']


In [28]:
import re
import pandas as pd
from matplotlib.colors import LinearSegmentedColormap

# Result tables written by the holdout and k-fold experiments.
HOLDOUT_CSV = "/content/drive/MyDrive/SKRIPSI/results/results_holdout_final.csv"
KFOLD_CSV   = "/content/drive/MyDrive/SKRIPSI/results/results_kfold_progress.csv"

df_ho, df_kf = (pd.read_csv(csv_file) for csv_file in (HOLDOUT_CSV, KFOLD_CSV))

# Column names: trim whitespace and lower-case.
for df in (df_ho, df_kf):
    df.columns = df.columns.str.strip().str.lower()

# Guarantee a fold column on the holdout frame so the concat lines up.
if "fold" not in df_ho:
    df_ho["fold"] = None

df_all = pd.concat([df_ho, df_kf], ignore_index=True)

# Basic cleaning: cast the label columns that exist to trimmed strings.
for c in ("split_type", "scheme", "fold", "model"):
    if c not in df_all.columns:
        continue
    df_all[c] = df_all[c].astype(str).str.strip()

# ---------- NORMALIZER ----------
def norm_split(x: str) -> str:
    """Map a free-form split label onto ``"holdout"`` or ``"kfold"``.

    Args:
        x: Raw split label; None or an empty string is treated as "".

    Returns:
        ``"holdout"`` when the lower-cased label contains "hold"; ``"kfold"``
        when it contains both "k" and "fold", contains "cv", or looks like a
        ``k<N>`` scheme (e.g. "k5", "k = 5"); otherwise the lower-cased label.
    """
    label = (x or "").lower()
    if "hold" in label:
        return "holdout"

    looks_like_kfold = (
        ("k" in label and "fold" in label)
        or "cv" in label
        # Fallback: the label is written like a k<N> scheme.
        or re.search(r"\bk\s*=?\s*\d+", label) is not None
    )
    return "kfold" if looks_like_kfold else label

def norm_k(x: str) -> str:
    """Normalise a k-fold scheme label to the canonical ``k<N>`` form.

    Accepted spellings (case- and space-insensitive): ``"k5"``, ``"k=5"``,
    ``"5"`` and the fold-word variants ``"k-fold5"``, ``"kfold5"``,
    ``"k_fold_5"``.

    Args:
        x: Raw scheme label; None or an empty string is treated as "".

    Returns:
        ``"k<N>"`` when a fold count can be extracted; otherwise the label
        lower-cased with spaces removed (e.g. ``"ho_80_10_10"``).
    """
    x = (x or "").lower().replace(" ", "")
    # "k", optionally followed by the word "fold" (with an optional "-"/"_"
    # on either side), then an optional "=" and the digits. The optional
    # "fold" group is what makes "k-fold5" resolve to "k5".
    m = re.search(r"k(?:[-_]?fold[-_]?)?=?(\d+)", x)
    if m:
        return f"k{m.group(1)}"
    # A bare number is taken as the fold count.
    m = re.fullmatch(r"(\d+)", x)
    if m:
        return f"k{m.group(1)}"
    return x

def norm_fold(x: str) -> str:
    """Normalise a fold label to the canonical ``fold<N>`` form.

    Accepted spellings (case- and space-insensitive): ``"fold2"``,
    ``"fold_2"``, ``"f2"``, ``"2"`` and ``"2.0"``. Non-string input is
    converted with ``str()`` first, so numeric fold ids and float NaN are
    handled instead of raising.

    Args:
        x: Raw fold label. Falsy values (None, "") count as missing.

    Returns:
        ``"fold<N>"`` when a fold number can be extracted; None for missing
        markers ("none", "nan", "null", ""), as used for holdout rows;
        otherwise the label lower-cased with spaces removed.
    """
    text = str(x or "").lower().replace(" ", "")
    m = re.search(r"fold_?(\d+)", text)
    if m:
        return f"fold{m.group(1)}"
    m = re.search(r"\bf(\d+)\b", text)
    if m:
        return f"fold{m.group(1)}"
    # Bare integer, optionally with a zero fraction: an integer fold id that
    # went through a float column is stringified as "2.0".
    m = re.fullmatch(r"(\d+)(?:\.0+)?", text)
    if m:
        return f"fold{m.group(1)}"
    # Holdout rows carry a stringified missing value such as "none" or "nan".
    return None if text in ["none", "nan", "null", ""] else text

# Add normalised helper columns next to the raw label columns.
for target, source, normaliser in (
    ("split_norm", "split_type", norm_split),
    ("scheme_norm", "scheme", norm_k),
    ("fold_norm", "fold", norm_fold),
):
    df_all[target] = df_all[source].apply(normaliser)
df_all["model_norm"] = df_all["model"].str.lower()


def _sorted_text_values(column):
    """Sorted distinct string values of ``df_all[column]`` (non-strings are dropped)."""
    return sorted([value for value in df_all[column].unique() if isinstance(value, str)])


# Quick sanity print so the normalised values can be inspected.
print("split_norm unique:", df_all["split_norm"].unique())
print("scheme_norm unique:", _sorted_text_values("scheme_norm"))
print("fold_norm unique:", _sorted_text_values("fold_norm")[:15], "...")


# ---------- FILTER 13 RUNS ----------
def _pick_kfold(scheme, fold, model_pattern):
    """Copy of the k-fold rows for one scheme/fold whose model_norm matches the regex ``model_pattern``."""
    mask = (
        (df_all["split_norm"] == "kfold")
        & (df_all["scheme_norm"] == scheme)
        & (df_all["fold_norm"] == fold)
        & df_all["model_norm"].str.contains(model_pattern)
    )
    return df_all[mask].copy()


keep_holdout = df_all[df_all["split_norm"] == "holdout"].copy()
keep_k5f2_tweet = _pick_kfold("k5", "fold2", "tweet")
keep_k6f2 = _pick_kfold("k6", "fold2", "tweet|roberta")
keep_k6f4_roberta = _pick_kfold("k6", "fold4", "roberta")

selected_parts = [keep_holdout, keep_k5f2_tweet, keep_k6f2, keep_k6f4_roberta]
df_13 = pd.concat(selected_parts, ignore_index=True)
df_13 = df_13.drop_duplicates(subset=["split_norm","scheme_norm","fold_norm","model"]).reset_index(drop=True)

print("Total rows (expected 13):", len(df_13))

# ---------- DISPLAY METRICS ONLY ----------
cols_metrics = [
    "split_norm","scheme_norm","fold_norm","model",
    "eval_accuracy","eval_precision_macro","eval_recall_macro","eval_f1_macro"
]

pink_grad = LinearSegmentedColormap.from_list("pink_grad", ["#FCE4EC", "#F48FB1", "#C2185B"])

# Header and body cell styling.
metrics_css = [
    {"selector": "th", "props": [
        ("background-color", "#EC407A"),
        ("color", "white"),
        ("border", "1px solid #AD1457"),
        ("text-align", "center"),
    ]},
    {"selector": "td", "props": [
        ("border", "1px solid #F06292"),
        ("text-align", "center"),
    ]},
]

# Everything except the last column (eval_f1_macro) gets a flat background;
# the F1 column gets the gradient instead.
flat_columns = cols_metrics[:-1]
# All four eval_* columns are printed with four decimals.
number_formats = {name: "{:.4f}" for name in cols_metrics if name.startswith("eval_")}

metrics_table = df_13[cols_metrics].sort_values(["split_norm","model","scheme_norm","fold_norm"])
metrics_styler = metrics_table.style.set_table_styles(metrics_css)
metrics_styler = metrics_styler.set_properties(subset=flat_columns, **{
    "background-color": "#FCE4EC",
    "color": "black"
})
metrics_styler = metrics_styler.background_gradient(subset=["eval_f1_macro"], cmap=pink_grad)
metrics_styler = metrics_styler.format(number_formats)

display(metrics_styler)

split_norm unique: ['holdout' 'kfold']
scheme_norm unique: ['ho_70_15_15', 'ho_80_10_10', 'ho_90_05_05', 'k2', 'k3', 'k4', 'k5', 'k6', 'k7']
fold_norm unique: ['fold1', 'fold2', 'fold3', 'fold4', 'fold5', 'fold6', 'fold7'] ...
Total rows (expected 13): 13


Unnamed: 0,split_norm,scheme_norm,fold_norm,model,eval_accuracy,eval_precision_macro,eval_recall_macro,eval_f1_macro
3,holdout,ho_70_15_15,,IndoBERT,0.7837,0.7952,0.7953,0.7929
0,holdout,ho_80_10_10,,IndoBERT,0.7738,0.7764,0.796,0.7815
6,holdout,ho_90_05_05,,IndoBERT,0.7865,0.7939,0.7985,0.7937
4,holdout,ho_70_15_15,,IndoBERTweet,0.7685,0.7686,0.7928,0.7772
1,holdout,ho_80_10_10,,IndoBERTweet,0.7724,0.775,0.7961,0.7823
7,holdout,ho_90_05_05,,IndoBERTweet,0.7685,0.7686,0.7928,0.7772
5,holdout,ho_70_15_15,,IndoRoBERTa,0.7192,0.7223,0.7424,0.729
2,holdout,ho_80_10_10,,IndoRoBERTa,0.7354,0.7398,0.7546,0.7432
8,holdout,ho_90_05_05,,IndoRoBERTa,0.7192,0.7223,0.7424,0.729
9,kfold,k5,fold2,IndoBERTweet,0.803,0.8047,0.8189,0.8099


In [13]:
def plot_train_vs_val_loss(log_df, title, out_path=None):
    """Plot per-epoch mean training and validation loss from a training log.

    Args:
        log_df: DataFrame of log records. It needs an "epoch" column; the
            "loss" and "eval_loss" columns are optional, and rows where they
            are NaN are ignored.
        title: Figure title.
        out_path: Optional file path; when given, the figure is also saved
            there (dpi=200, tight bounding box).

    Returns:
        None. Nothing is drawn when ``log_df`` is None, has no "epoch"
        column, or contains neither loss series.
    """
    if log_df is None or "epoch" not in log_df.columns:
        return

    def _epoch_mean(column):
        """Mean of ``column`` per epoch, or an empty frame when there is no data."""
        if column not in log_df.columns:
            return pd.DataFrame()
        values = log_df.loc[log_df[column].notna(), ["epoch", column]].copy()
        if values.empty:
            return values
        # Several log rows can share an epoch; collapse them to one point.
        return values.groupby("epoch", as_index=False).mean()

    train_df = _epoch_mean("loss")
    val_df = _epoch_mean("eval_loss")

    if train_df.empty and val_df.empty:
        return

    fig, ax = plt.subplots()

    # Each line carries its own label. A positional legend list would name a
    # lone validation curve "Train Loss" whenever the train series is missing.
    if not train_df.empty:
        ax.plot(train_df["epoch"], train_df["loss"], color="#F8BBD0", label="Train Loss")

    if not val_df.empty:
        ax.plot(val_df["epoch"], val_df["eval_loss"], color="#EC407A", label="Val Loss")

    ax.set_xlabel("Epoch")
    ax.set_ylabel("Loss")
    ax.set_title(title)
    ax.legend()

    if out_path:
        fig.savefig(out_path, dpi=200, bbox_inches="tight")

    plt.show()
    plt.close(fig)

In [14]:
def plot_confusion_matrix(true_labels, pred_labels, title, out_path=None):
    """Draw the confusion matrix of predicted versus true labels.

    Args:
        true_labels: Ground-truth labels.
        pred_labels: Predicted labels, aligned with ``true_labels``.
        title: Axes title.
        out_path: Optional file path; when given, the figure is also saved
            there (dpi=200, tight bounding box).

    Returns:
        None. The figure is shown and then closed.
    """
    cm = confusion_matrix(true_labels, pred_labels)

    # Create the axes here and hand them to the display. Without ``ax``,
    # ConfusionMatrixDisplay.plot() opens a figure of its own, which would
    # leave a separately created figure blank.
    fig, ax = plt.subplots()
    disp = ConfusionMatrixDisplay(confusion_matrix=cm)
    disp.plot(ax=ax, cmap=pink_cmap, values_format="d")

    # Title, save and close all target this figure explicitly rather than
    # whatever pyplot currently considers the active figure/axes.
    ax.set_title(title)

    if out_path:
        fig.savefig(out_path, dpi=200, bbox_inches="tight")

    plt.show()
    plt.close(fig)

In [16]:
# For every run: draw the loss curve and, when validation predictions exist,
# the confusion matrix plus metrics; then write all metrics to one CSV.
# NOTE(review): iter_run_dirs, get_log_df, get_val_pred_path,
# compute_metrics_from_preds and PLOT_OUT_DIR are not defined in the cells
# visible here - confirm they are defined before this cell on a fresh run.
rows = []

for model_name, split_type, scheme_name, fold_name, run_dir in iter_run_dirs(BASE_MODELS_DIR):
    # The fold part is present only for runs that have a fold name.
    fold_in_tag = f" | {fold_name}" if fold_name else ""
    fold_in_file = f"__{fold_name}" if fold_name else ""

    tag = f"{model_name} | {split_type} | {scheme_name}" + fold_in_tag
    print("\n=== ", tag, "===")

    # Shared file-name stem: <model>__<split>__<scheme>[__<fold>]
    run_stem = f"{model_name}__{split_type}__{scheme_name}" + fold_in_file

    # 1) Loss curve
    log_df = get_log_df(run_dir)
    loss_png = os.path.join(PLOT_OUT_DIR, f"LOSS__{run_stem}.png")
    plot_train_vs_val_loss(log_df, title=f"Loss Curve - {tag}", out_path=loss_png)

    # 2) Confusion matrix + metrics (needs a predictions file for the run)
    pred_path = get_val_pred_path(run_dir)
    if not pred_path:
        print("  (skip CM) val_predictions.csv tidak ditemukan di:", run_dir)
        continue

    pred_df = pd.read_csv(pred_path)
    y_true = pred_df["true_label"].values
    y_pred = pred_df["pred_label"].values

    acc, prec, rec, f1 = compute_metrics_from_preds(y_true, y_pred)

    cm_png = os.path.join(PLOT_OUT_DIR, f"CM__{run_stem}.png")
    plot_confusion_matrix(y_true, y_pred, title=f"Confusion Matrix - {tag}", out_path=cm_png)

    rows.append(dict(
        Model=model_name,
        Split=split_type,
        Scheme=scheme_name,
        Fold=fold_name,
        Accuracy=acc,
        Precision_Macro=prec,
        Recall_Macro=rec,
        F1_Macro=f1,
        RunDir=run_dir,
    ))

df_eval = pd.DataFrame(rows)
csv_path = os.path.join(PLOT_OUT_DIR, "eval_from_val_predictions.csv")
df_eval.to_csv(csv_path, index=False)
print("\nSaved:", csv_path)

Output hidden; open in https://colab.research.google.com to view.

In [18]:
# Recursively archive the models/_plots folder (the LOSS__*.png and CM__*.png
# files) into a single zip under /content.
!zip -r /content/evaluation_results.zip /content/drive/MyDrive/SKRIPSI/models/_plots

  adding: content/drive/MyDrive/SKRIPSI/models/_plots/ (stored 0%)
  adding: content/drive/MyDrive/SKRIPSI/models/_plots/LOSS__IndoBERT__holdout__HO_70_15_15.png (deflated 11%)
  adding: content/drive/MyDrive/SKRIPSI/models/_plots/CM__IndoBERT__holdout__HO_70_15_15.png (deflated 12%)
  adding: content/drive/MyDrive/SKRIPSI/models/_plots/LOSS__IndoBERT__holdout__HO_80_10_10.png (deflated 10%)
  adding: content/drive/MyDrive/SKRIPSI/models/_plots/CM__IndoBERT__holdout__HO_80_10_10.png (deflated 12%)
  adding: content/drive/MyDrive/SKRIPSI/models/_plots/LOSS__IndoBERT__holdout__HO_90_05_05.png (deflated 11%)
  adding: content/drive/MyDrive/SKRIPSI/models/_plots/CM__IndoBERT__holdout__HO_90_05_05.png (deflated 11%)
  adding: content/drive/MyDrive/SKRIPSI/models/_plots/LOSS__IndoBERT__kfold__k2__fold1.png (deflated 12%)
  adding: content/drive/MyDrive/SKRIPSI/models/_plots/CM__IndoBERT__kfold__k2__fold1.png (deflated 10%)
  adding: content/drive/MyDrive/SKRIPSI/models/_plots/LOSS__IndoBERT_

In [19]:
# Send the zipped evaluation results from the Colab runtime to the browser.
from google.colab import files

files.download(filename="/content/evaluation_results.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>