In [1]:
from pathlib import Path
import sys
import numpy as np
import pandas as pd

# Locate the repository root: the first directory, starting at the current
# working directory and walking upward through its parents, that contains a
# "scripts" or a "data" folder.
ROOT = None
for _candidate in (Path.cwd(), *Path.cwd().parents):
    if (_candidate / "scripts").is_dir() or (_candidate / "data").is_dir():
        ROOT = _candidate
        break
if ROOT is None:
    raise RuntimeError("Repo-Root not found (expected folder 'scripts' or 'data').")

# Put the root at the front of the import path.
sys.path.insert(0, str(ROOT))

DATA_DIR = ROOT / "data"

# Each *_DIRS_* constant is a list of (directory, mode) pairs, one per mode.
_MODES = ("edge", "vertex", "sp")
DRF_DIRS_BIG = [(DATA_DIR / "drf_big" / f"precomputed_drf_{mode}", mode) for mode in _MODES]
DRF_DIRS_SMALL = [(DATA_DIR / "drf_small" / f"precomputed_drf_{mode}", mode) for mode in _MODES]
ITS_DIRS_BIG = [(DATA_DIR / "its_big" / f"precomputed_its_{mode}", mode) for mode in _MODES]
ITS_DIRS_SMALL = [(DATA_DIR / "its_small" / f"precomputed_its_{mode}", mode) for mode in _MODES]

In [2]:
from scripts.wp3.tryout import (
    load_pickles_from_dir,
    compute_kernel_matrix,
    train_eval_svm_precomputed,
    run_benchmark_on_dir,
    run_full_benchmark,
    plot_confusion_matrix,
    plot_metric_bars,
    plot_runtime_bars,
)

In [3]:
# Example: load the DRF + edge pickles from the small-subset directory
# (failed entries dropped) and inspect the first loaded dataset.
drf_edge_small_dir, _edge_mode = DRF_DIRS_SMALL[0]
datasets = load_pickles_from_dir(
    drf_edge_small_dir,
    method_hint="DRF",
    drop_failed=True,
)
first_dataset = datasets[0]
len(datasets), first_dataset["file"].name, first_dataset["mode"], first_dataset["method"]

(834, 'subset_001.reaction_features_drf_wl_h3_edge.pkl', 'edge', 'DRF')

In [4]:
# Fit and score an SVM with a precomputed kernel, using only the first loaded dataset.
example_dataset = datasets[0]
svm_options = dict(
    test_size=0.2,
    random_state=42,
    C=1.0,
    class_weight=None,            # or "balanced" if classes imbalanced
    kernel_normalize=True,        # normalized kernel is often beneficial
)
res = train_eval_svm_precomputed(
    features=example_dataset["features"],
    y_raw=example_dataset["y_raw"],
    **svm_options,
)

# Headline scores and the timing breakdown reported by the helper.
print(f"ACC: {res['acc']:.3f} | F1-macro: {res['f1_macro']:.3f}")
print("Timings (s):", res["timings"])

# Confusion matrix, with axes labelled by the label encoder's class names.
labels = list(res["label_encoder"].classes_)
fig_cm = plot_confusion_matrix(
    res["cm"],
    labels,
    title="DRF-edge (small) Confusion matrix",
)
fig_cm.show()

ACC: 1.000 | F1-macro: 1.000
Timings (s): {'k_train_time': 0.01212829991709441, 'k_test_time': 0.0021243999944999814, 'fit_time': 0.002246399992145598, 'predict_time': 0.0006126000080257654, 'total_time': 0.017111699911765754}


In [5]:
# Benchmark all DRF and ITS directories of the small subsets (3 classes × 20 each).
# NOTE(review): the recorded run of this cell ended with
# "ValueError: The number of classes has to be greater than one; got 1 class".
# Presumably at least one subset leaves only a single class for fitting —
# confirm inside run_full_benchmark before relying on the cells below.
small_benchmark_kwargs = dict(
    drf_dirs=DRF_DIRS_SMALL,
    its_dirs=ITS_DIRS_SMALL,
    dataset_size="small",
    test_size=0.2,
    random_state=42,
    C=1.0,
    class_weight=None,     # or "balanced"
    kernel_normalize=True,
)
results_small = run_full_benchmark(**small_benchmark_kwargs)

# Preview the first rows, ordered by method, mode and file.
results_small.sort_values(["method", "mode", "file"]).head()

ValueError: The number of classes has to be greater than one; got 1 class

In [None]:
# Benchmark all DRF and ITS directories of the big subsets (5 classes × 200 each),
# with the same split and SVM settings as the small-subset run.
big_benchmark_kwargs = dict(
    drf_dirs=DRF_DIRS_BIG,
    its_dirs=ITS_DIRS_BIG,
    dataset_size="big",
    test_size=0.2,
    random_state=42,
    C=1.0,
    class_weight=None,
    kernel_normalize=True,
)
results_big = run_full_benchmark(**big_benchmark_kwargs)

# Preview the first rows, ordered by method, mode and file.
results_big.sort_values(["method", "mode", "file"]).head()

In [None]:
# Stack the small- and big-subset benchmark results into one frame.
all_results = pd.concat([results_small, results_big], ignore_index=True)

# Accuracy bars (facet by dataset size)
fig_acc = plot_metric_bars(
    all_results,
    metric="acc",
    facet_by="dataset_size",
    title="Accuracy by method/mode",
)
fig_acc.show()

# F1-macro bars
fig_f1 = plot_metric_bars(
    all_results,
    metric="f1_macro",
    facet_by="dataset_size",
    title="F1-macro by method/mode",
)
fig_f1.show()

# Runtime breakdown (stacked)
runtime_components = ("k_train_time", "fit_time", "k_test_time", "predict_time")
fig_rt = plot_runtime_bars(
    all_results,
    stack_components=runtime_components,
    facet_by="dataset_size",
)
fig_rt.show()

# Quick summary table: mean/std of accuracy and F1-macro plus mean total time
# for every (dataset_size, method, mode) combination.
summary_keys = ["dataset_size", "method", "mode"]
summary_aggs = {
    "acc_mean": pd.NamedAgg(column="acc", aggfunc="mean"),
    "acc_std": pd.NamedAgg(column="acc", aggfunc="std"),
    "f1_mean": pd.NamedAgg(column="f1_macro", aggfunc="mean"),
    "f1_std": pd.NamedAgg(column="f1_macro", aggfunc="std"),
    "total_time_mean": pd.NamedAgg(column="total_time", aggfunc="mean"),
}
summary = (
    all_results.groupby(summary_keys)
    .agg(**summary_aggs)
    .reset_index()
    .sort_values(summary_keys)
)
summary

In [None]:
# `px` is used below, but no visible cell imports plotly.express; import it
# here so this cell does not fail with NameError on a fresh kernel.
import plotly.express as px

# Re-run the small-subset benchmark once per test-set fraction and tag every
# result row with the fraction that produced it.
# NOTE(review): the recorded small-subset run with test_size=0.2 raised
# "ValueError: ... got 1 class"; this sweep uses the same directories and
# settings, so it presumably hits the same error until that is resolved.
splits = [0.1, 0.2, 0.3, 0.4]
sweep_rows = []
for ts in splits:
    # .assign returns a new frame carrying the extra "split" column instead of
    # mutating the frame returned by the benchmark in place.
    df_small = run_full_benchmark(
        drf_dirs=DRF_DIRS_SMALL,
        its_dirs=ITS_DIRS_SMALL,
        dataset_size="small",
        test_size=ts,
        random_state=42,
        C=1.0,
        class_weight=None,
        kernel_normalize=True,
    ).assign(split=ts)
    sweep_rows.append(df_small)
split_sweep = pd.concat(sweep_rows, ignore_index=True)

# Mean accuracy per (split, method, mode): one line per method/mode pair.
mean_acc_by_split = split_sweep.groupby(["split", "method", "mode"])["acc"].mean().reset_index()

fig_split = px.line(
    mean_acc_by_split,
    x="split", y="acc", color="method", line_dash="mode",
    title="Accuracy vs test split (small subsets)"
)
fig_split.update_layout(yaxis_title="Mean ACC")
fig_split.show()