In [2]:
from pathlib import Path
import sys
import numpy as np
import pandas as pd

# Locate the repository root: the first directory, starting at the current working
# directory and walking up through its parents, that contains a `scripts` or `data` folder.
ROOT = next(
    (
        candidate
        for candidate in (Path.cwd(), *Path.cwd().parents)
        if (candidate / "scripts").is_dir() or (candidate / "data").is_dir()
    ),
    None,
)
if ROOT is None:
    raise RuntimeError("Repo-Root not found (expected folder 'scripts' or 'data').")

# Keep the root at the front of sys.path exactly once. A plain insert(0, ...) would add
# another duplicate entry every time this cell is re-run.
_root_entry = str(ROOT)
sys.path[:] = [entry for entry in sys.path if entry != _root_entry]
sys.path.insert(0, _root_entry)

DATA_DIR = ROOT / "data"

# Modes used to name the precomputed feature directories.
FEATURE_MODES = ("edge", "vertex", "sp")


def _precomputed_dirs(method: str, size: str) -> list:
    """Return ``(directory, mode)`` pairs for one method and dataset size.

    Args:
        method: Lower-case method tag used in the folder names ("drf" or "its").
        size: Dataset size tag used in the folder names ("big" or "small").

    Returns:
        One ``(DATA_DIR / f"{method}_{size}" / f"precomputed_{method}_{mode}", mode)``
        tuple per entry of ``FEATURE_MODES``, in that order. The directories are not
        checked for existence here.
    """
    return [
        (DATA_DIR / f"{method}_{size}" / f"precomputed_{method}_{mode}", mode)
        for mode in FEATURE_MODES
    ]


DRF_DIRS_BIG = _precomputed_dirs("drf", "big")
DRF_DIRS_SMALL = _precomputed_dirs("drf", "small")
ITS_DIRS_BIG = _precomputed_dirs("its", "big")
ITS_DIRS_SMALL = _precomputed_dirs("its", "small")

In [3]:
from scripts.wp3.tryout import (
    load_pickles_from_dir,
    compute_kernel_matrix,
    train_eval_svm_precomputed,
    plot_confusion_matrix,
    plot_metric_bars,
    plot_runtime_bars,
)
from scripts.wp3.tryout import run_aggregated_benchmark, plot_metric_bars, plot_runtime_bars, gather_capped_dataset
from scripts.wp3.tryout import gather_capped_dataset, compute_pairwise_accuracy_matrix, plot_pairwise_accuracy_heatmap, plot_class_difficulty_bars


In [3]:
# Example: load the DRF + edge pickles of the small subset and peek at the first entry.
# DRF_DIRS_SMALL holds (directory, mode) pairs; the first pair is the "edge" directory.
drf_edge_small_dir = DRF_DIRS_SMALL[0][0]
datasets = load_pickles_from_dir(
    drf_edge_small_dir,
    method_hint="DRF",
    drop_failed=True,
)
# Displayed as: (number of loaded datasets, first file name, its mode, its method)
(
    len(datasets),
    datasets[0]["file"].name,
    datasets[0]["mode"],
    datasets[0]["method"],
)

(834, 'subset_001.reaction_features_drf_wl_h3_edge.pkl', 'edge', 'DRF')

In [4]:
# Fit and score an SVM (precomputed kernel) on the first loaded dataset.
svm_opts = dict(
    test_size=0.2,
    random_state=42,
    C=1.0,
    class_weight=None,      # switch to "balanced" if the classes are imbalanced
    kernel_normalize=True,  # a normalized kernel is often beneficial
)
res = train_eval_svm_precomputed(
    features=datasets[0]["features"],
    y_raw=datasets[0]["y_raw"],
    **svm_opts,
)

# Headline metrics followed by the timing breakdown returned by the helper.
print("ACC: {:.3f} | F1-macro: {:.3f}".format(res["acc"], res["f1_macro"]))
print("Timings (s):", res["timings"])

# Confusion matrix, with tick labels taken from the fitted label encoder.
labels = [*res["label_encoder"].classes_]
fig_cm = plot_confusion_matrix(
    res["cm"],
    labels,
    title="DRF-edge (small) Confusion matrix",
)
fig_cm.show()

ACC: 1.000 | F1-macro: 1.000
Timings (s): {'k_train_time': 0.008929500007070601, 'k_test_time': 0.003102299990132451, 'fit_time': 0.0011399000650271773, 'predict_time': 0.00016299996059387922, 'total_time': 0.013334700022824109}


In [None]:



# Per-class sample cap and held-out test fraction for the aggregated benchmark.
cap_per_class, test_size = 200, 0.2

# Options forwarded unchanged to the benchmark helper.
benchmark_opts = dict(
    random_state=42,
    C=1.0,
    class_weight=None,
    kernel_normalize=True,
    use_linear_if_n_ge=3000,   # auto-switch to linear SVM for large totals
    l2_normalize_features=True,
    max_files_per_dir=None,    # set to limit files if needed
)

results_50xN = run_aggregated_benchmark(
    drf_dirs=DRF_DIRS_BIG,
    its_dirs=ITS_DIRS_BIG,
    n_per_class=cap_per_class,
    dataset_size_label=f"50×{cap_per_class}",
    test_size=test_size,
    **benchmark_opts,
)

# Show the result table ordered by method, then mode.
results_50xN.sort_values(by=["method", "mode"])

In [6]:
# All three bar charts are faceted by the dataset-size label.
bar_facet = dict(facet_by="dataset_size")

# Accuracy per method/mode.
fig_acc = plot_metric_bars(
    results_50xN,
    metric="acc",
    title="Accuracy (50 classes, capped)",
    **bar_facet,
)
fig_acc.show()

# Macro-averaged F1 per method/mode.
fig_f1 = plot_metric_bars(
    results_50xN,
    metric="f1_macro",
    title="F1-macro (50 classes, capped)",
    **bar_facet,
)
fig_f1.show()

# Runtime bars. Kernel times will be NaN when the linear path was used;
# the stacked bars still render.
fig_rt = plot_runtime_bars(results_50xN, **bar_facet)
fig_rt.show()

In [3]:
# Aggregate the DRF / edge features from the big precomputed directory,
# keeping at most 200 samples per class.
agg_drf_edge = gather_capped_dataset(
    [(DATA_DIR / "drf_big" / "precomputed_drf_edge", "edge")],
    method="DRF",
    mode="edge",
    n_per_class=200,
    random_state=42,
    dedupe_rsmi=True,
    max_files_per_dir=None,
)

# Displayed as: (number of samples, number of distinct classes, first five class counts)
(
    len(agg_drf_edge["y_raw"]),
    len({*agg_drf_edge["y_raw"]}),
    [*agg_drf_edge["class_counts"].items()][:5],
)

(10000,
 50,
 [('6.3.7', 200),
  ('6.2.1', 200),
  ('1.2.1', 200),
  ('10.4.2', 200),
  ('1.3.6', 200)])

In [5]:
# Sorted class labels of the aggregated DRF / edge dataset (50 classes);
# they fix the class order of both the matrix and the heatmap.
all_classes = list(agg_drf_edge["have_classes"])
all_classes.sort()

# Pairwise accuracy matrix, linear approach, same per-class cap (200) as the dataset.
pair_df = compute_pairwise_accuracy_matrix(
    agg_drf_edge["features"],
    agg_drf_edge["y_raw"],
    class_order=all_classes,
    n_per_class=200,
    approach="linear",
)

# Heatmap layout: large canvas with every tick shown in a small, rotated font.
heatmap_opts = dict(
    class_order=all_classes,
    show_all_ticks=True,
    height=1000,
    width=1600,
    tick_font_size=8,
    tick_angle=-60,
)
fig_pair_heat = plot_pairwise_accuracy_heatmap(
    pair_df,
    title="Pairwise ACC (DRF–WL edge, cap=200)",
    **heatmap_opts,
)
fig_pair_heat.show()

In [6]:
# Aggregate the DRF / vertex features from the big precomputed directory,
# keeping at most 200 samples per class.
agg_drf_vertex = gather_capped_dataset(
    [(DATA_DIR / "drf_big" / "precomputed_drf_vertex", "vertex")],
    method="DRF",
    mode="vertex",
    n_per_class=200,
    random_state=42,
    dedupe_rsmi=True,
    max_files_per_dir=None,
)
# Backward-compatible alias: the pairwise-accuracy cell that follows reads
# `agg_drf_edge`. The dataset itself lives under the accurately named variable above,
# so it is not lost when a later cell rebinds the alias.
agg_drf_edge = agg_drf_vertex

# Displayed as: (number of samples, number of distinct classes, first five class counts)
(
    len(agg_drf_vertex["y_raw"]),
    len(set(agg_drf_vertex["y_raw"])),
    list(agg_drf_vertex["class_counts"].items())[:5],
)

(10000,
 50,
 [('6.3.1', 200),
  ('1.2.4', 200),
  ('1.7.4', 200),
  ('2.1.2', 200),
  ('6.2.1', 200)])

In [7]:
# NOTE(review): despite its name, `agg_drf_edge` is expected to hold the DRF / vertex
# dataset built by the preceding cell (see the plot title) - confirm when running
# cells out of order.
# Sorted class labels (50 classes); they fix the class order of matrix and heatmap.
all_classes = list(agg_drf_edge["have_classes"])
all_classes.sort()

# Pairwise accuracy matrix, linear approach, same per-class cap (200) as the dataset.
pair_df = compute_pairwise_accuracy_matrix(
    agg_drf_edge["features"],
    agg_drf_edge["y_raw"],
    class_order=all_classes,
    n_per_class=200,
    approach="linear",
)

# Heatmap layout: large canvas with every tick shown in a small, rotated font.
heatmap_opts = dict(
    class_order=all_classes,
    show_all_ticks=True,
    height=1000,
    width=1600,
    tick_font_size=8,
    tick_angle=-60,
)
fig_pair_heat = plot_pairwise_accuracy_heatmap(
    pair_df,
    title="Pairwise ACC (DRF–WL vertex, cap=200)",
    **heatmap_opts,
)
fig_pair_heat.show()

In [8]:
# Aggregate the DRF / sp features from the big precomputed directory,
# keeping at most 200 samples per class.
agg_drf_sp = gather_capped_dataset(
    [(DATA_DIR / "drf_big" / "precomputed_drf_sp", "sp")],
    method="DRF",
    mode="sp",
    n_per_class=200,
    random_state=42,
    dedupe_rsmi=True,
    max_files_per_dir=None,
)
# Backward-compatible alias: the pairwise-accuracy cell that follows reads
# `agg_drf_edge`. The dataset itself lives under the accurately named variable above,
# so it is not lost when a later cell rebinds the alias.
agg_drf_edge = agg_drf_sp

# Displayed as: (number of samples, number of distinct classes, first five class counts)
(
    len(agg_drf_sp["y_raw"]),
    len(set(agg_drf_sp["y_raw"])),
    list(agg_drf_sp["class_counts"].items())[:5],
)

(10000,
 50,
 [('6.3.1', 200),
  ('1.2.4', 200),
  ('1.7.4', 200),
  ('2.1.2', 200),
  ('6.2.1', 200)])

In [9]:
# NOTE(review): despite its name, `agg_drf_edge` is expected to hold the DRF / sp
# dataset built by the preceding cell (see the plot title) - confirm when running
# cells out of order.
# Sorted class labels (50 classes); they fix the class order of matrix and heatmap.
all_classes = list(agg_drf_edge["have_classes"])
all_classes.sort()

# Pairwise accuracy matrix, linear approach, same per-class cap (200) as the dataset.
pair_df = compute_pairwise_accuracy_matrix(
    agg_drf_edge["features"],
    agg_drf_edge["y_raw"],
    class_order=all_classes,
    n_per_class=200,
    approach="linear",
)

# Heatmap layout: large canvas with every tick shown in a small, rotated font.
heatmap_opts = dict(
    class_order=all_classes,
    show_all_ticks=True,
    height=1000,
    width=1600,
    tick_font_size=8,
    tick_angle=-60,
)
fig_pair_heat = plot_pairwise_accuracy_heatmap(
    pair_df,
    title="Pairwise ACC (DRF–WL sp, cap=200)",
    **heatmap_opts,
)
fig_pair_heat.show()

In [10]:
# Aggregate the ITS / edge features from the big precomputed directory,
# keeping at most 200 samples per class.
agg_its_edge = gather_capped_dataset(
    [(DATA_DIR / "its_big" / "precomputed_its_edge", "edge")],
    method="ITS",
    mode="edge",
    n_per_class=200,
    random_state=42,
    dedupe_rsmi=True,
    max_files_per_dir=None,
)
# Backward-compatible alias: the pairwise-accuracy cell that follows reads
# `agg_drf_edge`. The dataset itself lives under the accurately named variable above,
# so it is not lost when a later cell rebinds the alias.
agg_drf_edge = agg_its_edge

# Displayed as: (number of samples, number of distinct classes, first five class counts)
(
    len(agg_its_edge["y_raw"]),
    len(set(agg_its_edge["y_raw"])),
    list(agg_its_edge["class_counts"].items())[:5],
)

(10000,
 50,
 [('6.3.1', 200),
  ('1.2.4', 200),
  ('1.7.4', 200),
  ('2.1.2', 200),
  ('6.2.1', 200)])

In [11]:
# NOTE(review): despite its name, `agg_drf_edge` is expected to hold the ITS / edge
# dataset built by the preceding cell (see the plot title) - confirm when running
# cells out of order.
# Sorted class labels (50 classes); they fix the class order of matrix and heatmap.
all_classes = list(agg_drf_edge["have_classes"])
all_classes.sort()

# Pairwise accuracy matrix, linear approach, same per-class cap (200) as the dataset.
pair_df = compute_pairwise_accuracy_matrix(
    agg_drf_edge["features"],
    agg_drf_edge["y_raw"],
    class_order=all_classes,
    n_per_class=200,
    approach="linear",
)

# Heatmap layout: large canvas with every tick shown in a small, rotated font.
heatmap_opts = dict(
    class_order=all_classes,
    show_all_ticks=True,
    height=1000,
    width=1600,
    tick_font_size=8,
    tick_angle=-60,
)
fig_pair_heat = plot_pairwise_accuracy_heatmap(
    pair_df,
    title="Pairwise ACC (ITS–WL edge, cap=200)",
    **heatmap_opts,
)
fig_pair_heat.show()

In [12]:
# Aggregate the ITS / vertex features from the big precomputed directory,
# keeping at most 200 samples per class.
agg_its_vertex = gather_capped_dataset(
    [(DATA_DIR / "its_big" / "precomputed_its_vertex", "vertex")],
    method="ITS",
    mode="vertex",
    n_per_class=200,
    random_state=42,
    dedupe_rsmi=True,
    max_files_per_dir=None,
)
# Backward-compatible alias: the pairwise-accuracy cell that follows reads
# `agg_drf_edge`. The dataset itself lives under the accurately named variable above,
# so it is not lost when a later cell rebinds the alias.
agg_drf_edge = agg_its_vertex

# Displayed as: (number of samples, number of distinct classes, first five class counts)
(
    len(agg_its_vertex["y_raw"]),
    len(set(agg_its_vertex["y_raw"])),
    list(agg_its_vertex["class_counts"].items())[:5],
)

(10000,
 50,
 [('6.3.1', 200),
  ('1.2.4', 200),
  ('1.7.4', 200),
  ('2.1.2', 200),
  ('6.2.1', 200)])

In [13]:
# NOTE(review): despite its name, `agg_drf_edge` is expected to hold the ITS / vertex
# dataset built by the preceding cell (see the plot title) - confirm when running
# cells out of order.
# Sorted class labels (50 classes); they fix the class order of matrix and heatmap.
all_classes = list(agg_drf_edge["have_classes"])
all_classes.sort()

# Pairwise accuracy matrix, linear approach, same per-class cap (200) as the dataset.
pair_df = compute_pairwise_accuracy_matrix(
    agg_drf_edge["features"],
    agg_drf_edge["y_raw"],
    class_order=all_classes,
    n_per_class=200,
    approach="linear",
)

# Heatmap layout: large canvas with every tick shown in a small, rotated font.
heatmap_opts = dict(
    class_order=all_classes,
    show_all_ticks=True,
    height=1000,
    width=1600,
    tick_font_size=8,
    tick_angle=-60,
)
fig_pair_heat = plot_pairwise_accuracy_heatmap(
    pair_df,
    title="Pairwise ACC (ITS–WL vertex, cap=200)",
    **heatmap_opts,
)
fig_pair_heat.show()

In [14]:
# Aggregate the ITS / sp features from the big precomputed directory,
# keeping at most 200 samples per class.
agg_its_sp = gather_capped_dataset(
    [(DATA_DIR / "its_big" / "precomputed_its_sp", "sp")],
    method="ITS",
    mode="sp",
    n_per_class=200,
    random_state=42,
    dedupe_rsmi=True,
    max_files_per_dir=None,
)
# Backward-compatible alias: the pairwise-accuracy cell that follows reads
# `agg_drf_edge`. The dataset itself lives under the accurately named variable above,
# so it is not lost when a later cell rebinds the alias.
agg_drf_edge = agg_its_sp

# Displayed as: (number of samples, number of distinct classes, first five class counts)
(
    len(agg_its_sp["y_raw"]),
    len(set(agg_its_sp["y_raw"])),
    list(agg_its_sp["class_counts"].items())[:5],
)

(10000,
 50,
 [('6.3.1', 200),
  ('1.2.4', 200),
  ('1.7.4', 200),
  ('2.1.2', 200),
  ('6.2.1', 200)])

In [15]:
# NOTE(review): despite its name, `agg_drf_edge` is expected to hold the ITS / sp
# dataset built by the preceding cell (see the plot title) - confirm when running
# cells out of order.
# Sorted class labels (50 classes); they fix the class order of matrix and heatmap.
all_classes = list(agg_drf_edge["have_classes"])
all_classes.sort()

# Pairwise accuracy matrix, linear approach, same per-class cap (200) as the dataset.
pair_df = compute_pairwise_accuracy_matrix(
    agg_drf_edge["features"],
    agg_drf_edge["y_raw"],
    class_order=all_classes,
    n_per_class=200,
    approach="linear",
)

# Heatmap layout: large canvas with every tick shown in a small, rotated font.
heatmap_opts = dict(
    class_order=all_classes,
    show_all_ticks=True,
    height=1000,
    width=1600,
    tick_font_size=8,
    tick_angle=-60,
)
fig_pair_heat = plot_pairwise_accuracy_heatmap(
    pair_df,
    title="Pairwise ACC (ITS–WL sp, cap=200)",
    **heatmap_opts,
)
fig_pair_heat.show()

In [4]:
from scripts.wp3.tryout import (
    plot_split_metric_box,
    plot_split_metric_line,
    plot_split_runtime_area,
    run_split_sweep_all,
)

# Sweep parameters
cap_per_class = 50                   # 50×50 = 2.5k samples in total
splits = [0.1, 0.2, 0.3, 0.4, 0.5]
seeds = range(5)                     # 5 repetitions per split

# Model options forwarded unchanged to the sweep helper.
sweep_opts = dict(
    C=1.0,
    class_weight=None,
    kernel_normalize=True,
    use_linear_if_n_ge=3000,
    l2_normalize_features=True,
)
sweep_df = run_split_sweep_all(
    drf_dirs=DRF_DIRS_BIG,
    its_dirs=ITS_DIRS_BIG,
    n_per_class=cap_per_class,
    dataset_size_label=f"50×{cap_per_class}",
    splits=splits,
    seeds=seeds,
    **sweep_opts,
)

# Line charts with error band: colour encodes the method, dash style the mode.
line_style = dict(facet_by="dataset_size", color_by="method", line_dash_by="mode")

fig_acc_line = plot_split_metric_line(
    sweep_df, metric="acc", title="Accuracy vs Test split", **line_style
)
fig_acc_line.show()

fig_f1_line = plot_split_metric_line(
    sweep_df, metric="f1_macro", title="F1-macro vs Test split", **line_style
)
fig_f1_line.show()

# Box plots (distribution across seeds)
fig_acc_box = plot_split_metric_box(
    sweep_df,
    metric="acc",
    facet_by="dataset_size",
    color_by="method",
    box_group_by="mode",
    title="Accuracy distribution vs Test split",
)
fig_acc_box.show()

# Runtime components vs split (mean across seeds)
runtime_components = ("k_train_time", "fit_time", "k_test_time", "predict_time")
fig_rt_area = plot_split_runtime_area(
    sweep_df,
    components=runtime_components,
    facet_by="dataset_size",
    title="Runtime components vs Test split",
)
fig_rt_area.show()

# Optional: summary table with mean/std per (dataset_size, method, mode, split)
sweep_group_keys = ["dataset_size", "method", "mode", "split"]
sweep_grouped = sweep_df.groupby(sweep_group_keys)
summary = sweep_grouped.agg(
    acc_mean=("acc", "mean"),
    acc_std=("acc", "std"),
    f1_mean=("f1_macro", "mean"),
    f1_std=("f1_macro", "std"),
    total_time_mean=("total_time", "mean"),
)
summary = summary.reset_index().sort_values(sweep_group_keys)
summary.head()

Unnamed: 0,dataset_size,method,mode,split,acc_mean,acc_std,f1_mean,f1_std,total_time_mean
0,50×50,DRF,edge,0.1,0.72,0.026077,0.715236,0.025843,21.658489
1,50×50,DRF,edge,0.2,0.7188,0.014464,0.714172,0.0137,31.572996
2,50×50,DRF,edge,0.3,0.711733,0.012698,0.710283,0.01309,31.118398
3,50×50,DRF,edge,0.4,0.7024,0.007301,0.698497,0.007911,25.599645
4,50×50,DRF,edge,0.5,0.69136,0.009093,0.685186,0.01157,21.439905


In [4]:
from scripts.wp3.tryout import run_split_sweep_all, plot_split_metric_line, plot_split_runtime_area, plot_split_metric_box

# Sweep parameters (same grid as the first sweep)
cap_per_class = 50       # 50×50 = 2.5k samples in total
splits = [0.1, 0.2, 0.3, 0.4, 0.5]
seeds = range(5)          # 5 repetitions per split

# Second sweep: the threshold of 0 makes every run (n=2500) take the linear path,
# whereas the first sweep used a threshold of 3000.
sweep_df_2 = run_split_sweep_all(
    drf_dirs=DRF_DIRS_BIG,
    its_dirs=ITS_DIRS_BIG,
    n_per_class=cap_per_class,
    dataset_size_label=f"50×{cap_per_class}",
    splits=splits,
    seeds=seeds,
    C=1.0,
    class_weight=None,
    kernel_normalize=True,
    use_linear_if_n_ge=0,
    l2_normalize_features=True,
)

# The titles name the linear variant explicitly so these figures are not mistaken
# for the first sweep's, which previously carried identical titles.

# Line charts with error band
fig_acc_line = plot_split_metric_line(sweep_df_2, metric="acc", facet_by="dataset_size",
                                      color_by="method", line_dash_by="mode",
                                      title="Accuracy vs Test split (linear SVM)")
fig_acc_line.show()

fig_f1_line = plot_split_metric_line(sweep_df_2, metric="f1_macro", facet_by="dataset_size",
                                     color_by="method", line_dash_by="mode",
                                     title="F1-macro vs Test split (linear SVM)")
fig_f1_line.show()

# Box plots (distribution across seeds)
fig_acc_box = plot_split_metric_box(sweep_df_2, metric="acc", facet_by="dataset_size",
                                    color_by="method", box_group_by="mode",
                                    title="Accuracy distribution vs Test split (linear SVM)")
fig_acc_box.show()

# Runtime components vs split (mean across seeds).
# NOTE(review): the runtime-bars cell says kernel times are NaN on the linear path, so
# the k_train_time / k_test_time components may be empty here - confirm the plot copes.
fig_rt_area = plot_split_runtime_area(sweep_df_2, components=("k_train_time","fit_time","k_test_time","predict_time"),
                                      facet_by="dataset_size",
                                      title="Runtime components vs Test split (linear SVM)")
fig_rt_area.show()

# Optional: summary table with mean/std per (dataset_size, method, mode, split)
summary_2 = (
    sweep_df_2.groupby(["dataset_size","method","mode","split"])
    .agg(
        acc_mean=("acc","mean"),
        acc_std=("acc","std"),
        f1_mean=("f1_macro","mean"),
        f1_std=("f1_macro","std"),
        total_time_mean=("total_time","mean"),
    )
    .reset_index()
    .sort_values(["dataset_size","method","mode","split"])
)
summary_2.head()

[1/150] DRF/edge split=0.1 seed=0 n=2500 approach=linear acc=0.752 run=0.31s elapsed=1s ETA=3m39s
[2/150] DRF/edge split=0.1 seed=1 n=2500 approach=linear acc=0.696 run=0.33s elapsed=2s ETA=2m13s
[3/150] DRF/edge split=0.1 seed=2 n=2500 approach=linear acc=0.728 run=0.32s elapsed=2s ETA=1m44s
[4/150] DRF/edge split=0.1 seed=3 n=2500 approach=linear acc=0.684 run=0.31s elapsed=2s ETA=1m29s
[5/150] DRF/edge split=0.1 seed=4 n=2500 approach=linear acc=0.732 run=0.31s elapsed=3s ETA=1m20s
[6/150] DRF/edge split=0.2 seed=0 n=2500 approach=linear acc=0.712 run=0.31s elapsed=3s ETA=1m13s
[7/150] DRF/edge split=0.2 seed=1 n=2500 approach=linear acc=0.700 run=0.30s elapsed=3s ETA=1m09s
[8/150] DRF/edge split=0.2 seed=2 n=2500 approach=linear acc=0.704 run=0.30s elapsed=4s ETA=1m05s
[9/150] DRF/edge split=0.2 seed=3 n=2500 approach=linear acc=0.694 run=0.30s elapsed=4s ETA=1m02s
[10/150] DRF/edge split=0.2 seed=4 n=2500 approach=linear acc=0.724 run=0.30s elapsed=4s ETA=1m00s
[11/150] DRF/edge s

Unnamed: 0,dataset_size,method,mode,split,acc_mean,acc_std,f1_mean,f1_std,total_time_mean
0,50×50,DRF,edge,0.1,0.7184,0.027799,0.703235,0.02889,0.245255
1,50×50,DRF,edge,0.2,0.7068,0.011628,0.691164,0.012913,0.228072
2,50×50,DRF,edge,0.3,0.707467,0.010181,0.694014,0.012505,0.205296
3,50×50,DRF,edge,0.4,0.6952,0.00923,0.679428,0.01041,0.185702
4,50×50,DRF,edge,0.5,0.68464,0.006845,0.666058,0.00814,0.162168
