In [None]:
from pathlib import Path
import sys

# Locate the repository root: the first directory, starting at the current
# working directory and moving up through its parents, that contains a
# "scripts" or a "data" folder.
ROOT = next(
    filter(
        lambda folder: (folder / "scripts").is_dir() or (folder / "data").is_dir(),
        [Path.cwd(), *Path.cwd().parents],
    ),
    None,
)
if ROOT is None:
    raise RuntimeError("Repo-Root not found (expected folder 'scripts' or 'data').")
# Put the repo root first on the import path so the `scripts.*` imports resolve.
sys.path.insert(0, str(ROOT))

results = []  # storage for the SVM results

DATA_DIR = ROOT / "data"
# Each list holds one (directory, mode) pair per feature mode.
DRF_DIRS_BIG = [
    (DATA_DIR / "drf_big" / ("precomputed_drf_" + m), m)
    for m in ("edge", "vertex", "sp")
]
DRF_DIRS_SMALL = [
    (DATA_DIR / "drf_small" / ("precomputed_drf_" + m), m)
    for m in ("edge", "vertex", "sp")
]
ITS_DIRS_BIG = [
    (DATA_DIR / "its_big" / ("precomputed_its_" + m), m)
    for m in ("edge", "vertex", "sp")
]
ITS_DIRS_SMALL = [
    (DATA_DIR / "its_small" / ("precomputed_its_" + m), m)
    for m in ("edge", "vertex", "sp")
]

# WP3 — Kernel-based Classification (SVM)

This notebook implements kernel inner products on precomputed hashed feature sets and runs
SVM classification for DRF–WL and ITS–WL across different feature types (vertex/edge/shortest-path),
dataset sizes, numbers of classes, and train/test splits.

In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.io as pio
pio.renderers.default = "vscode"  # NOTE(review): pins Plotly's default renderer to "vscode"; presumably figures will not display in other front ends — confirm
import pickle
from pathlib import Path
from collections import Counter

#local imports
from scripts.wp3.wp3_loader import (
    load_precomputed_features,
    available_subset_ids,
)

from scripts.wp3.wp3_kernel import (
    compute_kernel_matrix,
    kernel_multiset_intersection,
    kernel_matrix_stats,
)

from scripts.wp3.wp3_svm import (
    run_SVM,
    train_svm_from_datasets,
    ResultLogger,
)

from scripts.wp3.wp3_plots import (
    fig2_style_svm_from_kernel, 
    plot_experiment_dashboard,
    plot_heatmaps_by_k,
    plot_difference_heatmap,
    plot_drf_minus_its_bar,
    plot_drf_vs_its_dots,
    plot_accuracy_by_k,
)

from scripts.wp3.wp3_subset_handler import (
    make_soft_shared_k_classes_config,
    safe_subset_ids
)

from scripts.wp3.wp3_error_handling import (
    debug_pkl_basic, debug_pkl_empties, debug_dir_summary, debug_find_nonzero_kernel_pair, debug_find_nonempty_pair,
    )

logger = ResultLogger()  # used to store the SVM results


## 1) Paths to precomputed feature directories

We load precomputed feature representations (stored as `.pkl`) for:

- DRF–WL: reactant/product difference features
- ITS–WL: features from the ITS reaction graph

Each representation is available for three feature modes: vertex, edge, shortest-path.

### Load DRF–WL Features
Load precomputed DRF–WL feature sets and reaction class labels for kernel-based classification.

In [None]:

# DRF_DIRS_SMALL is defined twice in this notebook: as a list of (path, mode)
# pairs in the setup cell and, further down, as a {mode: path} dict. Normalise
# both shapes so this cell also works when it is re-run after the later cell.
if isinstance(DRF_DIRS_SMALL, dict):
    drf_dirs = [(path, mode) for mode, path in DRF_DIRS_SMALL.items()]
else:
    drf_dirs = list(DRF_DIRS_SMALL)

# mode -> feature list / mode -> class labels
X_drf, y_drf = {}, {}
for path, mode in drf_dirs:  # note the order: (path, mode)
    # Explicit check instead of `assert`, which is skipped when Python runs with -O.
    if not path.exists():
        raise FileNotFoundError(f"Pfad nicht gefunden: {path}")
    X, y = load_precomputed_features(path, feature_key="drf_wl")
    X_drf[mode] = X
    y_drf[mode] = y
    print(f"\nLoaded DRF features ({mode}) from {path}")
    print("Number of reactions:", len(X))
    print("Number of classes:", len(set(y)))

### Load ITS–WL Features
Load precomputed ITS–WL feature sets and reaction class labels derived from the ITS graph.

In [None]:
# ITS_DIRS_SMALL is defined twice in this notebook: as a list of (path, mode)
# pairs in the setup cell and, further down, as a {mode: path} dict. Normalise
# both shapes so this cell also works when it is re-run after the later cell.
if isinstance(ITS_DIRS_SMALL, dict):
    its_dirs = [(path, mode) for mode, path in ITS_DIRS_SMALL.items()]
else:
    its_dirs = list(ITS_DIRS_SMALL)

X_its = {}  # mode -> feature list
y_its = {}  # mode -> class labels
for path, mode in its_dirs:  # note the order: (path, mode)
    # Explicit check instead of `assert`, which is skipped when Python runs with -O.
    if not path.exists():
        raise FileNotFoundError(f"Pfad nicht gefunden: {path}")
    X, y = load_precomputed_features(path, feature_key="its_wl")
    X_its[mode] = X
    y_its[mode] = y
    print(f"\nLoaded ITS features ({mode}) from {path}")
    print("Number of reactions:", len(X))
    print("Number of classes:", len(set(y)))


The output confirms that all precomputed DRF–WL and ITS–WL feature representations
(edge, vertex, and shortest-path) were loaded successfully. Each representation
contains the full dataset of 50,000 reactions across 50 reaction classes,
providing a consistent basis for kernel computation and classification.

## 2) Kernel inner product on hash sets

The lab definition reduces all kernels to counting common elements of two hashed feature sets.
Given two reactions with feature hash sets $S_G, S_H$, the kernel is:

$$
k(G,H) = |S_G \cap S_H|
$$

Our precomputed features are stored as Counters. For the required hashset kernel, we use the Counter keys.

### Kernel sanity check (DRF–WL)

We verify that the multiset kernel produces meaningful similarities on the precomputed DRF–WL feature multisets.  
Self-similarity $k(x,x)$ is clearly positive, and different reactions can still share a non-zero overlap, indicating common reaction-change patterns captured by DRF–WL.

In [None]:
mode = "edge"

# This section is about DRF–WL, so those features are checked first; the
# ITS–WL features are checked as well so both representations are covered.
for label, X in (("DRF–WL", X_drf[mode]), ("ITS–WL", X_its[mode])):
    print(f"\n[{label} | {mode}]")
    debug_find_nonempty_pair(X)
    debug_find_nonzero_kernel_pair(X, kernel_multiset_intersection)

### Kernel Matrix Construction

To apply kernel-based classification, the pairwise similarities between all reactions are computed and stored in a kernel matrix. Each entry $K_{ij}$ represents the multiset kernel value between reactions $i$ and $j$. This matrix serves as the direct input for training a Support Vector Machine with a precomputed kernel.

### DRF–WL Kernel Matrix

These heatmaps visualize the DRF–WL kernels (edge, vertex, and shortest-path) for a subset of reactions.
The diagonal shows self-similarity, while most off-diagonal values are zero due to the sparsity of DRF features, which encode only reaction-specific changes.
Non-zero entries indicate reactions with similar bond-change patterns.

In [None]:
modes = ["edge", "vertex", "sp"]
n = 200

# One heatmap per feature mode, each built from the first n reactions.
# After the loop, K_drf holds the matrix of the last mode in `modes`.
for mode in modes:
    X_mode, y_mode = X_drf[mode], y_drf[mode]

    K_drf = compute_kernel_matrix(X_mode[:n])
    stats = kernel_matrix_stats(K_drf)  # computed for inspection; not printed

    heatmap_title = f"Kernel Matrix Heatmap (DRF–WL {mode}, n={n})"
    fig = px.imshow(K_drf, title=heatmap_title, aspect="auto")
    fig.show()

**Figure (DRF–WL):** Kernel matrix heatmap using the DRF–WL kernel.
Diagonal entries show self-similarity, while most off-diagonal values are near zero due to the sparsity of reaction-only features.
Non-zero entries indicate reactions with similar bond-change patterns.

#### Error Handling

In [None]:
# Run the three debug helpers on the small DRF edge directory.
path = DATA_DIR / "drf_small" / "precomputed_drf_edge"
for debug_check in (debug_pkl_basic, debug_pkl_empties, debug_dir_summary):
    debug_check(path, feature_key="drf_wl")

### ITS–WL Kernel Matrix 

The ITS–WL kernel matrix shows a much denser similarity structure than DRF–WL.
This is expected, since ITS encodes the full combined structure of reactants and products, including unchanged molecular context.
As a result, many reactions share common substructures, leading to higher off-diagonal similarities and a less sparse kernel.

In [None]:
modes = ["edge", "vertex", "sp"]
n = 200

# One heatmap per feature mode, each built from the first n reactions.
# After the loop, K_its holds the matrix of the last mode in `modes`.
for mode in modes:
    X_mode, y_mode = X_its[mode], y_its[mode]

    K_its = compute_kernel_matrix(X_mode[:n])
    stats = kernel_matrix_stats(K_its)  # computed for inspection; not printed

    heatmap_title = f"Kernel Matrix Heatmap (ITS–WL {mode}, n={n})"
    fig = px.imshow(K_its, title=heatmap_title, aspect="auto")
    fig.show()

**Figure (ITS–WL):** Kernel matrix heatmaps using the ITS–WL kernels (one per feature mode).
The matrix is denser than DRF–WL, since ITS encodes the full molecular context of reactants and products.
Off-diagonal entries therefore reflect shared structural motifs beyond the reaction center.

### Comparison of DRF–WL and ITS–WL Kernel Matrices

The DRF–WL and ITS–WL kernel matrices reveal complementary notions of reaction similarity.
DRF–WL focuses exclusively on reaction-specific changes by computing the symmetric difference between reactant and product features.
As a result, the corresponding kernel matrix is sparse, with non-zero similarities only for reactions that share similar bond-change patterns.

In contrast, ITS–WL operates on the Imaginary Transition State graph, which encodes the full structural context of both reactants and products.
This leads to a denser kernel matrix, as reactions may share common substructures even if their reaction centers differ.

Consequently, DRF–WL provides a highly selective notion of similarity tailored to reaction mechanisms,
whereas ITS–WL captures broader structural resemblance between reactions.
Both representations are therefore suitable for different aspects of reaction classification.

**Figure:** Kernel matrix heatmaps for DRF–WL (bottom) and ITS–WL (top) using edge-based Weisfeiler–Lehman features.
Each entry $K_{ij}$ corresponds to the multiset intersection between the feature representations of reactions $i$ and $j$.
The diagonal indicates self-similarity, while off-diagonal values reflect shared structural or reaction-specific features.
DRF–WL produces a sparse kernel emphasizing reaction changes, whereas ITS–WL yields a denser kernel capturing overall structural similarity.

In [None]:
import numpy as np
import plotly.express as px

def upper_triangle_values(K):
    """Return the entries of K strictly above the diagonal (i < j) as a 1-D array.

    The index pairs are generated for a square matrix of size ``K.shape[0]``.
    """
    above_diagonal = np.triu_indices(K.shape[0], k=1)  # k=1 skips the diagonal
    return K[above_diagonal]

# Build both kernel matrices explicitly for one feature mode. The global
# K_drf / K_its are overwritten inside the heatmap loops and therefore hold
# whatever mode was processed last, which is not what the text describes.
HIST_MODE = "edge"
HIST_N = 200

K_drf_hist = compute_kernel_matrix(X_drf[HIST_MODE][:HIST_N])
K_its_hist = compute_kernel_matrix(X_its[HIST_MODE][:HIST_N])

# Off-diagonal similarities only (each unordered pair counted once).
vals_drf = upper_triangle_values(K_drf_hist)
vals_its = upper_triangle_values(K_its_hist)

fig = px.histogram(
    x=[vals_drf, vals_its],
    nbins=60,
    opacity=0.55,
    title=f"DRF vs ITS: Distribution of Kernel Similarities (upper triangle, {HIST_MODE}, n={HIST_N})",
    labels={"value": "Kernel value", "variable": "Kernel"},
)

# Give the two traces readable legend names (trace order follows the x list).
fig.data[0].name = "DRF–WL"
fig.data[1].name = "ITS–WL"
fig.show()

**Figure:** Distribution of off-diagonal kernel values for DRF–WL and ITS–WL.
DRF–WL produces a highly sparse similarity distribution with many zero entries, reflecting its focus on reaction-specific changes.
In contrast, ITS–WL yields a broader distribution, capturing shared structural context between reactions.

# SVM Classification with a Custom Reaction Kernel
An SVM classifier was trained using a custom multiset-intersection kernel on reaction features.
Because the kernel compares pairs of reactions directly, the kernel matrix was precomputed and used with `kernel="precomputed"`.
This setup allows a fair and consistent comparison between DRF–WL and ITS–WL using the same SVM configuration.


To systematically evaluate kernel variants, we run the same SVM setup for each feature mode separately.
This yields comparable accuracies for edge-, vertex-, and shortest-path-based WL representations without mixing feature spaces.

## SVM Classification with the Small Data Set

In [None]:
# Feature directories keyed by feature mode ("edge", "vertex", "sp").
# These rebind the names that the setup cell defined as lists of (path, mode) pairs.
DRF_DIRS_BIG = {
    m: DATA_DIR / "drf_big" / f"precomputed_drf_{m}" for m in ("edge", "vertex", "sp")
}

DRF_DIRS_SMALL = {
    m: DATA_DIR / "drf_small" / f"precomputed_drf_{m}" for m in ("edge", "vertex", "sp")
}

ITS_DIRS_BIG = {
    m: DATA_DIR / "its_big" / f"precomputed_its_{m}" for m in ("edge", "vertex", "sp")
}

ITS_DIRS_SMALL = {
    m: DATA_DIR / "its_small" / f"precomputed_its_{m}" for m in ("edge", "vertex", "sp")
}

In [None]:
# =========================
# Global SVM settings
# =========================
C = 1.0
seed = 42
test_sizes = [0.2]  # baseline
n_values = [600]  # baseline

# -------------------------------------------------------
# Choose subset_ids so that DRF & ITS are comparable
# -------------------------------------------------------
selection_kwargs = {
    "drf_edge_dir": DRF_DIRS_SMALL["edge"],
    "its_edge_dir": ITS_DIRS_SMALL["edge"],
    "k": 2,
    "take_subsets": 20,
    "min_per_class": 10,
    "ref_scan": 50,
}
opt_k = make_soft_shared_k_classes_config(**selection_kwargs)

subset_ids = opt_k["subset_ids"]

print("Option 1 config:")
print(opt_k)
print(f"Chosen subset_ids: {subset_ids}")
print(f"Number of chosen subsets: {len(subset_ids)}")

### 1) Baseline Comparison: DRF–WL vs ITS–WL

In this experiment, DRF–WL and ITS–WL kernels are compared under identical conditions to provide a fair baseline.
All parameters are fixed (feature mode, dataset size, train/test split, and SVM regularization), and only the
graph representation differs. This allows us to directly assess the impact of reaction-based versus structure-based
graph representations on classification performance.

In [None]:
# =========================
# Section 1: Baseline (k=1 vs k=2)
# =========================
# Every run is appended to the shared `logger`.
# NOTE(review): re-running this cell appends the same runs again — presumably
# ResultLogger does not de-duplicate; confirm before aggregating.

for k in [1, 2]:
    opt_k = make_soft_shared_k_classes_config(
        drf_edge_dir=DRF_DIRS_SMALL["edge"],
        its_edge_dir=ITS_DIRS_SMALL["edge"],
        k=k,
        take_subsets=30,
        min_per_class=5,
        ref_scan=180,
    )
    subset_ids = safe_subset_ids(opt_k["subset_ids"], DRF_DIRS_SMALL["edge"], ITS_DIRS_SMALL["edge"], take=20)

    # The log label mirrors the "S1_baseline" tag used for these runs.
    print(f"\n[S1 | k={k}] using {len(subset_ids)} subsets:", subset_ids[:10], "..." if len(subset_ids)>10 else "")

    # Same subsets and hyper-parameters for both kernels; only the graph
    # representation (directory + feature key) differs.
    for kernel_name, precomp_dir, feature_key in (
        ("DRF–WL", DRF_DIRS_SMALL["edge"], "drf_wl"),
        ("ITS–WL", ITS_DIRS_SMALL["edge"], "its_wl"),
    ):
        res = train_svm_from_datasets(
            precomp_dir=precomp_dir,
            feature_key=feature_key,
            subset_ids=subset_ids,
            n=600, test_size=0.2, C=C, seed=seed,
            verbose=False,
        )
        logger.add_result("S1_baseline", kernel_name, "edge", 600, 0.2, C, seed, res, subset_ids=subset_ids, k=k)

# Show the rows logged for this section.
df_results = pd.DataFrame(logger.results)
df_results[df_results["tag"]=="S1_baseline"][["kernel","mode","k","n","test_size","accuracy","subset_ids"]].sort_values(["k","kernel"])

### 2) Feature Mode Comparison

This section evaluates the influence of different feature extraction modes on classification accuracy.
Edge-, vertex-, and shortest-path-based WL features are compared while keeping all other parameters fixed.
The experiment highlights which structural information is most informative for reaction classification.

In [None]:
# =========================
# Section 2: Modes (edge/vertex/sp)
# =========================

# (kernel label, mode -> directory mapping, feature key); DRF runs before ITS.
kernel_setups = (
    ("DRF–WL", DRF_DIRS_SMALL, "drf_wl"),
    ("ITS–WL", ITS_DIRS_SMALL, "its_wl"),
)

for k in [1, 2]:
    # The subsets are always selected on the edge directories and reused for every mode.
    opt_k = make_soft_shared_k_classes_config(
        drf_edge_dir=DRF_DIRS_SMALL["edge"],
        its_edge_dir=ITS_DIRS_SMALL["edge"],
        k=k,
        take_subsets=30,
        min_per_class=5,
        ref_scan=180,
    )
    subset_ids = safe_subset_ids(
        opt_k["subset_ids"],
        DRF_DIRS_SMALL["edge"],
        ITS_DIRS_SMALL["edge"],
        take=20,
    )

    print(f"\n[S2 | k={k}] using {len(subset_ids)} subsets")

    for mode in ["edge", "vertex", "sp"]:
        for kernel_name, dirs_by_mode, feature_key in kernel_setups:
            res = train_svm_from_datasets(
                precomp_dir=dirs_by_mode[mode],
                feature_key=feature_key,
                subset_ids=subset_ids,
                n=600, test_size=0.2, C=C, seed=seed,
                verbose=False,
            )
            logger.add_result("S2_modes", kernel_name, mode, 600, 0.2, C, seed, res, subset_ids=subset_ids, k=k)

# Show the rows logged for this section.
df_results = pd.DataFrame(logger.results)
s2_rows = df_results[df_results["tag"] == "S2_modes"]
s2_rows[["kernel", "mode", "k", "n", "accuracy", "subset_ids"]].sort_values(["k", "kernel", "mode"])

### 3) Effect of Dataset Size

To study the scalability and robustness of the kernel-based approach, the dataset size is varied while
keeping the kernel configuration constant. This experiment shows how classification performance changes
as more training data becomes available.

In [None]:
# =========================
# Section 3: Size sweep (n)
# =========================
# The subsets are selected per k inside the loop, exactly as in the other
# sections. (A previous `subset_ids = None` / "using ALL subsets" message was
# removed: the value was overwritten before any run used it.)
# NOTE(review): if the intent really is to train on all subsets, the
# selection inside the loop has to go instead — confirm.

for k in [1, 2]:
    opt_k = make_soft_shared_k_classes_config(
        drf_edge_dir=DRF_DIRS_SMALL["edge"],
        its_edge_dir=ITS_DIRS_SMALL["edge"],
        k=k,
        take_subsets=30,
        min_per_class=5,
        ref_scan=180,
    )
    subset_ids = safe_subset_ids(opt_k["subset_ids"], DRF_DIRS_SMALL["edge"], ITS_DIRS_SMALL["edge"], take=20)

    print(f"\n[S3 | k={k}] using {len(subset_ids)} subsets")

    for n in [200, 600, 1200]:
        # Same subsets and hyper-parameters for both kernels; only n varies.
        for kernel_name, precomp_dir, feature_key in (
            ("DRF–WL", DRF_DIRS_SMALL["edge"], "drf_wl"),
            ("ITS–WL", ITS_DIRS_SMALL["edge"], "its_wl"),
        ):
            res = train_svm_from_datasets(
                precomp_dir=precomp_dir,
                feature_key=feature_key,
                subset_ids=subset_ids,
                n=n, test_size=0.2, C=C, seed=seed,
                verbose=False,
            )
            logger.add_result("S3_size", kernel_name, "edge", n, 0.2, C, seed, res, subset_ids=subset_ids, k=k)

# Show the results; `k` is included so the k=1 and k=2 rows can be told apart.
df_results = pd.DataFrame(logger.results)
df_results[df_results["tag"]=="S3_size"][
    ["kernel", "mode", "k", "n", "accuracy"]
].sort_values(["k", "kernel", "n"])

### 4) Effect of Train/Test Split

This experiment investigates the sensitivity of the SVM classifier to different train/test splits.
By increasing the proportion of test data, we assess the stability and generalization capability of the
kernel-based model.

In [None]:
# =========================
# Section 4: Train/Test split sweep
# =========================

# Dataset size for this sweep. It is set explicitly to the baseline size used
# in the other sections; before, `n` was whatever value an earlier cell had
# left behind in the kernel.
# NOTE(review): confirm that 600 is the intended size for the split sweep.
n_split = 600

for k in [1, 2]:
    opt_k = make_soft_shared_k_classes_config(
        drf_edge_dir=DRF_DIRS_SMALL["edge"],
        its_edge_dir=ITS_DIRS_SMALL["edge"],
        k=k,
        take_subsets=30,
        min_per_class=5,
        ref_scan=180,
    )

    subset_ids = safe_subset_ids(opt_k["subset_ids"], DRF_DIRS_SMALL["edge"], ITS_DIRS_SMALL["edge"], take=20)
    # The log label mirrors the "S4_split" tag used for these runs.
    print(f"\n[S4 | k={k}] using {len(subset_ids)} subsets:", subset_ids[:10], "..." if len(subset_ids)>10 else "")

    for ts in [0.1, 0.2, 0.3, 0.4]:
        # Same subsets and hyper-parameters for both kernels; only test_size varies.
        for kernel_name, precomp_dir, feature_key in (
            ("DRF–WL", DRF_DIRS_SMALL["edge"], "drf_wl"),
            ("ITS–WL", ITS_DIRS_SMALL["edge"], "its_wl"),
        ):
            res = train_svm_from_datasets(
                precomp_dir=precomp_dir,
                feature_key=feature_key,
                subset_ids=subset_ids,
                n=n_split, test_size=ts, C=C, seed=seed,
                verbose=False,
            )
            logger.add_result("S4_split", kernel_name, "edge", n_split, ts, C, seed, res, subset_ids=subset_ids, k=k)

df_results = pd.DataFrame(logger.results)

# Show the rows logged for this section (tag "S4_split").
df_results[df_results["tag"]=="S4_split"][["kernel","k","mode","n","test_size","accuracy","subset_ids"]].sort_values(["k","kernel","test_size"])

## Summary of Classification Results

This section summarizes the classification results obtained across all experiments.
The comparison highlights the strengths and limitations of different kernel representations, feature modes,
and dataset configurations, and provides an overall assessment of the kernel-based reaction classification approach.

In [None]:
# =========================
# Section 8: Summary + Plots
# =========================

import numpy as np
import pandas as pd

# -------------------------
# 8.1 Build results DataFrame
# -------------------------
df_results = pd.DataFrame(logger.results).copy()

print("Rows:", len(df_results))
display(df_results.head(10))

# Coerce the bookkeeping columns to numeric dtypes (helps grouping/plots);
# values that cannot be parsed become NaN.
for col in ("k", "n", "seed", "test_size"):
    if col in df_results.columns:
        df_results[col] = pd.to_numeric(df_results[col], errors="coerce")

# Normalized kernel name for comparisons: upper-cased, en dash replaced by "-".
kernel_upper = df_results["kernel"].astype(str).str.upper()
df_results["kernel_norm"] = kernel_upper.str.replace("–", "-", regex=False)

print("\nKernel counts (normalized):")
print(df_results["kernel_norm"].value_counts())

# -------------------------
# 8.2 Sanity: do we have DRF and ITS for same setting?
# -------------------------
group_cols = ["tag", "mode", "k", "n", "test_size"]

# Set of normalized kernel names observed for every experiment setting.
present = (
    df_results.groupby(group_cols)["kernel_norm"]
    .apply(lambda s: set(s))
)


def _has_drf_and_its(kernel_names):
    """Return True when the names include at least one DRF and one ITS kernel.

    The normalized names carry a suffix (e.g. "DRF-WL"), so they are matched
    by prefix rather than compared with the bare strings "DRF" / "ITS".
    """
    return all(
        any(str(name).startswith(prefix) for name in kernel_names)
        for prefix in ("DRF", "ITS")
    )


missing_pairs = present[~present.apply(_has_drf_and_its).astype(bool)]

print("\nCombinations missing DRF or ITS:", len(missing_pairs))
if len(missing_pairs) > 0:
    display(missing_pairs.head(20))

# -------------------------
# 8.3 Quick summary table
# -------------------------
# These must be the tags that the experiment cells pass to logger.add_result;
# any other tag filters every row out and leaves both tables empty.
SHOW_TAGS = ["S1_baseline", "S2_modes", "S3_size", "S4_split"]

shown_rows = df_results[df_results["tag"].isin(SHOW_TAGS)]

summary = (
    shown_rows
    [["tag","kernel","mode","k","n","test_size","accuracy","subset_ids"]]
    .sort_values(["tag","k","kernel","mode","n","test_size"])
)

display(summary)

# Optional: mean/std across seeds (if you ran multiple seeds)
agg = (
    shown_rows
    .groupby(["tag","kernel","mode","k","n","test_size"], as_index=False)
    .agg(acc_mean=("accuracy","mean"), acc_std=("accuracy","std"), runs=("accuracy","size"))
    .sort_values(["tag","k","kernel","mode","n","test_size"])
)
display(agg)

# -------------------------
# 8.4 Dashboard plots (your helper)
# -------------------------
figs = plot_experiment_dashboard(df_results, title_prefix="WP3 (k=1 vs k=2)")

# Only the figures the helper actually returned are shown.
for name in figs:
    fig = figs[name]
    print("Showing:", name)
    fig.show()

# -------------------------
# 8.5 Fig-2 style plot (needs matching y length!)
# -------------------------
# Build the kernel and the labels from the same DRF feature mode. The global
# K_drf holds whatever mode the heatmap loop processed last, and the global
# `y` holds the labels of the last feature set that was loaded, so neither is
# guaranteed to be the DRF edge data named in the title.
fig2_mode = "edge"
fig2_n = 200

K_fig2 = compute_kernel_matrix(X_drf[fig2_mode][:fig2_n])
nK = K_fig2.shape[0]
y_small = np.asarray(y_drf[fig2_mode])[:nK]  # MUST match K rows

fig, info = fig2_style_svm_from_kernel(
    K_fig2,
    y_small,
    C=1.0,
    n_points_grid=250,
    seed=42,
    title=f"DRF–WL {fig2_mode} | Fig-2 style (n={nK})",
)
print(info)
fig.show()

# -------------------------
# 8.6 Extra comparison plots (only call if your funcs exist)
# -------------------------
# The helpers below are only called when df_results has at least one non-null k.
# NOTE(review): their return values are discarded here — presumably each helper
# displays its own figure; confirm.
has_k_values = "k" in df_results.columns and df_results["k"].notna().any()

if not has_k_values:
    print("[WARN] df_results has no k values -> skipping k-based plots.")
else:
    for plot_fn in (
        plot_heatmaps_by_k,
        plot_difference_heatmap,
        plot_drf_minus_its_bar,
        plot_drf_vs_its_dots,
        plot_accuracy_by_k,
    ):
        plot_fn(df_results)

### Fig-2-Style SVM Visualization (Kernel Space → 2D)

To make the SVM decision boundary interpretable, we project the **precomputed kernel matrix** into 2D using **KernelPCA**.
On this 2D embedding we train a **linear SVM** and visualize:

- **Scatter + support vectors** (points influencing the margin)
- A **grid** colored by the predicted class
- The **decision boundary** as the contour where the decision function is close to zero

This mirrors the visualization idea from the reference paper, where grid points with predicted values close to 0 trace the separating boundary.

In [None]:
# Build the kernel and the labels from the same DRF feature mode. The global
# K_drf holds whatever mode the heatmap loop processed last, and the global
# `y` holds the labels of the last feature set that was loaded, so neither is
# guaranteed to be the DRF edge data named in the title.
fig2_mode = "edge"
fig2_n = 200  # number of reactions in the visualised kernel

K_fig2 = compute_kernel_matrix(X_drf[fig2_mode][:fig2_n])
n_fig2 = K_fig2.shape[0]
y_small = np.asarray(y_drf[fig2_mode])[:n_fig2]  # MUST have the same length as K

fig, info = fig2_style_svm_from_kernel(
    K_fig2,
    y_small,
    C=1.0,
    n_points_grid=250,
    seed=42,
    title=f"DRF–WL {fig2_mode} | Fig-2 style (n={n_fig2})"
)
print(info)
fig.show()