# Fit B in the FoM TXbb and BDT space

# Misc Checks

In [1]:
import pandas as pd
import numpy as np
import vector
import os
from xgboost import XGBClassifier
from pathlib import Path

import HH4b.utils as utils
from HH4b.utils import ShapeVar
import HH4b.plotting as plotting
from HH4b.postprocessing import PostProcess, Region
import HH4b.postprocessing as postprocessing
from HH4b.hh_vars import samples, years, samples_run3

import hist
import matplotlib.pyplot as plt
import mplhep as hep
import matplotlib.ticker as mticker
import importlib

# Tick formatter using math-text scientific notation; the power limits make
# it switch to scientific notation outside the 10^-3 .. 10^3 range.
formatter = mticker.ScalarFormatter(useMathText=True)
formatter.set_powerlimits((-3, 3))

In [2]:
# Load the scan table (columns used below: txbb_cut, bdt_cut, nevents_regionB).
# The cells that follow read the table through the name `df`, so bind both
# names to the same DataFrame; otherwise a top-to-bottom run fails with a
# NameError on `df`.
pdf = pd.read_json("df_txbb_0pt6_bdt_0pt6.json")
df = pdf

In [None]:
pdf

# Fitting Starts

In [4]:
import matplotlib.pyplot as plt
import numpy as np

from matplotlib import cm
from matplotlib.ticker import LinearLocator

In [5]:
# plot region parameter
# Lower edges of the (Txbb, BDT) window; scan points below either value are
# dropped before fitting and plotting.
txbb_low = 0.95
bdt_low = 0.95
# x_low = 0.6
# y_low = 0.6

# scan parameters
# Step between neighbouring cut values on each axis; used to build bin edges.
txbb_stepsize = 0.002
bdt_stepsize = 0.005

In [6]:
# Pull the three scan columns out of the DataFrame as NumPy arrays.
Txbb, BDT, B = (
    np.array(df[column]) for column in ("txbb_cut", "bdt_cut", "nevents_regionB")
)

# Keep only the scan points inside the plotting window (both cuts at or above
# their lower edge).
Txbb_cond = Txbb >= txbb_low
BDT_cond = BDT >= bdt_low
cond = Txbb_cond & BDT_cond

Txbb, BDT, B = Txbb[cond], BDT[cond], B[cond]

### Add nevents_B = 0 at boundaries to make interpolation more physical

In [7]:
# Add B = 0 anchor points along the Txbb = 1 and BDT = 1 edges so the
# interpolation is pinned to zero there. Each edge is appended in a single
# concatenate instead of growing the arrays one element at a time.

# at Txbb = 1: one zero-valued point per distinct BDT cut
bdt_edge = np.unique(BDT)
Txbb = np.concatenate([Txbb, np.ones(bdt_edge.size)])
BDT = np.concatenate([BDT, bdt_edge])
B = np.concatenate([B, np.zeros(bdt_edge.size)])

# at BDT = 1: one zero-valued point per distinct Txbb cut. np.unique(Txbb)
# already contains the value 1 appended above, so the (1, 1) corner is covered.
txbb_edge = np.unique(Txbb)
Txbb = np.concatenate([Txbb, txbb_edge])
BDT = np.concatenate([BDT, np.ones(txbb_edge.size)])
B = np.concatenate([B, np.zeros(txbb_edge.size)])

In [None]:
df[df["nevents_regionB"] == 0]

In [9]:
# Bin edges for gridding the scan: the first edge sits half a step below the
# lower cut, so values spaced by one step from the lower cut fall at bin centres.
txbb_bins = np.arange(txbb_low - txbb_stepsize / 2, 1 + txbb_stepsize, txbb_stepsize)
bdt_bins = np.arange(bdt_low - bdt_stepsize / 2, 1 + bdt_stepsize, bdt_stepsize)

In [10]:
# check if the predictions are smooth
# Finer steps (a tenth of the scan step sizes) used to evaluate the
# interpolant between scan points.
test_Txbb_stepsize = 0.0002
test_BDT_stepsize = 0.0005

# Bin edges for the fine grid, offset by half a fine step like the coarse ones.
test_Txbb_bins = np.arange(
    txbb_low - test_Txbb_stepsize / 2, 1 + test_Txbb_stepsize, test_Txbb_stepsize
)
test_BDT_bins = np.arange(bdt_low - test_BDT_stepsize / 2, 1 + test_BDT_stepsize, test_BDT_stepsize)

In [11]:
# Fine evaluation grid covering [low, 1) on both axes (np.arange excludes the
# stop value), flattened to 1D coordinate arrays for the interpolant.
test_Txbb_range = np.arange(txbb_low, 1, test_Txbb_stepsize)
test_BDT_range = np.arange(bdt_low, 1, test_BDT_stepsize)
test_Txbb_grid, test_BDT_grid = np.meshgrid(test_Txbb_range, test_BDT_range)
test_Txbb = test_Txbb_grid.flatten()
test_BDT = test_BDT_grid.flatten()

## RBF fitting (plots are for checking if the fitting is successful)

In [12]:
from scipy.interpolate import Rbf

In [13]:
from scipy.spatial.distance import cdist

In [14]:
def calc_distance(a, b, option):
    """Return the distance of each point (a[i], b[i]) from the origin.

    `option` is the metric name handed to scipy's `cdist`. The full pairwise
    matrix between the points and an equally sized block of zeros is computed
    and its diagonal (point i against zero row i) is returned.
    """
    points = np.column_stack((a.ravel(), b.ravel()))
    origin = np.zeros_like(points)
    pairwise = cdist(points, origin, option)
    return np.diagonal(pairwise)

In [20]:
# cdist metric names to compare in the distance scatter plot; only the
# uncommented entries are used.
styles = [
    #    "euclidean",
    #    "seuclidean",
    "cityblock",
    #     "chebyshev",
    #     "minkowski",
    #     "hamming",
    #     "cosine",
    #     "jaccard",
    #     "mahalanobis",
    #     "canberra",
    #     "braycurtis",
    #     "dice",
    #     "kulsinski"
]

In [None]:
fig, ax = plt.subplots()

# Scatter B against the min-max normalised distance of each scan point from
# two reference corners, once per metric in `styles`. All metrics are plotted
# for the first corner before moving on to the second.
reference_points = (
    ("(1,1)", Txbb - 1, BDT - 1),
    ("(0.9,0.9)", 0.9 - Txbb, 0.9 - BDT),
)
for origin, d_txbb, d_bdt in reference_points:
    for style in styles:
        distance = calc_distance(d_txbb, d_bdt, style)
        ax.scatter(
            (distance - distance.min()) / (distance.max() - distance.min()),
            B,
            s=10,
            # a space separates the metric name from "from" in the legend
            label=f"{style} from {origin}",
        )

# Product of the distances to the upper edges, as an alternative 1D variable.
distance = (1.01 - Txbb) * (1 - BDT)
ax.scatter(distance / distance.max(), B, s=10, label="product")
ax.legend()

In [42]:
# Fit a multiquadric RBF to B over the (Txbb, BDT) scan points. Both
# coordinates are shifted by 0.9; the evaluation calls below apply the same
# shift.
# NOTE(review): with norm="seuclidean" and no explicit variance vector, scipy's
# cdist derives the per-axis variances from whichever points it is handed, so
# the distance scaling when evaluating on a new grid may differ from the one
# used at fit time — confirm, or rescale the inputs explicitly and use
# norm="euclidean".
rbfi = Rbf(
    Txbb - 0.9, BDT - 0.9, B, function="multiquadric", norm="seuclidean", epsilon=0.1, smooth=1e-5
)

In [43]:
B_RBF_Fitted = rbfi(Txbb - 0.9, BDT - 0.9)

In [None]:
# Grid the fitted values: each scan point is dropped into its (Txbb, BDT) bin
# with the fitted B as weight (values add up if several points share a bin).
heatmap, Txbbedges, BDTedges = np.histogram2d(
    Txbb, BDT, bins=[txbb_bins, bdt_bins], weights=B_RBF_Fitted
)

# Create the heatmap plot; histogram2d puts Txbb on axis 0, so transpose to
# get Txbb on the horizontal axis.
plt.imshow(
    heatmap.T,
    extent=[Txbbedges[0], Txbbedges[-1], BDTedges[0], BDTedges[-1]],
    origin="lower",
    cmap="hot",
)
# The description belongs on the colourbar; plt.show() takes no text argument.
plt.colorbar(label="Number of Data in Region B")
plt.title("RBF Fitted B Values")
plt.xlabel("txbb_cut")
plt.ylabel("bdt_cut")
plt.show()

In [None]:
# Relative residual of the fit, (fitted - scanned) / scanned, per scan point.
# Points with B == 0 (such as the zero anchors added on the Txbb = 1 and
# BDT = 1 edges) have no defined relative residual and would put inf/nan into
# the map, so they are left out; their bins stay at 0.
nonzero = B != 0
rel_diff = (B_RBF_Fitted[nonzero] - B[nonzero]) / B[nonzero]
heatmap, Txbbedges, BDTedges = np.histogram2d(
    Txbb[nonzero], BDT[nonzero], bins=[txbb_bins, bdt_bins], weights=rel_diff
)

# Create the heatmap plot (transpose so Txbb runs along the horizontal axis).
plt.imshow(
    heatmap.T,
    extent=[Txbbedges[0], Txbbedges[-1], BDTedges[0], BDTedges[-1]],
    origin="lower",
    cmap="hot",
)
plt.colorbar(label="(RBF - Scanned) / Scanned")
plt.title("Diff(RBF, Scanned) of B (0.1 is 10%)")
plt.xlabel("txbb_cut")
plt.ylabel("bdt_cut")
plt.show()

## RBF Interpolation (smooth check)

In [46]:
B_RBF_Pred = rbfi(test_Txbb - 0.9, test_BDT - 0.9)

In [None]:
# Grid the RBF predictions made on the fine test grid.
heatmap, Txbbedges, BDTedges = np.histogram2d(
    test_Txbb, test_BDT, bins=[test_Txbb_bins, test_BDT_bins], weights=B_RBF_Pred
)

# Create the heatmap plot (transpose so Txbb runs along the horizontal axis).
plt.imshow(
    heatmap.T,
    extent=[Txbbedges[0], Txbbedges[-1], BDTedges[0], BDTedges[-1]],
    origin="lower",
    cmap="hot",
)
# The description belongs on the colourbar; plt.show() takes no text argument.
plt.colorbar(label="Number of Data in Region B")
plt.title("RBF B Predictions on 10x Finer Grid")
plt.xlabel("txbb_cut")
plt.ylabel("bdt_cut")
plt.show()

## RBF 1D smooth check

### 1. Fix txbb cut, vary bdt cut

In [None]:
fix_txbb_cuts = [0.93, 0.95, 0.97, 0.99]


for txbb_cut in fix_txbb_cuts:
    # scanned points on this Txbb slice, at or above the lower BDT edge
    txbb_select = np.isclose(df["txbb_cut"], txbb_cut)
    bdt_select = df["bdt_cut"] >= bdt_low
    select = txbb_select & bdt_select
    BDT_1D = df.loc[select, "bdt_cut"]
    B_1D = df.loc[select, "nevents_regionB"]

    # RBF prediction along the same slice on the fine BDT grid
    test_BDT_1D = np.arange(bdt_low, 1, test_BDT_stepsize)
    test_Txbb_1D = np.full_like(test_BDT_1D, txbb_cut)
    B_RBF_Pred_1D = rbfi(test_Txbb_1D - 0.9, test_BDT_1D - 0.9)

    # overlay the measurements and the interpolated curve
    fig, ax = plt.subplots(figsize=(10, 8))
    ax.scatter(BDT_1D, B_1D, c="y", label="measurements")
    ax.plot(test_BDT_1D, B_RBF_Pred_1D, label="interpolation")
    ax.set_xlabel("bdt cut")
    ax.set_ylabel("Number of Events in Region B")
    ax.set_title(f"RBF Interpolation @ Txbb cut = {txbb_cut}")
    ax.legend()
    plt.show()

### 2. Fix bdt cut, vary txbb cut

In [None]:
fix_bdt_cuts = [0.93, 0.95, 0.97, 0.99]


for bdt_cut in fix_bdt_cuts:
    # scanned points on this BDT slice, at or above the lower Txbb edge
    bdt_select = np.isclose(df["bdt_cut"], bdt_cut)
    txbb_select = df["txbb_cut"] >= txbb_low
    select = txbb_select & bdt_select
    Txbb_1D = df.loc[select, "txbb_cut"]
    B_1D = df.loc[select, "nevents_regionB"]

    # RBF prediction along the same slice on the fine Txbb grid
    test_Txbb_1D = np.arange(txbb_low, 1, test_Txbb_stepsize)
    test_BDT_1D = np.full_like(test_Txbb_1D, bdt_cut)
    B_RBF_Pred_1D = rbfi(test_Txbb_1D - 0.9, test_BDT_1D - 0.9)

    # overlay the measurements and the interpolated curve
    fig, ax = plt.subplots(figsize=(10, 8))
    ax.scatter(Txbb_1D, B_1D, c="y", label="measurements")
    ax.plot(test_Txbb_1D, B_RBF_Pred_1D, label="interpolation")
    ax.set_xlabel("txbb cut")
    ax.set_ylabel("Number of Events in Region B")
    ax.set_title(f"RBF Interpolation @ BDT cut = {bdt_cut}")
    ax.legend()
    plt.show()

## RBF fit with product as input

In [50]:
fit_product = (1 - Txbb) * (1 - BDT)

In [51]:
rbfi = Rbf(fit_product, B, function="multiquadric", norm="euclidean", epsilon=0.02, smooth=1e-3)

In [52]:
B_RBF_Fitted = rbfi(fit_product)

In [None]:
# Grid the values fitted by the product-based RBF: each scan point is dropped
# into its (Txbb, BDT) bin with the fitted B as weight.
heatmap, Txbbedges, BDTedges = np.histogram2d(
    Txbb, BDT, bins=[txbb_bins, bdt_bins], weights=B_RBF_Fitted
)

# Create the heatmap plot (transpose so Txbb runs along the horizontal axis).
plt.imshow(
    heatmap.T,
    extent=[Txbbedges[0], Txbbedges[-1], BDTedges[0], BDTedges[-1]],
    origin="lower",
    cmap="hot",
)
# The description belongs on the colourbar; plt.show() takes no text argument.
plt.colorbar(label="Number of Data in Region B")
plt.title("RBF Fitted B Values")
plt.xlabel("txbb_cut")
plt.ylabel("bdt_cut")
plt.show()

In [None]:
# Relative residual of the fit, (fitted - scanned) / scanned, per scan point.
# Points with B == 0 (such as the zero anchors added on the Txbb = 1 and
# BDT = 1 edges) have no defined relative residual and would put inf/nan into
# the map, so they are left out; their bins stay at 0.
nonzero = B != 0
rel_diff = (B_RBF_Fitted[nonzero] - B[nonzero]) / B[nonzero]
heatmap, Txbbedges, BDTedges = np.histogram2d(
    Txbb[nonzero], BDT[nonzero], bins=[txbb_bins, bdt_bins], weights=rel_diff
)

# Create the heatmap plot (transpose so Txbb runs along the horizontal axis).
plt.imshow(
    heatmap.T,
    extent=[Txbbedges[0], Txbbedges[-1], BDTedges[0], BDTedges[-1]],
    origin="lower",
    cmap="hot",
)
plt.colorbar(label="(RBF - Scanned) / Scanned")
plt.title("Diff(RBF, Scanned) of B (0.1 is 10%)")
plt.xlabel("txbb_cut")
plt.ylabel("bdt_cut")
plt.show()

In [55]:
test_product = (1 - test_Txbb) * (1 - test_BDT)
B_RBF_Pred = rbfi(test_product)

In [None]:
# Grid the product-based RBF predictions made on the fine test grid.
heatmap, Txbbedges, BDTedges = np.histogram2d(
    test_Txbb, test_BDT, bins=[test_Txbb_bins, test_BDT_bins], weights=B_RBF_Pred
)

# Create the heatmap plot (transpose so Txbb runs along the horizontal axis).
plt.imshow(
    heatmap.T,
    extent=[Txbbedges[0], Txbbedges[-1], BDTedges[0], BDTedges[-1]],
    origin="lower",
    cmap="hot",
)
# The description belongs on the colourbar; plt.show() takes no text argument.
plt.colorbar(label="Number of Data in Region B")
plt.title("RBF B Predictions on 10x Finer Grid")
plt.xlabel("txbb_cut")
plt.ylabel("bdt_cut")
plt.show()

## 1D smooth check for this RBF fitted on product feature

### fix txbb, vary bdt

In [None]:
fix_txbb_cuts = [0.93, 0.95, 0.97, 0.99]


for txbb_cut in fix_txbb_cuts:
    # scanned points on this Txbb slice, at or above the lower BDT edge
    txbb_select = np.isclose(df["txbb_cut"], txbb_cut)
    bdt_select = df["bdt_cut"] >= bdt_low
    select = txbb_select & bdt_select
    BDT_1D = df.loc[select, "bdt_cut"]
    B_1D = df.loc[select, "nevents_regionB"]

    # prediction of the product-based RBF along the slice on the fine BDT grid
    test_BDT_1D = np.arange(bdt_low, 1, test_BDT_stepsize)
    test_Txbb_1D = np.full_like(test_BDT_1D, txbb_cut)
    B_RBF_Pred_1D = rbfi((1 - test_BDT_1D) * (1 - test_Txbb_1D))

    # overlay the measurements and the interpolated curve
    fig, ax = plt.subplots(figsize=(10, 8))
    ax.scatter(BDT_1D, B_1D, c="y", label="measurements")
    ax.plot(test_BDT_1D, B_RBF_Pred_1D, label="interpolation")
    ax.set_xlabel("bdt cut")
    ax.set_ylabel("Number of Events in Region B")
    ax.set_title(f"RBF Interpolation @ Txbb cut = {txbb_cut}")
    ax.legend()
    plt.show()

### fix bdt, vary txbb

In [None]:
fix_bdt_cuts = [0.93, 0.95, 0.97, 0.99]


for bdt_cut in fix_bdt_cuts:
    # scanned points on this BDT slice, at or above the lower Txbb edge
    bdt_select = np.isclose(df["bdt_cut"], bdt_cut)
    txbb_select = df["txbb_cut"] >= txbb_low
    select = txbb_select & bdt_select
    Txbb_1D = df.loc[select, "txbb_cut"]
    B_1D = df.loc[select, "nevents_regionB"]

    # prediction of the product-based RBF along the slice on the fine Txbb grid
    test_Txbb_1D = np.arange(txbb_low, 1, test_Txbb_stepsize)
    test_BDT_1D = np.full_like(test_Txbb_1D, bdt_cut)
    B_RBF_Pred_1D = rbfi((1 - test_Txbb_1D) * (1 - test_BDT_1D))

    # overlay the measurements and the interpolated curve
    fig, ax = plt.subplots(figsize=(10, 8))
    ax.scatter(Txbb_1D, B_1D, c="y", label="measurements")
    ax.plot(test_Txbb_1D, B_RBF_Pred_1D, label="interpolation")
    ax.set_xlabel("txbb cut")
    ax.set_ylabel("Number of Events in Region B")
    ax.set_title(f"RBF Interpolation @ BDT cut = {bdt_cut}")
    ax.legend()
    plt.show()

## Monotone Cubic Decreasing

### Check if I can let the function take |x| as an input to do 1d fitting

In [None]:
select = np.isclose(df["txbb_cut"], 0.970) & np.isclose(df["bdt_cut"], 0.970)
df[select]["nevents_regionB"]

### Conclusion: no. A cut of txbb_cut = 1 or bdt_cut = 1 gives B = 0, but a point at distance 0.1 from (0.9, 0.9) along the 45-degree diagonal gives a non-zero B, so B is not a function of the distance |x| alone

### Trying to add an angle factor 

In [681]:
def monotone_cubic_interpolation(x, y):
    """Build a shape-preserving piecewise-cubic Hermite interpolant of (x, y).

    The tangent at each interior knot is the neighbouring secant slope of
    smaller magnitude, or zero where the two secants differ in sign, which
    keeps every segment monotone between its knots. End tangents come from the
    non-centred three-point formula and are limited the same way PCHIP does.

    Parameters
    ----------
    x, y : array-like
        Sample positions and values, of equal length. They are converted to
        float, sorted by ``x``, and samples sharing the same ``x`` are replaced
        by the mean of their ``y`` values. The inputs are not modified.

    Returns
    -------
    callable
        ``spline(t)`` evaluating the interpolant at a scalar or array ``t``.
        Outside the range of ``x`` the first/last cubic segment is extended.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` differ in length or fewer than two distinct ``x``
        values are given.
    """
    # Work in float: integer y would otherwise truncate the tangent slopes.
    x = np.asarray(x, dtype=float).ravel()
    y = np.asarray(y, dtype=float).ravel()
    if x.size != y.size:
        raise ValueError("x and y must have the same number of points")

    # Sort the knots (the interval lookup below relies on increasing x) and
    # merge duplicates, which would otherwise give zero-width intervals.
    x, inverse, counts = np.unique(x, return_inverse=True, return_counts=True)
    y = np.bincount(inverse.ravel(), weights=y, minlength=x.size) / counts
    n = x.size
    if n < 2:
        raise ValueError("at least two distinct x values are required")

    # Slopes of the secant lines between successive points
    delta_x = np.diff(x)
    m = np.diff(y) / delta_x

    def _end_slope(h0, h1, m0, m1):
        # Three-point end formula; h0/m0 belong to the interval touching the
        # end point, h1/m1 to its neighbour.
        slope = ((2 * h0 + h1) * m0 - h0 * m1) / (h0 + h1)
        if np.sign(slope) != np.sign(m0):
            return 0.0
        # Limit overshoot when the neighbouring secant changes sign.
        if np.sign(m0) != np.sign(m1) and abs(slope) > 3 * abs(m0):
            return 3 * m0
        return slope

    m_t = np.zeros(n)
    if n == 2:
        # A single interval: the interpolant is the straight line.
        m_t[:] = m[0]
    else:
        left, right = m[:-1], m[1:]
        smaller = np.where(np.abs(left) <= np.abs(right), left, right)
        m_t[1:-1] = np.where(np.sign(left) == np.sign(right), smaller, 0.0)
        m_t[0] = _end_slope(delta_x[0], delta_x[1], m[0], m[1])
        # The right end uses the LAST two intervals.
        m_t[-1] = _end_slope(delta_x[-1], delta_x[-2], m[-1], m[-2])

    # Coefficients of the cubic on each interval, in powers of (t - x[i])
    a = y[:-1]
    b = m_t[:-1]
    c = (3 * m - 2 * m_t[:-1] - m_t[1:]) / delta_x
    d = (m_t[:-1] + m_t[1:] - 2 * m) / delta_x**2

    def spline(t):
        # Index of the interval containing t, clipped so that points outside
        # the data range use the first/last interval.
        i = np.searchsorted(x, t) - 1
        i = np.clip(i, 0, n - 2)
        dx = t - x[i]
        return a[i] + b[i] * dx + c[i] * dx**2 + d[i] * dx**3

    return spline

In [694]:
# Re-read the three scan columns from the DataFrame as NumPy arrays; this
# replaces the arrays built earlier, including any points appended to them.
Txbb, BDT, B = (
    np.array(df[column]) for column in ("txbb_cut", "bdt_cut", "nevents_regionB")
)

# Keep only the scan points inside the plotting window (both cuts at or above
# their lower edge).
Txbb_cond = Txbb >= txbb_low
BDT_cond = BDT >= bdt_low
cond = Txbb_cond & BDT_cond

Txbb, BDT, B = Txbb[cond], BDT[cond], B[cond]

In [700]:
fn = monotone_cubic_interpolation((1 - Txbb) * (1 - BDT), B.astype("float"))

In [None]:
fix_txbb_cuts = [0.93, 0.95, 0.97, 0.99]


for txbb_cut in fix_txbb_cuts:
    # init plot
    fig, ax = plt.subplots(figsize=(10, 8))

    # scanned points on this Txbb slice, at or above the lower BDT edge
    txbb_select = np.isclose(df["txbb_cut"], txbb_cut)
    bdt_select = df["bdt_cut"] >= bdt_low
    select = txbb_select & bdt_select

    BDT_1D = np.array(df[select]["bdt_cut"])
    B_1D = np.array(df[select]["nevents_regionB"])

    # evaluate the product-based monotone cubic `fn` along the slice
    test_BDT_1D = np.arange(bdt_low, 1, test_BDT_stepsize)
    test_Txbb_1D = np.ones_like(test_BDT_1D) * txbb_cut

    fn_pred = fn((1 - test_Txbb_1D) * (1 - test_BDT_1D))

    ax.scatter(BDT_1D, B_1D, c="y", label="measurements")
    ax.plot(test_BDT_1D, fn_pred, label="interpolation")
    ax.set(
        xlabel="bdt cut",
        ylabel="Number of Events in Region B",
        # the curve is the monotone cubic interpolant, not the RBF
        title=f"Monotone Cubic Interpolation @ Txbb cut = {txbb_cut}",
    )
    ax.legend()
    plt.show()

In [None]:
fix_bdt_cuts = [0.93, 0.95, 0.97, 0.99]


for bdt_cut in fix_bdt_cuts:
    # init plot
    fig, ax = plt.subplots(figsize=(10, 8))

    # scanned points on this BDT slice, at or above the lower Txbb edge
    bdt_select = np.isclose(df["bdt_cut"], bdt_cut)
    txbb_select = df["txbb_cut"] >= txbb_low
    select = txbb_select & bdt_select

    # Sort by txbb (the interpolant looks up intervals with searchsorted, which
    # needs increasing x) and use float B so slopes are not stored as integers.
    slice_df = df[select].sort_values("txbb_cut")
    Txbb_1D = np.array(slice_df["txbb_cut"])
    B_1D = np.array(slice_df["nevents_regionB"], dtype=float)

    # Fit a 1D interpolant for this slice. A separate name is used so that the
    # product-based `fn` fitted earlier is not overwritten.
    fn_1d = monotone_cubic_interpolation(Txbb_1D, B_1D)

    # evaluate on the fine Txbb grid
    test_Txbb_1D = np.arange(txbb_low, 1, test_Txbb_stepsize)
    fn_pred = fn_1d(test_Txbb_1D)

    ax.scatter(Txbb_1D, B_1D, c="y", label="measurements")
    ax.plot(test_Txbb_1D, fn_pred, label="interpolation")
    ax.set(
        xlabel="txbb cut",
        ylabel="Number of Events in Region B",
        # the curve is the monotone cubic interpolant, not the RBF
        title=f"Monotone Cubic Interpolation @ BDT cut = {bdt_cut}",
    )
    ax.legend()
    plt.show()

# Optimization on RBF Interpolation

## Old stuff

In [18]:
# Boolean row masks on the "s" column of df_billy for three thresholds.
# NOTE(review): df_billy is not defined in the visible part of this notebook.
s_greater_0pt6 = df_billy["s"] > 0.6
s_greater_0pt7 = df_billy["s"] > 0.7
s_greater_1 = df_billy["s"] > 1

In [None]:
# Row with the smallest 2*sqrt(B+sigma_B^2)/S among rows passing s > 1.
rows_s_greater_1 = df_billy[s_greater_1]
best_row_s_greater_1 = rows_s_greater_1["2*sqrt(B+sigma_B^2)/S"].argmin()
rows_s_greater_1.iloc[best_row_s_greater_1]

In [None]:
# Row with the smallest 2*sqrt(B+sigma_B^2)/S among rows passing s > 0.7.
rows_s_greater_0pt7 = df_billy[s_greater_0pt7]
best_row_s_greater_0pt7 = rows_s_greater_0pt7["2*sqrt(B+sigma_B^2)/S"].argmin()
rows_s_greater_0pt7.iloc[best_row_s_greater_0pt7]

In [None]:
# Row with the smallest 2*sqrt(B+sigma_B^2)/S among rows passing s > 0.6.
rows_s_greater_0pt6 = df_billy[s_greater_0pt6]
best_row_s_greater_0pt6 = rows_s_greater_0pt6["2*sqrt(B+sigma_B^2)/S"].argmin()
rows_s_greater_0pt6.iloc[best_row_s_greater_0pt6]

In [None]:
# Row with the smallest 2*sqrt(B+sigma_B^2)/S over the whole table.
fom_all = df_billy["2*sqrt(B+sigma_B^2)/S"]
best_row = fom_all.argmin()
df_billy.iloc[best_row]

In [None]:
# Row with the smallest 2*sqrt(B+sigma_B^2)/S among rows passing b > 1.
b_greater_1 = df_billy["b"] > 1
rows_b_greater_1 = df_billy[b_greater_1]
best_row_b_greater_1 = rows_b_greater_1["2*sqrt(B+sigma_B^2)/S"].argmin()
rows_b_greater_1.iloc[best_row_b_greater_1]

In [None]:
# Row with the smallest 2*sqrt(B+sigma_B^2)/S among rows passing b > 2.
b_greater_2 = df_billy["b"] > 2
rows_b_greater_2 = df_billy[b_greater_2]
best_row_b_greater_2 = rows_b_greater_2["2*sqrt(B+sigma_B^2)/S"].argmin()
rows_b_greater_2.iloc[best_row_b_greater_2]

In [None]:
# Row with the smallest 2*sqrt(B+sigma_B^2)/S among rows passing b > 2.8.
b_greater_2pt8 = df_billy["b"] > 2.8
rows_b_greater_2pt8 = df_billy[b_greater_2pt8]
best_row_b_greater_2pt8 = rows_b_greater_2pt8["2*sqrt(B+sigma_B^2)/S"].argmin()
rows_b_greater_2pt8.iloc[best_row_b_greater_2pt8]

In [None]:
import pprint

print(s)
print(tot)
pprint.pprint(dicts)

## Old stuff:

In [None]:
sig_samples = {"hh4b": samples[year]["hh4b"]}

In [None]:
events = pd.read_parquet(
    Path(data_dir) / "2022EE" / "GluGlutoHHto4B_kl-1p00_kt-1p00_c2-0p00_TuneCP5_13p6TeV" / "parquet"
)

In [None]:
# Use a dedicated name for the list of samples to plot: rebinding `samples`
# would shadow the `samples` mapping imported from HH4b.hh_vars, which other
# cells index as samples[year][...].
sculpting_samples = ["qcd", "ttbar"]
mass = "bbFatJetMsd"
tagger = "bbFatJetPNetTXbbLegacy"
i = 1

# For each sample, overlay the normalised mass distribution of jet index `i`
# after progressively tighter cuts on the tagger score.
for sample in sculpting_samples:
    plt.figure(figsize=(10, 10))
    plt.title(sample)
    for cut in [0, 0.8, 0.9, 0.95]:
        cut_mask = events_dict[sample][tagger][i] >= cut
        plt.hist(
            events_dict[sample][mass][i][cut_mask],
            np.arange(60, 251, 10),
            weights=events_dict[sample]["finalWeight"][cut_mask],
            histtype="step",
            label=rf"$T_{{Xbb}} \geq {cut}$",
            density=True,
        )

    plt.xlabel(f"Jet {i+1} {mass} (GeV)")
    plt.legend()
    plt.savefig(plot_dir / f"{sample}_{mass}{i}_{tagger}_sculpting.pdf", bbox_inches="tight")
    plt.show()

## BDT ROC Curve

## tt ROC curve

In [None]:
# Tagger score of jet index `jet` for the signal and for each background.
jet = 1
tagger = "bbFatJetPNetTXbbLegacy"
sig_jets_score = events_dict["hh4b"][tagger][jet]
bg_jets_score = {
    bg_name: events_dict[bg_name][tagger][jet] for bg_name in ("qcd", "ttbar")
}

In [None]:
from sklearn.metrics import roc_curve

bg_skip = 1
sig_key = "hh4b"
weight_key = "finalWeight"
rocs = {}

# One weighted ROC curve per background: signal jets are labelled 1 and every
# bg_skip-th background jet is labelled 0.
for bg_key in ["qcd", "ttbar"]:
    print(bg_key)

    bg_scores = bg_jets_score[bg_key]
    # number of entries that bg_scores[::bg_skip] keeps
    n_bg_kept = (len(bg_scores) - 1) // bg_skip + 1

    y_true = np.concatenate([np.ones(len(sig_jets_score)), np.zeros(n_bg_kept)])

    sig_weights = events_dict[sig_key][weight_key].to_numpy()
    bg_weights = events_dict[bg_key][weight_key].to_numpy()[::bg_skip]
    weights = np.concatenate([sig_weights, bg_weights])

    scores = np.concatenate((sig_jets_score, bg_scores[::bg_skip]))

    fpr, tpr, thresholds = roc_curve(y_true, scores, sample_weight=weights)

    rocs[bg_key] = dict(
        fpr=fpr,
        tpr=tpr,
        thresholds=thresholds,
        label=plotting.label_by_sample[bg_key],
    )

In [None]:
def find_nearest(array, value):
    """Return the index of the element of `array` closest to `value`.

    If several elements are equally close, the first one wins (argmin).
    """
    offsets = np.abs(np.asarray(array) - value)
    return offsets.argmin()

In [None]:
# Draw the ROC curves collected in `rocs` (grouped under the key "test") with
# the project's plotting helper and show the figure.
# NOTE(review): the second argument [0.2, 0.5] is presumably a list of
# signal-efficiency working points to mark — confirm against the helper.
plotting.multiROCCurveGrey(
    {"test": rocs},
    [0.2, 0.5],
    xlim=[0, 0.8],
    ylim=[1e-5, 1],
    plot_dir=plot_dir,
    name=f"{tagger}_ROCs",
    show=True,
)