In [None]:
import uproot
import awkward as ak
import matplotlib.pyplot as plt
import numpy as np
from IPython.display import display, HTML

# Cap the height of rendered image outputs and make them scrollable
output_box_css = "<style>.output_png, .output_jpeg, .output_svg {height: 500px; overflow-y: scroll;}</style>"
display(HTML(output_box_css))

# Name of the tree read from every input file
main_branch = "Events"

# Branches for muon pixel tracks
tk_branches = [
    "muon_pixel_tracks_p",
    "muon_pixel_tracks_pt",
    "muon_pixel_tracks_ptErr",
    "muon_pixel_tracks_eta",
    "muon_pixel_tracks_etaErr",
    "muon_pixel_tracks_phi",
    "muon_pixel_tracks_phiErr",
    "muon_pixel_tracks_chi2",
    "muon_pixel_tracks_normalizedChi2",
    "muon_pixel_tracks_nPixelHits",
    "muon_pixel_tracks_nTrkLays",
    "muon_pixel_tracks_nFoundHits",
    "muon_pixel_tracks_nLostHits",
    "muon_pixel_tracks_dsz",
    "muon_pixel_tracks_dszErr",
    "muon_pixel_tracks_dxy",
    "muon_pixel_tracks_dxyErr",
    "muon_pixel_tracks_dz",
    "muon_pixel_tracks_dzErr",
    "muon_pixel_tracks_qoverp",
    "muon_pixel_tracks_qoverpErr",
    "muon_pixel_tracks_lambdaErr",
    "muon_pixel_tracks_matched",
    "muon_pixel_tracks_duplicate",
    "muon_pixel_tracks_tpPdgId",
    "muon_pixel_tracks_tpPt",
    "muon_pixel_tracks_tpEta",
    "muon_pixel_tracks_tpPhi",
]

# Generator-level particle branches
gen_branches = [
    "GenPart_pt",
    "GenPart_eta",
    "GenPart_phi",
    "GenPart_mass",
    "GenPart_pdgId",
    "GenPart_statusFlags",  # added to select last-copy muons
]

# L1TkMu candidate branches
l1tkMuon_branches = [
    "L1TkMu_pt",
    "L1TkMu_eta",
    "L1TkMu_phi",
]

# Switches:
#   legacy   -> read the "_legacy" files instead of the "_ext" ones
#   allPixel -> read the all-pixel sample instead of the selector samples
legacy = False
allPixel = False

filesSelector = [
    "validation/10k_ZMM200PU_ext.root",
    "validation/9k_TTbar200PU_ext.root",
]

filesAllPixel = [
    "validation/9k_TTbar200PUAllPixel_ext.root"
]

# Work on a copy: the legacy renaming below must not rewrite filesSelector /
# filesAllPixel through an alias.
files = list(filesAllPixel if allPixel else filesSelector)

if legacy:
    files = [f.replace("_ext", "_legacy") for f in files]
print(files)

# Read the requested branches from every file, then merge once at the end.
# (Concatenating inside the loop re-copies everything already loaded on each
# iteration and starts from an untyped empty list.)
arrays_per_file = []
for f in files:
    with uproot.open(f) as file:
        arrays_f = file[main_branch].arrays(
            tk_branches + gen_branches + l1tkMuon_branches
        )
        arrays_per_file.append(arrays_f)
arrays = ak.concatenate(arrays_per_file, axis=0)
print(f"Loaded {len(arrays)} events")

In [None]:
# Selection parameters (same as TP selector for Muons)
MUON_ABS_PDGID = 13
PT_MIN, PT_MIN_L1 = 0.9, 4.0  # PT_MIN_L1 is the L1TkMu pt min
ETA_MAX = 2.4
DR_MATCH, RELPT_MATCH = 0.01, 0.1

# MTV-like histo parameters, one (min, max, number of bins) triple per axis
MTV_MIN_PT, MTV_MAX_PT, MTV_N_PT = 0.9, 2000.0, 50
USE_LOG_PT = True
MTV_MIN_ETA, MTV_MAX_ETA, MTV_N_ETA = -2.5, 2.5, 50
MTV_MIN_PHI, MTV_MAX_PHI, MTV_N_PHI = -3.1416, 3.1416, 36

### Utilities

In [None]:
# Helpers
def wrap_phi(phi):
    """Map an angle (scalar or array, in radians) into the interval [-π, π)."""
    full_turn = 2 * np.pi
    shifted = phi + np.pi
    return (shifted % full_turn) - np.pi


def binned(mask, vals, bins):
    """Histogram `vals` twice with the same bin edges.

    Returns:
        (num, den): counts per bin of the entries selected by `mask`, and
        counts per bin of all entries.
    """
    passing_counts = np.histogram(vals[mask], bins=bins)[0]
    total_counts = np.histogram(vals, bins=bins)[0]
    return passing_counts, total_counts


def wilson(num, den, z=1.0):
    """Wilson score interval for binned pass/total counts.

    Args:
        num: NumPy array of passing counts per bin.
        den: NumPy array of total counts per bin.
        z: width of the interval in standard deviations.

    Returns:
        (center, half): interval midpoint and half-width per bin. Bins with
        `den == 0` are left at 0 in both arrays, so no division by zero occurs.
    """
    successes = num.astype(float)
    trials = den.astype(float)
    filled = trials > 0

    center = np.zeros_like(successes, dtype=float)
    half = np.zeros_like(center)

    # Only populated bins enter the arithmetic; an empty selection is a no-op
    n = trials[filled]
    phat = successes[filled] / n
    z2 = z**2
    shrink = 1 + z2 / n
    center[filled] = (phat + z2 / (2 * n)) / shrink
    half[filled] = z * np.sqrt(phat * (1 - phat) / n + z2 / (4 * n**2)) / shrink
    return center, half


In [None]:
# Track quality parameters plotter
def plot_track_quality_parameters(
    arrays=arrays, selection=(arrays.muon_pixel_tracks_pt >= 0)
):
    """Histogram the muon pixel track quality variables, one panel per variable.

    Every panel overlays three step histograms: all selected tracks, tracks
    with `muon_pixel_tracks_matched == 1` ("Matched") and tracks with
    `muon_pixel_tracks_matched == 0` ("Fake").

    Args:
        arrays: event record holding the `muon_pixel_tracks_*` fields.
        selection: per-track boolean mask applied to every field. The default
            is evaluated once, when this cell runs, against the global `arrays`.

    Returns:
        The matplotlib figure holding all panels.
    """
    # One row per panel: (branch suffix, axis label, number of bins, x range).
    # Keeping the four columns in a single table guarantees that each variable
    # is drawn with its own binning and title.
    panels = [
        ("pt", r"$p_{T}$", 200, (0, 200)),
        ("eta", r"$\eta$", 50, (-2.5, 2.5)),
        ("chi2", r"$\chi^{2}$", 15, (0, 15)),
        ("normalizedChi2", r"$\text{normalized } \chi^{2}$", 15, (0, 15)),
        ("nPixelHits", "nPixelHits", 15, (0, 15)),
        ("nTrkLays", "nTrkLays", 15, (0, 15)),
        ("nFoundHits", "nFoundHits", 15, (0, 15)),
        ("nLostHits", "nLostHits", 10, (0, 10)),
        ("dxy", r"$d_{xy}$", 100, (-0.1, 0.1)),
        ("dxyErr", "dxyErr", 100, (0, 0.05)),
        ("dz", r"$d_{z}$", 100, (-20, 20)),
        ("dzErr", "dzErr", 100, (0, 0.05)),
        ("etaErr", "etaErr", 100, (0, 0.006)),
        ("phiErr", "phiErr", 100, (0, 0.02)),
        ("qoverp", "qoverp", 100, (-0.5, 0.5)),
        ("qoverpErr", "qoverpErr", 100, (0, 0.07)),
        ("dsz", "dsz", 100, (-15, 15)),
        ("dszErr", "dszErr", 100, (0, 0.03)),
        ("lambdaErr", "lambdaErr", 100, (0, 0.003)),
    ]

    matched = arrays.muon_pixel_tracks_matched[selection]
    n_rows = (len(panels) + 1) // 2  # two panels per row

    fig = plt.figure(figsize=(12, 28))
    for i, (name, label, nbins, value_range) in enumerate(panels, start=1):
        values = arrays[f"muon_pixel_tracks_{name}"][selection]
        ax = fig.add_subplot(n_rows, 2, i)
        for category, subset in (
            ("All", values),
            ("Matched", values[matched == 1]),
            ("Fake", values[matched == 0]),
        ):
            ax.hist(
                ak.to_numpy(ak.flatten(subset)),
                bins=nbins,
                range=value_range,
                histtype="step",
                label=category,
            )
        if name == "pt":
            ax.set_xscale("log")
        ax.set_xlabel(f"Muon PixelTracks {label}")
        ax.set_ylabel("Entries")
        ax.legend()
        ax.set_title(f"Muon PixelTracks {label} distribution")
    fig.tight_layout()
    return fig

In [None]:
# Efficiency and fakerate plotter
def plot_efficiency_and_fake(
    arrays=arrays, selection=(arrays.muon_pixel_tracks_pt >= 0)
):
    """Plot efficiency and fake rate of muon pixel tracks versus pT, η and φ.

    Efficiency: fraction of selected last-copy gen muons for which a selected
    track carries muon truth information (tp* branches) within
    ΔR < DR_MATCH and relative pT difference < RELPT_MATCH.
    Fake rate: fraction of selected tracks whose `matched` flag is 0.

    The two global rates are printed. Points are Wilson-interval centres with
    the Wilson half-width as error bar; bins with an empty denominator are not
    drawn.

    Args:
        arrays: event record with GenPart_* and muon_pixel_tracks_* fields.
        selection: per-track boolean mask. The default is evaluated once, when
            this cell runs, against the global `arrays`.

    Returns:
        The 3x2 matplotlib figure (rows: pT, η, φ; columns: efficiency, fake rate).
    """

    def flat(jagged):
        # Collapse the per-event nesting into a single NumPy vector
        return ak.to_numpy(ak.flatten(jagged))

    def rate(passed, values, edges):
        # Binned pass fraction: (Wilson centre, Wilson half-width, non-empty bins)
        num, den = binned(passed, values, edges)
        centre, half = wilson(num, den)
        return centre, half, den > 0

    # ------ Binning ------
    if USE_LOG_PT:
        # Log-spaced edges (emulates BinLogX); bin centres are geometric means
        pt_bins = np.logspace(
            np.log10(MTV_MIN_PT), np.log10(MTV_MAX_PT), MTV_N_PT + 1, base=10.0
        )
        pt_centers = np.sqrt(pt_bins[:-1] * pt_bins[1:])
    else:
        pt_bins = np.linspace(MTV_MIN_PT, MTV_MAX_PT, MTV_N_PT + 1)
        pt_centers = 0.5 * (pt_bins[:-1] + pt_bins[1:])
    eta_bins = np.linspace(MTV_MIN_ETA, MTV_MAX_ETA, MTV_N_ETA + 1)
    phi_bins = np.linspace(MTV_MIN_PHI, MTV_MAX_PHI, MTV_N_PHI + 1)
    eta_centers = 0.5 * (eta_bins[:-1] + eta_bins[1:])
    phi_centers = 0.5 * (phi_bins[:-1] + phi_bins[1:])

    # ------ Gen selection (denominator) ------
    # Last-copy muons inside the pT / η acceptance used by the TP selector
    last_copy_flag = 1 << 13
    is_muon = abs(arrays.GenPart_pdgId) == MUON_ABS_PDGID
    is_last_copy = (arrays.GenPart_statusFlags & last_copy_flag) != 0
    gen_sel = (
        is_muon
        & is_last_copy
        & (arrays.GenPart_pt > PT_MIN)
        & (abs(arrays.GenPart_eta) < ETA_MAX)
    )
    gen_pt = arrays.GenPart_pt[gen_sel]
    gen_eta = arrays.GenPart_eta[gen_sel]
    gen_phi = wrap_phi(arrays.GenPart_phi[gen_sel])

    # ------ Track-level truth info ------
    # The external selection is applied here so only kept tracks can match a Gen
    truth_pt = arrays.muon_pixel_tracks_tpPt
    truth_eta = arrays.muon_pixel_tracks_tpEta
    truth_phi = wrap_phi(arrays.muon_pixel_tracks_tpPhi)
    tp_sel = (
        (abs(arrays.muon_pixel_tracks_tpPdgId) == MUON_ABS_PDGID)
        & (truth_pt > PT_MIN)
        & (abs(truth_eta) < ETA_MAX)
        & selection
    )

    # ------ Efficiency ------
    # Every (gen, truth-particle) pair per event; a gen muon counts as matched
    # when at least one pair passes both the ΔR and the relative-pT cut
    pairs = ak.cartesian(
        {
            "g": ak.zip({"pt": gen_pt, "eta": gen_eta, "phi": gen_phi}),
            "t": ak.zip(
                {
                    "pt": truth_pt[tp_sel],
                    "eta": truth_eta[tp_sel],
                    "phi": truth_phi[tp_sel],
                }
            ),
        },
        axis=1,
        nested=True,
    )
    dr = np.sqrt(
        (pairs.g.eta - pairs.t.eta) ** 2 + wrap_phi(pairs.g.phi - pairs.t.phi) ** 2
    )
    relpt = np.abs(pairs.g.pt - pairs.t.pt) / pairs.g.pt
    gen_match_flat = flat(ak.any((dr < DR_MATCH) & (relpt < RELPT_MATCH), axis=2))

    # Efficiency = (number of matched Gen) / (total selected Gen)
    eff_vs_pt = rate(gen_match_flat, flat(gen_pt), pt_bins)
    eff_vs_eta = rate(gen_match_flat, flat(gen_eta), eta_bins)
    eff_vs_phi = rate(gen_match_flat, flat(gen_phi), phi_bins)

    global_eff = gen_match_flat.sum() / max(len(gen_match_flat), 1)
    print(f"Global efficiency (last-copy muons): {global_eff * 100:.2f}%")

    # ------ Fake rate ------
    reco_pt = arrays.muon_pixel_tracks_pt
    reco_eta = arrays.muon_pixel_tracks_eta
    reco_phi = wrap_phi(arrays.muon_pixel_tracks_phi)

    # Denominator: tracks inside the acceptance that also pass `selection`
    trk_sel = (reco_pt > PT_MIN) & (abs(reco_eta) < ETA_MAX) & selection
    is_fake = trk_sel & (arrays.muon_pixel_tracks_matched == 0)

    # Restrict the flattened fake flags to the kept tracks
    fake_flat = flat(is_fake)[flat(trk_sel)]

    # Fake rate = (number of fake tracks) / (total selected tracks)
    fake_vs_pt = rate(fake_flat, flat(reco_pt[trk_sel]), pt_bins)
    fake_vs_eta = rate(fake_flat, flat(reco_eta[trk_sel]), eta_bins)
    fake_vs_phi = rate(fake_flat, flat(wrap_phi(reco_phi[trk_sel])), phi_bins)

    global_fake = fake_flat.sum() / max(len(fake_flat), 1)
    print(f"Global fake rate: {global_fake * 100:.2f}%")

    # ------ Make plots ------
    fig, axes = plt.subplots(3, 2, figsize=(12, 12))

    eta_limits = (MTV_MIN_ETA, MTV_MAX_ETA)
    phi_limits = (MTV_MIN_PHI, MTV_MAX_PHI)
    # (axes slot, bin centres, rate triple, x-limits or None for the log-pT axis,
    #  x label, y label, title, colour override)
    panels = [
        ((0, 0), pt_centers, eff_vs_pt, None,
         "Gen muon pT [GeV]", "Efficiency", "Efficiency vs pT", None),
        ((0, 1), pt_centers, fake_vs_pt, None,
         "Reco track pT [GeV]", "Fake rate", "Fake Rate vs pT", "tab:red"),
        ((1, 0), eta_centers, eff_vs_eta, eta_limits,
         "Gen muon η", "Efficiency", "Efficiency vs η", None),
        ((1, 1), eta_centers, fake_vs_eta, eta_limits,
         "Reco track η", "Fake rate", "Fake Rate vs η", "tab:red"),
        ((2, 0), phi_centers, eff_vs_phi, phi_limits,
         "Gen muon φ", "Efficiency", "Efficiency vs φ", None),
        ((2, 1), phi_centers, fake_vs_phi, phi_limits,
         "Reco track φ", "Fake rate", "Fake Rate vs φ", "tab:red"),
    ]
    for slot, centers, (value, error, valid), xlim, xlabel, ylabel, title, colour in panels:
        ax = axes[slot]
        # Efficiency panels keep the default colour cycle
        extra_style = {} if colour is None else {"color": colour}
        ax.errorbar(
            centers[valid],
            value[valid],
            yerr=error[valid],
            markersize=2,
            fmt="s",
            capsize=2,
            **extra_style,
        )
        if xlim is None:
            ax.set_xscale("log")
        else:
            ax.set_xlim(*xlim)
        ax.set_ylim(0, 1.05)
        ax.set_xlabel(xlabel, loc="right")
        ax.set_ylabel(ylabel, loc="top")
        ax.grid(alpha=0.3)
        ax.set_title(title)

    # Set axes ticks (log axes keep their default x locators)
    for ax in axes.flatten():
        if ax.get_xscale() != "log":
            ax.xaxis.set_major_locator(plt.MultipleLocator(0.5))
            ax.xaxis.set_minor_locator(plt.MultipleLocator(0.1))
        ax.yaxis.set_major_locator(plt.MultipleLocator(0.1))
        ax.yaxis.set_minor_locator(plt.MultipleLocator(0.02))

    fig.tight_layout()
    return fig

In [None]:
# Efficiency and fake rate for L1TkMu, matched to Gen muons in ΔR and relative pT
def plot_efficiency_and_fake_L1(arrays=arrays, selection=(arrays.L1TkMu_pt >= 0)):
    """Plot efficiency and fake rate of L1TkMu candidates versus pT, η and φ.

    Efficiency: fraction of selected last-copy gen muons with at least one
    selected L1TkMu within ΔR < DR_MATCH and relative pT difference
    < RELPT_MATCH. Fake rate: fraction of selected L1TkMu with no such gen muon.
    Both objects must satisfy pT > PT_MIN_L1 and |η| < ETA_MAX.

    The two global rates are printed. Points are Wilson-interval centres with
    the Wilson half-width as error bar; bins with an empty denominator are not
    drawn.

    Args:
        arrays: event record with GenPart_* and L1TkMu_* fields.
        selection: per-L1TkMu boolean mask. The default is evaluated once, when
            this cell runs, against the global `arrays`.

    Returns:
        The 3x2 matplotlib figure (rows: pT, η, φ; columns: efficiency, fake rate).
    """

    def flat(jagged):
        # Collapse the per-event nesting into a single NumPy vector
        return ak.to_numpy(ak.flatten(jagged))

    def rate(passed, values, edges):
        # Binned pass fraction: (Wilson centre, Wilson half-width, non-empty bins)
        num, den = binned(passed, values, edges)
        centre, half = wilson(num, den)
        return centre, half, den > 0

    # ------ Binning ------
    if USE_LOG_PT:
        # Log-spaced edges (emulates BinLogX); bin centres are geometric means
        pt_bins = np.logspace(
            np.log10(MTV_MIN_PT), np.log10(MTV_MAX_PT), MTV_N_PT + 1, base=10.0
        )
        pt_centers = np.sqrt(pt_bins[:-1] * pt_bins[1:])
    else:
        pt_bins = np.linspace(MTV_MIN_PT, MTV_MAX_PT, MTV_N_PT + 1)
        pt_centers = 0.5 * (pt_bins[:-1] + pt_bins[1:])
    eta_bins = np.linspace(MTV_MIN_ETA, MTV_MAX_ETA, MTV_N_ETA + 1)
    phi_bins = np.linspace(MTV_MIN_PHI, MTV_MAX_PHI, MTV_N_PHI + 1)
    eta_centers = 0.5 * (eta_bins[:-1] + eta_bins[1:])
    phi_centers = 0.5 * (phi_bins[:-1] + phi_bins[1:])

    # ------ Gen selection (denominator) ------
    # Last-copy muons above the L1 pT threshold and inside the η acceptance
    last_copy_flag = 1 << 13
    is_muon = abs(arrays.GenPart_pdgId) == MUON_ABS_PDGID
    is_last_copy = (arrays.GenPart_statusFlags & last_copy_flag) != 0
    gen_sel = (
        is_muon
        & is_last_copy
        & (arrays.GenPart_pt > PT_MIN_L1)
        & (abs(arrays.GenPart_eta) < ETA_MAX)
    )
    gen_pt = arrays.GenPart_pt[gen_sel]
    gen_eta = arrays.GenPart_eta[gen_sel]
    gen_phi = wrap_phi(arrays.GenPart_phi[gen_sel])

    # ------ L1TkMu candidates: acceptance + external selection ------
    cand_pt = arrays.L1TkMu_pt
    cand_eta = arrays.L1TkMu_eta
    cand_phi = wrap_phi(arrays.L1TkMu_phi)
    L1_sel = (cand_pt > PT_MIN_L1) & (abs(cand_eta) < ETA_MAX) & selection
    L1_pt = cand_pt[L1_sel]
    L1_eta = cand_eta[L1_sel]
    L1_phi = cand_phi[L1_sel]

    # ------ Matching (geometrical + relative pT) ------
    gen_zip = ak.zip({"pt": gen_pt, "eta": gen_eta, "phi": gen_phi})
    l1_zip = ak.zip({"pt": L1_pt, "eta": L1_eta, "phi": L1_phi})

    # Efficiency: Gen × L1, reduced over the L1 axis -> one flag per Gen
    gen_l1 = ak.cartesian({"g": gen_zip, "t": l1_zip}, axis=1, nested=True)
    dr_gen_l1 = np.sqrt(
        (gen_l1.g.eta - gen_l1.t.eta) ** 2
        + wrap_phi(gen_l1.g.phi - gen_l1.t.phi) ** 2
    )
    relpt_gen_l1 = np.abs(gen_l1.g.pt - gen_l1.t.pt) / gen_l1.g.pt
    gen_matched_mask = ak.any(
        (dr_gen_l1 < DR_MATCH) & (relpt_gen_l1 < RELPT_MATCH), axis=2
    )

    # Fake rate: L1 × Gen, reduced over the Gen axis -> one flag per L1
    l1_gen = ak.cartesian({"t": l1_zip, "g": gen_zip}, axis=1, nested=True)
    dr_l1_gen = np.sqrt(
        (l1_gen.t.eta - l1_gen.g.eta) ** 2
        + wrap_phi(l1_gen.t.phi - l1_gen.g.phi) ** 2
    )
    relpt_l1_gen = np.abs(l1_gen.g.pt - l1_gen.t.pt) / l1_gen.g.pt
    l1_matched_mask = ak.any(
        (dr_l1_gen < DR_MATCH) & (relpt_l1_gen < RELPT_MATCH), axis=2
    )

    # ------ Efficiency vs Gen kinematics ------
    gen_match_flat = flat(gen_matched_mask)
    eff_vs_pt = rate(gen_match_flat, flat(gen_pt), pt_bins)
    eff_vs_eta = rate(gen_match_flat, flat(gen_eta), eta_bins)
    eff_vs_phi = rate(gen_match_flat, flat(gen_phi), phi_bins)

    global_eff = gen_match_flat.sum() / max(len(gen_match_flat), 1)
    print(f"Global efficiency (last-copy muons): {global_eff * 100:.2f}%")

    # ------ Fake rate vs L1 kinematics ------
    # An L1TkMu is fake when no selected gen muon matches it
    l1_fake_flat = ~flat(l1_matched_mask)
    fake_vs_pt = rate(l1_fake_flat, flat(L1_pt), pt_bins)
    fake_vs_eta = rate(l1_fake_flat, flat(L1_eta), eta_bins)
    fake_vs_phi = rate(l1_fake_flat, flat(L1_phi), phi_bins)

    global_fake = l1_fake_flat.sum() / max(len(l1_fake_flat), 1)
    print(f"Global fake rate: {global_fake * 100:.2f}%")

    # ------ Make plots ------
    fig, axes = plt.subplots(3, 2, figsize=(12, 12))

    eta_limits = (MTV_MIN_ETA, MTV_MAX_ETA)
    phi_limits = (MTV_MIN_PHI, MTV_MAX_PHI)
    # (axes slot, bin centres, rate triple, x-limits or None for the log-pT axis,
    #  x label, y label, title, colour override)
    panels = [
        ((0, 0), pt_centers, eff_vs_pt, None,
         "Gen muon pT [GeV]", "Efficiency", "Efficiency vs pT", None),
        ((0, 1), pt_centers, fake_vs_pt, None,
         "L1TkMu pT [GeV]", "Fake rate", "Fake Rate vs pT", "tab:red"),
        ((1, 0), eta_centers, eff_vs_eta, eta_limits,
         "Gen muon η", "Efficiency", "Efficiency vs η", None),
        ((1, 1), eta_centers, fake_vs_eta, eta_limits,
         "L1TkMu η", "Fake rate", "Fake Rate vs η", "tab:red"),
        ((2, 0), phi_centers, eff_vs_phi, phi_limits,
         "Gen muon φ", "Efficiency", "Efficiency vs φ", None),
        ((2, 1), phi_centers, fake_vs_phi, phi_limits,
         "L1TkMu φ", "Fake rate", "Fake Rate vs φ", "tab:red"),
    ]
    for slot, centers, (value, error, valid), xlim, xlabel, ylabel, title, colour in panels:
        ax = axes[slot]
        # Efficiency panels keep the default colour cycle
        extra_style = {} if colour is None else {"color": colour}
        ax.errorbar(
            centers[valid],
            value[valid],
            yerr=error[valid],
            markersize=2,
            fmt="s",
            capsize=2,
            **extra_style,
        )
        if xlim is None:
            ax.set_xscale("log")
        else:
            ax.set_xlim(*xlim)
        ax.set_ylim(0, 1.05)
        ax.set_xlabel(xlabel, loc="right")
        ax.set_ylabel(ylabel, loc="top")
        ax.grid(alpha=0.3)
        ax.set_title(title)

    # Set axes ticks (log axes keep their default x locators) and tag every
    # panel with the L1 pT threshold
    for ax in axes.flatten():
        if ax.get_xscale() != "log":
            ax.xaxis.set_major_locator(plt.MultipleLocator(0.5))
            ax.xaxis.set_minor_locator(plt.MultipleLocator(0.1))
        ax.yaxis.set_major_locator(plt.MultipleLocator(0.1))
        ax.yaxis.set_minor_locator(plt.MultipleLocator(0.02))
        ax.legend([r"p$_{T}$"f" L1TkMu > {PT_MIN_L1} GeV"])

    fig.tight_layout()
    return fig

In [None]:
def plot_and_match_pixelTracks_with_L1TkMu(
    arrays=arrays,
    trk_selection=(arrays.muon_pixel_tracks_pt >= PT_MIN),
    l1_selection=((arrays.L1TkMu_pt >= PT_MIN) & (abs(arrays.L1TkMu_eta) < ETA_MAX)),
    deta_window=0.35,
    dphi_window=0.2,
):
    """
    Plot Δη and Δφ between each muon pixel track and its nearest (in ΔR) L1TkMu,
    versus track pT and as 1D projections, split by the track `matched` flag.
    Also builds a mask of pixel tracks that have any L1TkMu inside the
    rectangular (Δη, Δφ) window.

    Args:
        arrays: event record with muon_pixel_tracks_* and L1TkMu_* fields.
        trk_selection: per-track boolean mask of tracks to consider.
        l1_selection: per-L1TkMu boolean mask of candidates to consider.
        deta_window: half-width of the |Δη| matching window.
        dphi_window: half-width of the |Δφ| matching window.
        (The default masks are evaluated once, when this cell runs, against the
        global `arrays`.)

    Returns:
        Tuple of (figure, matching_mask). `matching_mask` has the jagged layout
        of `arrays.muon_pixel_tracks_pt`; it is False for tracks rejected by
        `trk_selection`.
    """
    # Apply selections
    trk_pt = arrays.muon_pixel_tracks_pt[trk_selection]
    trk_eta = arrays.muon_pixel_tracks_eta[trk_selection]
    trk_phi = wrap_phi(arrays.muon_pixel_tracks_phi[trk_selection])

    l1_pt = arrays.L1TkMu_pt[l1_selection]
    l1_eta = arrays.L1TkMu_eta[l1_selection]
    l1_phi = wrap_phi(arrays.L1TkMu_phi[l1_selection])

    # Create cartesian pairing between tracks and L1TkMuons (per-event)
    trk_zip = ak.zip({"pt": trk_pt, "eta": trk_eta, "phi": trk_phi})
    l1_zip = ak.zip({"pt": l1_pt, "eta": l1_eta, "phi": l1_phi})
    pairs = ak.cartesian({"t": trk_zip, "l": l1_zip}, axis=1, nested=True)

    # Calculate deltas
    deta = pairs.t.eta - pairs.l.eta
    dphi = wrap_phi(pairs.t.phi - pairs.l.phi)

    # Nearest L1 per track in ΔR
    dr = np.sqrt(deta**2 + dphi**2)
    idx_near = ak.argmin(dr, axis=2)  # (events, Ntrk) option-index
    lidx = ak.local_index(dr, axis=2)
    choose_near = lidx == idx_near[..., None]

    deta_closest = ak.firsts(deta[choose_near], axis=2)
    dphi_closest = ak.firsts(dphi[choose_near], axis=2)

    # Matching mask within rectangular window per track
    match_matrix = (abs(deta) < deta_window) & (abs(dphi) < dphi_window)
    track_matched = ak.any(match_matrix, axis=2)  # (events, trk)

    # Convert to numpy arrays for plotting (flatten tracks across events).
    # A track in an event without any selected L1TkMu has no nearest candidate
    # (None); turn that into NaN so the result is a plain ndarray and the
    # finite-value filter below drops the track instead of plotting filler data.
    deta_np = ak.to_numpy(ak.flatten(ak.fill_none(deta_closest, np.nan)))
    dphi_np = ak.to_numpy(ak.flatten(ak.fill_none(dphi_closest, np.nan)))
    pt_np = ak.to_numpy(ak.flatten(trk_pt))

    # Get matched status
    matched_status = ak.to_numpy(
        ak.flatten(arrays.muon_pixel_tracks_matched[trk_selection])
    )

    # Keep only tracks with a finite Δη, Δφ and pT
    valid_mask = np.isfinite(deta_np) & np.isfinite(dphi_np) & np.isfinite(pt_np)
    deta_np = deta_np[valid_mask]
    dphi_np = dphi_np[valid_mask]
    pt_np = pt_np[valid_mask]
    matched_status = matched_status[valid_mask]

    matched_mask = matched_status == 1
    unmatched_mask = matched_status == 0

    def draw_delta_vs_pt(ax, delta_np, ylabel):
        # 2D histogram of a delta vs track pT: unmatched tracks (red) first,
        # matched tracks (blue) on top; the colorbar follows the matched layer
        # when it exists, otherwise the unmatched one.
        layers = {}
        for key, mask, cmap, alpha, label in (
            ("unmatched", unmatched_mask, "Reds", 0.7, "Unmatched"),
            ("matched", matched_mask, "Blues", 0.8, "Matched"),
        ):
            if np.any(mask):
                layers[key] = ax.hist2d(
                    pt_np[mask],
                    delta_np[mask],
                    bins=[np.logspace(0, 2, 50), np.linspace(-0.2, 0.2, 100)],
                    cmap=cmap,
                    norm=plt.matplotlib.colors.LogNorm(),
                    alpha=alpha,
                    label=label,
                )
        if "matched" in layers:
            plt.colorbar(layers["matched"][3], ax=ax, label="Matched tracks")
        elif "unmatched" in layers:
            plt.colorbar(layers["unmatched"][3], ax=ax, label="Unmatched tracks")
        ax.set_xscale("log")
        ax.set_xlabel("Track pT [GeV]", loc="right")
        ax.set_ylabel(ylabel, loc="top")
        ax.grid(alpha=0.3)

    def draw_projection(ax, delta_np, window, symbol):
        # 1D distribution of a delta for unmatched / matched tracks, with the
        # matching window marked by dashed vertical lines.
        for mask, colour, label in (
            (unmatched_mask, "red", "Unmatched tracks"),
            (matched_mask, "blue", "Matched tracks"),
        ):
            if np.any(mask):
                ax.hist(
                    delta_np[mask],
                    bins=100,
                    range=(-0.5, 0.5),
                    histtype="step",
                    color=colour,
                    linewidth=2,
                    label=label,
                )
        ax.axvline(
            -window, color="black", linestyle="--", label=f"{symbol} window = ±{window}"
        )
        ax.axvline(window, color="black", linestyle="--")
        ax.set_xlabel(f"{symbol} (Track - L1TkMu)", loc="right")
        ax.set_ylabel("Tracks", loc="top")
        ax.set_yscale("log")
        ax.legend()
        ax.grid(alpha=0.3)

    # Create plots
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    draw_delta_vs_pt(axes[0, 0], deta_np, "Δη (Track - L1TkMu)")
    draw_delta_vs_pt(axes[0, 1], dphi_np, "Δφ (Track - L1TkMu)")
    draw_projection(axes[1, 0], deta_np, deta_window, "Δη")
    draw_projection(axes[1, 1], dphi_np, dphi_window, "Δφ")

    fig.suptitle("Δη and Δφ between PixelTracks and L1TkMuons", fontsize=16)
    fig.tight_layout(rect=[0, 0, 1, 0.96])

    # Rebuild the track_matched mask with the layout of the unselected track
    # collection: start from all-False and write the per-selected-track result
    # into the slots that passed trk_selection.
    counts = ak.num(arrays.muon_pixel_tracks_pt)
    trk_selection_flat = ak.to_numpy(ak.flatten(trk_selection))
    full_mask_flat = np.zeros(len(trk_selection_flat), dtype=bool)
    full_mask_flat[trk_selection_flat] = ak.to_numpy(ak.flatten(track_matched))
    full_mask = ak.unflatten(full_mask_flat, counts)

    n_matched = ak.sum(full_mask)
    n_total = ak.sum(arrays.muon_pixel_tracks_pt > 0)
    # Guard the ratio so an input without tracks reports 0% instead of NaN
    matching_stats = n_matched / n_total if n_total > 0 else 0.0
    print(
        f"Matching statistics: {n_matched} tracks matched out of {n_total} total ({matching_stats*100:.2f}%)"
    )

    return fig, full_mask

# Visual inspection

## L1 Tracker Muons

In [None]:
# Efficiency / fake-rate figure for L1TkMu with the function's default
# selection, saved as a PNG in the working directory
l1plot = plot_efficiency_and_fake_L1()
l1plot.savefig("L1TkMu_efficiency_and_fake.png", dpi=300)

In [None]:

# Δη/Δφ figure plus a per-track mask of pixel tracks that have an L1TkMu inside
# a 0.01 x 0.01 window (much tighter than the function defaults). The mask is
# reused as the track selection in the cells below.
l1TkMu_match_plot, l1TkMu_match_mask = plot_and_match_pixelTracks_with_L1TkMu(
    deta_window=0.01,
    dphi_window=0.01,
)
l1TkMu_match_plot.savefig("L1TkMu_matching.png", dpi=300)

## No quality cuts (L1TkMu-matched tracks)

### Track quality

In [None]:
# Track-quality distributions with no quality cuts; the only selection is the
# L1TkMu window match computed above.
# NOTE(review): the output name says "all" although the L1TkMu match mask is
# applied - confirm this is the intended baseline.
tqNoSel = plot_track_quality_parameters(selection=l1TkMu_match_mask)
# Output name follows the `legacy` switch set in the first cell
outname = (
    "track_parameters_legacy_all.png" if legacy else "track_parameters_ext_all.png"
)
tqNoSel.savefig(outname, dpi=300)

### Efficiency and fake rate

In [None]:
# Efficiency / fake rate with no quality cuts; the only selection is the
# L1TkMu window match computed above
effFakeNoSel = plot_efficiency_and_fake(selection=l1TkMu_match_mask)
# Output name follows the `legacy` switch set in the first cell
outname = "eff_and_fake_legacy_all.png" if legacy else "eff_and_fake_ext_all.png"
effFakeNoSel.savefig(outname, dpi=300)

## Simple cuts

In [None]:
# Rectangular per-track cuts. Each threshold has a "legacy" and an "ext" value,
# picked by the `legacy` switch; trailing comments record tighter alternatives.
dzErrCut = arrays.muon_pixel_tracks_dzErr < (
    0.018 if legacy else 0.015
)  # 0.006 aggressive
phiErrCut = arrays.muon_pixel_tracks_phiErr < (0.0040 if legacy else 0.0020)
# Cut on the absolute value of q/p
qoverpCut = np.abs(arrays.muon_pixel_tracks_qoverp) < (
    1 if legacy else 0.075
)  # 0.05 aggressive
qoverpErrCut = arrays.muon_pixel_tracks_qoverpErr < (0.035 if legacy else 0.004)
lambdaErrCut = arrays.muon_pixel_tracks_lambdaErr < (1 if legacy else 0.0006)
etaErrCut = arrays.muon_pixel_tracks_etaErr < (1 if legacy else 0.0006)

# Combined selection. Only the dzErr, |q/p| and etaErr cuts are active; the
# commented-out terms are defined above but currently switched off. The leading
# pt >= 0 term provides a mask with the per-track layout to AND the cuts into.
sel = (
    (arrays.muon_pixel_tracks_pt >= 0)
    # & phiErrCut
    & dzErrCut
    & qoverpCut
    # & qoverpErrCut
    # & lambdaErrCut
    & etaErrCut
)

### Track quality

In [None]:
# Track-quality distributions for tracks passing the rectangular cuts (`sel`)
tqSel = plot_track_quality_parameters(selection=sel)
# Output name follows the `legacy` switch set in the first cell
outname = (
    "track_parameters_legacy_cut_selection.png"
    if legacy
    else "track_parameters_ext_cut_selection.png"
)
tqSel.savefig(outname, dpi=300)

### Efficiency and fake rate

In [None]:
# Efficiency / fake rate for tracks passing the rectangular cuts (`sel`)
effFakeSel = plot_efficiency_and_fake(selection=sel)
# Output name follows the `legacy` switch set in the first cell
outname = (
    "efficiency_and_fake_legacy_cut_selection.png"
    if legacy
    else "efficiency_and_fake_ext_cut_selection.png"
)
effFakeSel.savefig(outname, dpi=300)

# ML model performance

## BDT

In [None]:
# Load the BDT artifact: a dict holding the fitted pipeline ("pipeline") and
# the feature ordering used at training time ("feature_names").
# There is no fallback: a missing file or missing key raises here.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code - only load artifacts from a trusted source.
# NOTE(review): this import sits mid-notebook; consider moving it to the import
# cell at the top.
import joblib

pipeline_artifact = joblib.load("bdt_pipeline.pkl")
print("Loaded bdt_pipeline.pkl (pipeline + feature ordering).")
bdt_model = pipeline_artifact["pipeline"]
feature_order = pipeline_artifact["feature_names"]

In [None]:
# BDT application (adaptive). Requires bdt_pipeline.pkl for consistent scaling.
#
# Scores every muon pixel track with the loaded model, thresholds the score
# into a flat keep-mask, prints diagnostics against the `matched` flag, and
# reshapes the mask to the per-event track layout as `bdt_selection`.
from sklearn.metrics import confusion_matrix

if not hasattr(bdt_model, "predict_proba"):
    raise RuntimeError(
        "Loaded model has no predict_proba; please load bdt_pipeline.pkl artifact."
    )

# Build flat feature matrix in training order
feature_map = {
    "pt": arrays.muon_pixel_tracks_pt,
    "eta": arrays.muon_pixel_tracks_eta,
    "phi": arrays.muon_pixel_tracks_phi,
    "qoverp": arrays.muon_pixel_tracks_qoverp,
    "qoverpErr": arrays.muon_pixel_tracks_qoverpErr,
    "dzErr": arrays.muon_pixel_tracks_dzErr,
    "etaErr": arrays.muon_pixel_tracks_etaErr,
    "lambdaErr": arrays.muon_pixel_tracks_lambdaErr,
    "dxyErr": arrays.muon_pixel_tracks_dxyErr,
    "phiErr": arrays.muon_pixel_tracks_phiErr,
    "normalizedChi2": arrays.muon_pixel_tracks_normalizedChi2,
    "nPixelHits": arrays.muon_pixel_tracks_nPixelHits,
    "nTrkLays": arrays.muon_pixel_tracks_nTrkLays,
    "dszErr": arrays.muon_pixel_tracks_dszErr,
}
# Fail fast and name every unmapped feature, rather than surfacing a bare
# KeyError for only the first one inside the comprehension below.
missing_features = [n for n in feature_order if n not in feature_map]
if missing_features:
    raise KeyError(
        f"Features required by the BDT but absent from feature_map: {missing_features}"
    )
cols = [ak.to_numpy(ak.flatten(feature_map[n])) for n in feature_order]
X_raw = np.vstack(cols).T  # one row per track, one column per feature
finite_mask = np.isfinite(X_raw).all(axis=1)
if not finite_mask.all():
    print(f"Non-finite rows dropped: {(~finite_mask).sum()}")
X_use = X_raw[finite_mask]

# Column of predict_proba that belongs to class label 1 (the `matched` flag).
positive_col = list(bdt_model.classes_).index(1)
proba_valid = bdt_model.predict_proba(X_use)[:, positive_col]
# Rows with non-finite features are never scored; they keep probability 0 and
# therefore fall below the threshold applied further down.
proba_all = np.zeros(len(X_raw), dtype=float)
proba_all[finite_mask] = proba_valid

matched_truth = ak.to_numpy(ak.flatten(arrays.muon_pixel_tracks_matched)).astype(int)
assert len(matched_truth) == len(proba_all)

# Basic probability diagnostics
spread = proba_all.max() - proba_all.min()
print(
    "Prob stats: min={:.2f} max={:.2f} mean={:.2f} var={:.2e} spread={:.2f}".format(
        proba_all.min(), proba_all.max(), proba_all.mean(), proba_all.var(), spread
    )
)
print(
    "Quantiles (0,10,25,50,75,90,100)%:",
    np.quantile(proba_all, [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1]),
)

# Threshold the score; if the model outputs an (almost) constant probability,
# the threshold carries no information and every track is kept instead.
min_proba = 0.75
if spread < 1e-6:
    print("WARNING: probabilities constant -> cannot discriminate; keeping all.")
    keep_mask = np.ones_like(proba_all, dtype=bool)
else:
    keep_mask = proba_all >= min_proba
    print(f"Using default threshold {min_proba}.")

# Final diagnostics
n_tot = keep_mask.size
n_keep = keep_mask.sum()
n_matched = matched_truth.sum()
# Purity: matched fraction among kept tracks. Matched-eff: kept fraction among matched tracks.
purity = matched_truth[keep_mask].mean() * 100 if n_keep else 0.0
eff_true = matched_truth[keep_mask].sum() / n_matched * 100 if n_matched else 0.0
print(
    f"Selection: kept {n_keep}/{n_tot} ({n_keep / n_tot * 100:.2f}%) Purity={purity:.2f}% Matched-eff={eff_true:.2f}%"
)

# Predicted classes are exactly the applied selection, so the confusion matrix
# always describes the mask that is handed to the plotting helpers.
y_pred_classes = keep_mask.astype(int)

# labels=[0, 1] pins the row/column order to (fake, matched).
cm = confusion_matrix(matched_truth, y_pred_classes, labels=[0, 1])
if cm.size == 4:
    tn, fp, fn, tp = cm.ravel()
    acc = (tn + tp) / n_tot
    print(
        f"Confusion @{min_proba} (rows truth fake/matched, cols pred fake/matched):\n{cm}"
    )
    print(
        f"Acc={acc * 100:.2f}% TPR={tp / max(tp + fn, 1):.3f} FPR={fp / max(fp + tn, 1):.3f}"
    )
else:
    print("Confusion matrix degenerate:", cm)

# Build jagged mask: restore the per-event track multiplicity on the flat mask.
counts = ak.to_numpy(ak.num(arrays.muon_pixel_tracks_pt))
bdt_selection = ak.unflatten(keep_mask, counts)

In [None]:
# Plots using the derived BDT mask
# Both helpers receive the jagged `bdt_selection` mask and return an object
# with a `savefig` method; each result is written to a fixed PNG name.
tqBDT = plot_track_quality_parameters(selection=bdt_selection)
tqBDT.savefig("track_parameters_bdt_selection.png", dpi=300)
effFakeBDT = plot_efficiency_and_fake(selection=bdt_selection)
effFakeBDT.savefig("efficiency_and_fake_bdt_selection.png", dpi=300)

## DNN

In [None]:
import torch
import numpy as np
from sklearn.metrics import (
    roc_auc_score,
    confusion_matrix,
    roc_curve,
)

# Load DNN artifact
# SECURITY: weights_only=False makes torch.load run the full pickle
# unpickler, which can execute arbitrary code; only load trusted artifacts.
# NOTE(review): presumably required because the artifact also stores
# non-tensor objects (a "scaler" entry is read from it below) — confirm.
artifact_path = "dnn_artifact.pt"
art = torch.load(artifact_path, map_location="cpu", weights_only=False)
print("Loaded DNN artifact keys:", list(art.keys()))


# Rebuild model
class MLP(torch.nn.Module):
    """Fully connected network with a single output unit.

    For every width in ``layers`` the network stacks
    ``Linear -> BatchNorm1d -> ReLU`` (followed by ``Dropout`` when
    ``dropout > 0``), and ends with a ``Linear`` layer producing one value
    per input row. All modules live in ``self.net`` (a ``Sequential``), so
    state-dict keys are of the form ``net.<index>.<param>``.

    Args:
        in_features: Number of input features per row.
        layers: Iterable of hidden-layer widths, in order.
        dropout: Dropout probability; no Dropout modules are added when it
            is not greater than zero.
    """

    def __init__(self, in_features, layers, dropout=0.0):
        super().__init__()
        nn = torch.nn

        # widths[i] -> widths[i + 1] is the shape of the i-th hidden Linear layer.
        widths = [in_features, *layers]
        modules = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            modules += [nn.Linear(fan_in, fan_out), nn.BatchNorm1d(fan_out), nn.ReLU()]
            if dropout > 0:
                modules.append(nn.Dropout(dropout))
        # Output head: one unit, no activation applied here.
        modules.append(nn.Linear(widths[-1], 1))
        self.net = nn.Sequential(*modules)

    def forward(self, x):
        """Return the output of the final linear layer for input ``x``."""
        return self.net(x)


# Rebuild the network from the stored configuration and weights.
cfg = art["model_config"]
scaler = art["scaler"]
feat_order = art["feature_names"]
model = MLP(**cfg)
model.load_state_dict(art["state_dict"])
model.eval()  # evaluation mode for inference
print("Model config:", cfg)

# Build flat feature matrix in same order
# Some inputs enter as log10 of the branch value, the rest unchanged.
# NOTE(review): this presumably mirrors the training-time preprocessing — confirm.
feature_map_dnn = {
    "muon_pixel_tracks_p": np.log10(arrays.muon_pixel_tracks_p),
    "muon_pixel_tracks_pt": np.log10(arrays.muon_pixel_tracks_pt),
    "muon_pixel_tracks_ptErr": np.log10(arrays.muon_pixel_tracks_ptErr),
    "muon_pixel_tracks_eta": arrays.muon_pixel_tracks_eta,
    "muon_pixel_tracks_etaErr": np.log10(arrays.muon_pixel_tracks_etaErr),
    "muon_pixel_tracks_phi": arrays.muon_pixel_tracks_phi,
    "muon_pixel_tracks_phiErr": np.log10(arrays.muon_pixel_tracks_phiErr),
    "muon_pixel_tracks_chi2": arrays.muon_pixel_tracks_chi2,
    "muon_pixel_tracks_normalizedChi2": arrays.muon_pixel_tracks_normalizedChi2,
    "muon_pixel_tracks_nPixelHits": arrays.muon_pixel_tracks_nPixelHits,
    "muon_pixel_tracks_nTrkLays": arrays.muon_pixel_tracks_nTrkLays,
    "muon_pixel_tracks_nFoundHits": arrays.muon_pixel_tracks_nFoundHits,
    "muon_pixel_tracks_nLostHits": arrays.muon_pixel_tracks_nLostHits,
    "muon_pixel_tracks_dsz": arrays.muon_pixel_tracks_dsz,
    "muon_pixel_tracks_dszErr": np.log10(arrays.muon_pixel_tracks_dszErr),
    "muon_pixel_tracks_dxy": arrays.muon_pixel_tracks_dxy,
    "muon_pixel_tracks_dxyErr": np.log10(arrays.muon_pixel_tracks_dxyErr),
    "muon_pixel_tracks_dz": arrays.muon_pixel_tracks_dz,
    "muon_pixel_tracks_dzErr": np.log10(arrays.muon_pixel_tracks_dzErr),
    "muon_pixel_tracks_qoverp": arrays.muon_pixel_tracks_qoverp,
    "muon_pixel_tracks_qoverpErr": np.log10(arrays.muon_pixel_tracks_qoverpErr),
    "muon_pixel_tracks_lambdaErr": np.log10(arrays.muon_pixel_tracks_lambdaErr),
}
# Fail fast and name every unmapped feature, rather than surfacing a bare
# KeyError for only the first one inside the comprehension below.
missing_features = [f for f in feat_order if f not in feature_map_dnn]
if missing_features:
    raise KeyError(
        f"Features required by the DNN but absent from feature_map_dnn: {missing_features}"
    )
# Cast each flattened branch to float32 with numpy directly. This yields the
# same float32 matrix as a torch round trip but does not depend on torch
# supporting the branch's original dtype.
cols = [
    ak.to_numpy(ak.flatten(feature_map_dnn[f])).astype(np.float32)
    for f in feat_order
]
X_all = np.column_stack(cols)  # one row per track, one column per feature
# log10 of a non-positive value is non-finite; such rows are excluded from scoring.
finite_mask = np.isfinite(X_all).all(axis=1)
if not finite_mask.all():
    print("Dropped non-finite rows:", (~finite_mask).sum())
X_use = X_all[finite_mask]
X_use_scaled = scaler.transform(X_use)

with torch.no_grad():
    logits = model(torch.from_numpy(X_use_scaled).float())
    # The network returns raw scores; the sigmoid maps them to probabilities.
    probs_valid = torch.sigmoid(logits).numpy().ravel()

# Rows that were not scored keep probability 0.
probs_all = np.zeros(len(X_all), dtype=float)
probs_all[finite_mask] = probs_valid

y_truth = ak.to_numpy(ak.flatten(arrays.muon_pixel_tracks_matched)).astype(int)
assert len(y_truth) == len(probs_all)
print(
    "Probability stats: min={:.3f} max={:.3f} mean={:.3f}".format(
        probs_all.min(), probs_all.max(), probs_all.mean()
    )
)

In [None]:
# Metrics & selection
#
# Thresholds the DNN probabilities into a flat keep-mask, reports AUC and a
# confusion matrix against the `matched` flag, draws the ROC curve and the
# score distributions, and reshapes the mask to the per-event track layout
# as `dnn_selection`.
finite_truth = y_truth[finite_mask]
finite_probs = probs_all[finite_mask]

spread = probs_all.max() - probs_all.min()
# A ROC curve needs varying scores and both truth classes among the scored
# rows; otherwise the AUC is reported as NaN and the ROC plot is skipped.
roc_defined = bool(spread > 0 and np.unique(finite_truth).size > 1)
auc = roc_auc_score(finite_truth, finite_probs) if roc_defined else float("nan")
print(f"AUC={auc:.3f} spread={spread:.4f}")

# Use the threshold stored with the artifact when there is one.
min_proba = art.get("best_threshold", 0.5)
if spread < 1e-6:
    print("Probabilities near-constant; using keep-all mask.")
    keep_mask = np.ones_like(probs_all, dtype=bool)
else:
    keep_mask = probs_all >= min_proba

# Evaluate the mask that is actually applied (including the keep-all
# fallback), so the printed numbers describe `dnn_selection` itself.
cm = confusion_matrix(y_truth, keep_mask.astype(int), labels=[0, 1])
print("Confusion matrix (rows truth fake/matched, cols pred fake/matched):")
print(cm)
if cm.size == 4:
    tn, fp, fn, tp = cm.ravel()
    acc = (tn + tp) / cm.sum()
    print(
        f"Acc={acc * 100:.2f}% TPR={tp / max(tp + fn, 1):.3f} FPR={fp / max(fp + tn, 1):.3f}"
    )

# ROC curve
if roc_defined:
    fpr, tpr, thr = roc_curve(finite_truth, finite_probs)
    fig_roc, ax_roc = plt.subplots(figsize=(5, 4))
    ax_roc.plot(fpr, tpr, label=f"AUC={auc:.3f}")
    ax_roc.plot([0, 1], [0, 1], "k--")  # diagonal reference line
    ax_roc.set_xlabel("FPR")
    ax_roc.set_ylabel("TPR")
    ax_roc.set_title("DNN ROC")
    ax_roc.legend()
    fig_roc.tight_layout()
    plt.show()
else:
    fpr, tpr, thr = None, None, None

# Probability distributions
fig_prob, ax_prob = plt.subplots(figsize=(5, 4))
ax_prob.hist(probs_all[y_truth == 1], bins=40, histtype="step", label="matched")
ax_prob.hist(probs_all[y_truth == 0], bins=40, histtype="step", label="fake")
ax_prob.set_xlabel("Prob(matched)")
ax_prob.set_ylabel("Tracks")
ax_prob.legend()
fig_prob.tight_layout()
plt.show()

# Build jagged mask similar to BDT selection
trk_counts = ak.to_numpy(ak.num(arrays.muon_pixel_tracks_pt))
dnn_selection = ak.unflatten(keep_mask, trk_counts)
print("Selection kept {:.2f}% of tracks".format(keep_mask.mean() * 100))

In [None]:
# Plots using the derived DNN mask
# Both helpers receive the jagged `dnn_selection` mask and return an object
# with a `savefig` method; each result is written to a fixed PNG name.
tqDnn = plot_track_quality_parameters(selection=dnn_selection)
tqDnn.savefig("track_parameters_dnn_selection.png", dpi=300)
effFakeDnn = plot_efficiency_and_fake(selection=dnn_selection)
effFakeDnn.savefig("efficiency_and_fake_dnn_selection.png", dpi=300)