In [None]:
import sys

import seaborn as sns

import numpy as np
import pandas as pd
import uproot
import matplotlib.pyplot as plt
from matplotlib import gridspec
import matplotlib.colors as colors

In [None]:
# Notebook-wide font sizes: axis labels, tick labels, legend text and figure titles.
plt.rc("axes", labelsize=14)
plt.rc("xtick", labelsize=12)
plt.rc("ytick", labelsize=12)
plt.rc("legend", fontsize=12)
plt.rc("figure", titlesize=18)

In [None]:
# Make the project's helper modules importable from this notebook.
# The membership check keeps the cell idempotent: re-running it does not
# append the same directory to sys.path a second time.
PYTHON_FUNCTIONS_DIR = "/home/belle2/amubarak/Ds2D0enue_Analysis/07-Python_Functions/"
if PYTHON_FUNCTIONS_DIR not in sys.path:
    sys.path.append(PYTHON_FUNCTIONS_DIR)

# Prep-Work

### Import Data

In [None]:
# === Branch selection ===
# Save_var.txt holds one branch name per line. Blank lines and lines that
# start with '#' are skipped; a trailing comma and surrounding quotes are
# stripped from every remaining entry.
variables_to_load = []
with open("/home/belle2/amubarak/Ds2D0enue_Analysis/03-Grid/Save_var.txt") as f:
    for raw_line in f:
        entry = raw_line.strip()
        if not entry or entry.startswith("#"):
            continue
        variables_to_load.append(entry.strip(",").strip('"').strip("'"))

# Only the main signal and the generic events are processed here, for
# illustration purposes. Other backgrounds can be added afterwards.
samples = ["Signal", "BB", "ccbar", "ddbar", "ssbar", "taupair", "uubar"]
GenEvents = list(samples)

# Production tag used in the generic-sample file names
Date = "0530"
Attempt = "0"


def _load_dstree(path):
    """Read the tree addressed by `path` ('file.root:Dstree') into a pandas
    DataFrame, keeping only the branches listed in `variables_to_load`."""
    return uproot.concatenate(path, filter_name=variables_to_load, library='pd')


# One DataFrame per sample, keyed by sample name
DataFrames = {}

# Signal:
DataFrames[samples[0]] = _load_dstree(
    "/home/belle2/amubarak/C01-Simulated_Events/Ds2D0enu-Signal.root:Dstree"
)

# Background: every remaining entry of `samples` has its own grid output file
for s in samples[1:]:
    DataFrames[s] = _load_dstree(
        f"/group/belle2/users2022/amubarak/02-Grid/Sample_Grid/Ds2D0e-Generic_Ds_{Date}25_{Attempt}_{s}.root:Dstree"
    )

# Background samples are everything after the signal entry
background_samples = samples[1:]

# "All": every background sample stacked into one frame with a fresh index
DataFrames["All"] = pd.concat(
    [DataFrames[name] for name in background_samples],
    ignore_index=True,
)

# "uds": the three light-quark samples merged into a single category
DataFrames["uds"] = pd.concat(
    [DataFrames[name] for name in ("uubar", "ddbar", "ssbar")],
    ignore_index=True,
)

In [None]:
# Let pandas show up to 1000 rows and 1000 columns before truncating output.
pd.options.display.max_rows = 1000
pd.options.display.max_columns = 1000

The line below is to look at the available variables.

In [None]:
# Show every branch that was loaded for the signal sample.
list(DataFrames["Signal"].columns)

### Setup
The code below will be used to apply cuts to the data.

In [None]:
# Electron ID
#-------------------
# DataFrames["Signal"] = DataFrames["Signal"][DataFrames["Signal"]['e_electronID']>=0.95]
# DataFrames["ccbar"] = DataFrames["ccbar"][DataFrames["ccbar"]['e_electronID']>=0.95]
# DataFrames["Signal"] = DataFrames["Signal"][DataFrames["Signal"]['Ds_gammaveto_em_electronID']>=0.95]
# DataFrames["ccbar"] = DataFrames["ccbar"][DataFrames["ccbar"]['Ds_gammaveto_em_electronID']>=0.95]

# Photon Conversion
#-------------------
# DataFrames["Signal"] = DataFrames["Signal"][DataFrames["Signal"]['Ds_gammaveto_M_Correction']>=0.1]
# DataFrames["ccbar"] = DataFrames["ccbar"][DataFrames["ccbar"]['Ds_gammaveto_M_Correction']>=0.1]

# Peaking Background Removal
#----------------------------
# DataFrames["ccbar"] = DataFrames["ccbar"][(DataFrames["ccbar"]['Ds_diff_D0pi']>=0.15)]
# DataFrames["Signal"] = DataFrames["Signal"][(DataFrames["Signal"]['Ds_diff_D0pi']>=0.15)]

# # Vertex Fitting
# #----------------
# DataFrames["Signal"] = DataFrames["Signal"][DataFrames["Signal"]['Ds_chiProb']>=0.01]
# DataFrames["ccbar"] = DataFrames["ccbar"][DataFrames["ccbar"]['Ds_chiProb']>=0.01]

# Dalitz Removal
#----------------------------
# DataFrames["ccbar"] = DataFrames["ccbar"][(DataFrames["ccbar"]['Ds_pi0veto_M_Correction']<=0.08) | (DataFrames["ccbar"]['Ds_pi0veto_M_Correction']>=0.16)]
# DataFrames["Signal"] = DataFrames["Signal"][(DataFrames["Signal"]['Ds_pi0veto_M_Correction']<=0.08) | (DataFrames["Signal"]['Ds_pi0veto_M_Correction']>=0.16)]

# Vertex Fit
#----------------
# DataFrames["Signal"] = DataFrames["Signal"][DataFrames[samples[0]]['Ds_chiProb_Ds_rank']==1]
# DataFrames["ccbar"] = DataFrames["ccbar"][DataFrames["ccbar"]['Ds_chiProb_Ds_rank']==1]

# # D0 Invariant Mass
# #-----------------------
# DataFrames["Signal"] = DataFrames["Signal"][(DataFrames["Signal"]['D0_dM']>=-0.02) & (DataFrames["Signal"]['D0_dM']<=0.02)]
# DataFrames["ccbar"] = DataFrames["ccbar"][(DataFrames["ccbar"]['D0_dM']>=-0.02) & (DataFrames["ccbar"]['D0_dM']<=0.02)]

# Comparison
This section compares the `Ds_massDifference_0` distributions before and after applying the veto on `Ds_diff_D0pi`, to see where the veto distorts the spectrum. The later cells then check which events are lost.

In [None]:
# === Settings ===
Stacked = True
Density = False
Bins = 50
Range = [0.0, 0.25]
perBin = ((Range[1] - Range[0]) / Bins) * 1000
print("Width Per Bin: {:.2f} MeV".format(perBin))

# Veto window on 'Ds_diff_D0pi': candidates strictly inside
# (cut_low, cut_high) are removed when ApplyVeto is True.
# Set ApplyVeto = False to draw the samples without the veto; the title
# follows the switch, so it always describes what is actually plotted.
cut_low = 0.142
cut_high = 0.15
ApplyVeto = True
veto_query = f"Ds_diff_D0pi <= {cut_low} or Ds_diff_D0pi >= {cut_high}"

# Variable to plot after cut
var = 'Ds_massDifference_0'

# Sample keys and their legend labels, listed in the same order
sample_keys = ["ccbar", "uds", "BB", "taupair"]
labels = [
    r'$c \bar{c}$',
    r'$u \bar{u}, \; d \bar{d}, \;s \bar{s}$',
    r'$BB$',
    r'$\tau^{+} \tau^{-}$'
]

# Apply the veto (keep only candidates outside the window) and collect the
# plotted column for every sample.
data = [
    (DataFrames[key].query(veto_query) if ApplyVeto else DataFrames[key])[var]
    for key in sample_keys
]

# === Plot ===
# plt.figure(figsize=(8, 5))
# The lists are reversed so the first entry (ccbar) ends up on top of the stack.
plt.hist(data[::-1],
         label=labels[::-1],
         density=Density,
         stacked=Stacked,
         bins=Bins,
         range=Range,
         histtype='step',
         linewidth=2)

# Titles: the veto line is only shown when the veto was applied, and its
# numbers come from cut_low / cut_high instead of being typed in by hand.
left_title = r'$D_s^{+} \rightarrow [D^{0} \rightarrow K^{-} \pi^{+}] e^{+} \nu_{e}$'
if ApplyVeto:
    left_title += '\n' + rf'$\Delta m_{{\pi}}(D_s^{{+}} - D^{{0}}) \notin [{cut_low},\; {cut_high}] \; \mathrm{{GeV}}/c^{{2}}$'
plt.title(left_title, loc="left")
plt.title(r'$\int\mathcal{L}dt\approx\;1444$ fb$^{-1}$', loc="right")

# Labels
plt.xlabel(r'$\Delta m_{e}(D_s^{+} - D^{0})\;[GeV/c^{2}]$')
plt.ylabel(r'$Entries/(\;{:.2f}\;MeV/c^2)$'.format(perBin))
plt.legend()
# plt.tight_layout()
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# === Settings ===
Density = True
Bins = 50
Range = [0.0, 0.25]
cut_low, cut_high = 0.142, 0.15
var = 'Ds_massDifference_0'
# Keep candidates outside the (cut_low, cut_high) window of Ds_diff_D0pi
veto_query = "Ds_diff_D0pi <= {} or Ds_diff_D0pi >= {}".format(cut_low, cut_high)
# Bin width in MeV (Range is given in GeV)
perBin = 1000 * ((Range[1] - Range[0]) / Bins)

# === Labels and plot styles ===
labels = dict(
    ccbar=r'$c \bar{c}$',
    uds=r'$u \bar{u}, \; d \bar{d}, \;s \bar{s}$',
    BB=r'$BB$',
    taupair=r'$\tau^{+} \tau^{-}$',
)
color_before, color_after = "#007C91", "#D1495B"

# === Histogram plotting function ===
def veto_ratio(hist_after, hist_before):
    """Return hist_after / hist_before bin by bin as a float array.

    Bins where `hist_before` is not positive are set to 1.0 instead of being
    divided. The output buffer is always float, so integer count histograms
    (np.histogram with density=False) are accepted as well.
    """
    after = np.asarray(hist_after, dtype=float)
    before = np.asarray(hist_before, dtype=float)
    ratio = np.ones_like(before)
    np.divide(after, before, out=ratio, where=before > 0)
    return ratio


def plot_overlay_with_ratio(df, df_vetoed, label):
    """Overlay the `var` spectrum before and after the veto, with their ratio below.

    The binning (`Range`, `Bins`), the plotted column (`var`), the
    normalisation switch (`Density`), the bin width shown in the y label
    (`perBin`) and the line colours (`color_before`, `color_after`) are read
    from the notebook globals.

    Parameters
    ----------
    df : DataFrame with the sample before the veto.
    df_vetoed : DataFrame with the same sample after the veto.
    label : str, left-aligned title of the upper panel.
    """
    bins = np.linspace(Range[0], Range[1], Bins + 1)
    centers = 0.5 * (bins[1:] + bins[:-1])

    # Compute histograms on identical edges so they can be divided bin by bin
    hist_before, _ = np.histogram(df[var], bins=bins, density=Density)
    hist_after, _ = np.histogram(df_vetoed[var], bins=bins, density=Density)

    # After/before ratio; empty "before" bins are reported as 1.0
    ratio = veto_ratio(hist_after, hist_before)

    # === Plot ===
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 6), gridspec_kw={'height_ratios': [3, 1]}, sharex=True)

    # Top: Overlay
    ax1.step(centers, hist_before, where='mid', color=color_before, label="Before veto", linewidth=2)
    ax1.step(centers, hist_after, where='mid', color=color_after, label="After veto", linewidth=2)
    ax1.set_ylabel(r'$Entries/(\;{:.2f}\;MeV/c^2)$'.format(perBin))
    ax1.set_title(label, loc="left")
    ax1.legend()

    # Bottom: Ratio, with a dashed reference line at 1
    ax2.step(centers, ratio, where='mid', color='black')
    ax2.set_xlabel(r'$\Delta m_{e}(D_s^{+} - D^{0})\;[GeV/c^{2}]$')
    ax2.set_ylabel("Ratio")
    ax2.set_ylim(0, 1.2)
    ax2.axhline(1.0, linestyle='--', color='gray', linewidth=1)

    fig.tight_layout()
    plt.show()

# === One overlay + ratio figure per background category ===
for key in ("ccbar", "uds", "BB", "taupair"):
    sample = DataFrames[key]
    plot_overlay_with_ratio(sample, sample.query(veto_query), labels[key])


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# === Settings ===
Density = True
Bins = 50
Range = [0.0, 0.25]
cut_low, cut_high = 0.142, 0.15
var = 'Ds_massDifference_0'
# Keep candidates outside the (cut_low, cut_high) window of Ds_diff_D0pi
veto_query = "Ds_diff_D0pi <= {} or Ds_diff_D0pi >= {}".format(cut_low, cut_high)
# Bin width in MeV (Range is given in GeV)
perBin = 1000 * ((Range[1] - Range[0]) / Bins)

# === Exclude D*+ events ===
# Drop candidates whose |Ds_mcPDG| is 413, then apply the veto to what is left.
is_not_DstarP = DataFrames["All"]["Ds_mcPDG"].abs().ne(413)
df = DataFrames["All"].loc[is_not_DstarP]
df_vetoed = df.query(veto_query)

# === Plotting function ===
def plot_overlay_with_residual(df, df_vetoed, label, color_before="#007C91", color_after="#D1495B"):
    """Overlay the `var` spectrum before and after the veto, with their difference below.

    Binning (`Range`, `Bins`), the plotted column (`var`), the normalisation
    switch (`Density`) and the bin width shown in the y label (`perBin`) are
    read from the notebook globals.

    Parameters
    ----------
    df : DataFrame with the sample before the veto.
    df_vetoed : DataFrame with the same sample after the veto.
    label : str, left-aligned title of the upper panel.
    color_before, color_after : line colours of the two overlaid histograms.
    """
    edges = np.linspace(Range[0], Range[1], Bins + 1)
    midpoints = 0.5 * (edges[:-1] + edges[1:])

    # Bin both selections on the same edges so they can be subtracted directly
    spectra = {
        "Before veto": np.histogram(df[var], bins=edges, density=Density)[0],
        "After veto": np.histogram(df_vetoed[var], bins=edges, density=Density)[0],
    }
    delta = spectra["After veto"] - spectra["Before veto"]

    fig, (top, bottom) = plt.subplots(
        nrows=2,
        ncols=1,
        sharex=True,
        figsize=(8, 6),
        gridspec_kw={'height_ratios': [3, 1]},
    )

    # Upper panel: the two spectra
    for (name, values), shade in zip(spectra.items(), (color_before, color_after)):
        top.step(midpoints, values, where='mid', color=shade, label=name, linewidth=2)
    top.set_ylabel(r'$Entries/(\;{:.2f}\;MeV/c^2)$'.format(perBin))
    top.set_title(label, loc="left")
    top.legend()

    # Lower panel: after - before, with a dashed reference line at zero
    bottom.axhline(0.0, linestyle='--', color='gray', linewidth=1)
    bottom.step(midpoints, delta, where='mid', color='black')
    bottom.set_xlabel(r'$\Delta m_{e}(D_s^{+} - D^{0})\;[GeV/c^{2}]$')
    bottom.set_ylabel("Residual")

    fig.tight_layout()
    plt.show()

# === Draw the comparison for the D*+-free sample ===
plot_overlay_with_residual(df, df_vetoed, "All MC (excluding $D^{*+}$)")



In [None]:
import numpy as np
import matplotlib.pyplot as plt

# === Settings ===
Density = False
Bins = 50
Range = [0.0, 0.25]
cut_low, cut_high = 0.142, 0.15
var = 'Ds_massDifference_0'
# Keep candidates outside the (cut_low, cut_high) window of Ds_diff_D0pi
veto_query = "Ds_diff_D0pi <= {} or Ds_diff_D0pi >= {}".format(cut_low, cut_high)
# Bin width in MeV (Range is given in GeV)
perBin = 1000 * ((Range[1] - Range[0]) / Bins)

# === PDG-based category masks ===
# |Ds_mcPDG| == 413 -> "D*+", == 423 -> "D*0", anything else -> "Comb."
is_DstarP = DataFrames["All"]["Ds_mcPDG"].abs().eq(413)
is_Dstar0 = DataFrames["All"]["Ds_mcPDG"].abs().eq(423)
is_Comb = ~(is_DstarP | is_Dstar0)

# Insertion order fixes the order in which the figures are drawn
Categories = {
    name: DataFrames["All"][mask]
    for name, mask in (("D*+", is_DstarP), ("D*0", is_Dstar0), ("Comb.", is_Comb))
}

# "Before veto" line colour of each category
Colors = dict(zip(("D*+", "D*0", "Comb."), ("#4C6EB1", "#007C91", "#D1495B")))

# === Plotting function with residuals ===
def plot_overlay_with_residual(df, df_vetoed, label, color):
    """Draw the `var` spectrum before/after the veto and the residual (after - before).

    `Range`, `Bins`, `var`, `Density` and `perBin` are taken from the
    notebook globals.

    Parameters
    ----------
    df : DataFrame with the category before the veto.
    df_vetoed : DataFrame with the same category after the veto.
    label : str, left-aligned title of the upper panel.
    color : line colour of the "Before veto" histogram; the "After veto"
        histogram is always drawn in gray.
    """
    low, high = Range[0], Range[1]
    bins = np.linspace(low, high, Bins + 1)
    centers = 0.5 * (bins[:-1] + bins[1:])

    def binned(frame):
        # Histogram contents of `var` on the shared edges
        contents, _unused_edges = np.histogram(frame[var], bins=bins, density=Density)
        return contents

    hist_before = binned(df)
    hist_after = binned(df_vetoed)
    residual = hist_after - hist_before

    # Two stacked panels sharing the x axis, 3:1 in height
    fig, panels = plt.subplots(2, 1, figsize=(8, 6), sharex=True, gridspec_kw={'height_ratios': [3, 1]})
    upper, lower = panels

    # Upper panel: overlay
    step_style = dict(where='mid', linewidth=2)
    upper.step(centers, hist_before, color=color, label="Before veto", **step_style)
    upper.step(centers, hist_after, color='gray', label="After veto", **step_style)
    upper.set_ylabel(r'$Entries/(\;{:.2f}\;MeV/c^2)$'.format(perBin))
    upper.set_title(label, loc="left")
    upper.legend()

    # Lower panel: residual around a dashed zero line
    lower.axhline(0.0, linestyle='--', color='gray', linewidth=1)
    lower.step(centers, residual, where='mid', color='black')
    lower.set_xlabel(r'$\Delta m_{e}(D_s^{+} - D^{0})\;[GeV/c^{2}]$')
    lower.set_ylabel("Residual")

    fig.tight_layout()
    plt.show()

# === One figure per truth category, in the order Categories was built ===
for label in Categories:
    df = Categories[label]
    df_vetoed = df.query(veto_query)
    plot_overlay_with_residual(df, df_vetoed, label, color=Colors[label])


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# === Settings ===
Bins = 50
Range = [0.0, 0.25]
cut_low, cut_high = 0.142, 0.15
var = 'Ds_massDifference_0'
# Keep candidates outside the (cut_low, cut_high) window of Ds_diff_D0pi
veto_query = "Ds_diff_D0pi <= {} or Ds_diff_D0pi >= {}".format(cut_low, cut_high)
color_before, color_after = "#007C91", "#D1495B"

# === Extract "Other" category from ccbar
# "Other" = Ds_mcPDG is present and its absolute value is neither 413 nor 423.
df_cc = DataFrames["ccbar"].copy()
is_other = (
    ~df_cc["Ds_mcPDG"].abs().isin([413, 423])
    & df_cc["Ds_mcPDG"].notna()
)
df_other = df_cc[is_other].copy()

# === Label by e_mcPDG truth
# |e_mcPDG| == 11 -> "electron", == 211 -> "pion", everything else -> "other"
df_other["ParticleType"] = np.where(
    df_other["e_mcPDG"].abs() == 11,
    "electron",
    np.where(df_other["e_mcPDG"].abs() == 211, "pion", "other"),
)

# === Residual Plot Function
def plot_residual_by_type(df, df_vetoed, label):
    """Plot raw `var` counts before/after the veto and their difference.

    `Range`, `Bins`, `var`, `color_before` and `color_after` come from the
    notebook globals. Histograms are plain counts (no density normalisation).

    Parameters
    ----------
    df : DataFrame with the selection before the veto.
    df_vetoed : DataFrame with the same selection after the veto.
    label : str, appended to the '"Other" Category: ' title.
    """
    edges = np.linspace(Range[0], Range[1], Bins + 1)
    mids = 0.5 * (edges[:-1] + edges[1:])

    # Counts per bin on shared edges
    counts_before = np.histogram(df[var], bins=edges)[0]
    counts_after = np.histogram(df_vetoed[var], bins=edges)[0]
    difference = counts_after - counts_before

    fig, (main_ax, resid_ax) = plt.subplots(
        2, 1,
        sharex=True,
        figsize=(8, 6),
        gridspec_kw={'height_ratios': [3, 1]},
    )

    # Main panel: overlay of both selections
    overlays = (
        (counts_before, color_before, "Before veto"),
        (counts_after, color_after, "After veto"),
    )
    for counts, shade, legend_text in overlays:
        main_ax.step(mids, counts, where='mid', color=shade, label=legend_text, linewidth=2)
    main_ax.set_ylabel("Entries")
    main_ax.set_title(f'"Other" Category: {label}', loc="left")
    main_ax.legend()

    # Residual panel: after - before, with a dashed zero line
    resid_ax.step(mids, difference, where='mid', color='black')
    resid_ax.axhline(0, color='gray', linestyle='--', linewidth=1)
    resid_ax.set_ylabel("Residual")
    resid_ax.set_xlabel(r'$\Delta m_{e}(D_s^{+} - D^{0})\;[GeV/c^{2}]$')

    fig.tight_layout()
    plt.show()

# === Compare truth electrons and truth pions inside "Other"
for particle_type in ("electron", "pion"):
    df_type = df_other.loc[df_other["ParticleType"].eq(particle_type)]
    plot_residual_by_type(df_type, df_type.query(veto_query), label=particle_type)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# === Settings ===
Bins = 50
cut_low, cut_high = 0.142, 0.15
xrange = (0.0, 0.7)  # Adjustable plot range
color_before, color_after = "#007C91", "#D1495B"

# === Input DataFrame and veto
# Keep rows whose Ds_diff_D0pi lies outside the open (cut_low, cut_high) window.
df_all = DataFrames["All"]
df_all_vetoed = df_all.loc[
    lambda frame: (frame["Ds_diff_D0pi"] <= cut_low) | (frame["Ds_diff_D0pi"] >= cut_high)
]

# === Plotting Function (Plain Overlay)
def plot_hist_comparison(df_before, df_after, variable, xlabel):
    """Overlay the distribution of `variable` before and after the veto.

    The plotted range (`xrange`), the number of bins (`Bins`) and the line
    colours (`color_before`, `color_after`) are read from the notebook
    globals.

    Parameters
    ----------
    df_before : DataFrame with the sample before the veto.
    df_after : DataFrame with the sample after the veto.
    variable : str, name of the column to histogram.
    xlabel : str, used both as x-axis label and as left-aligned title.
    """
    edges = np.linspace(xrange[0], xrange[1], Bins + 1)

    # Draw on the current axes, exactly as the pyplot calls would
    ax = plt.gca()
    curves = (
        (df_before, color_before, "Before veto"),
        (df_after, color_after, "After veto"),
    )
    for frame, shade, legend_text in curves:
        ax.hist(frame[variable], bins=edges, histtype='step', color=shade,
                linewidth=2, label=legend_text)

    ax.set_xlabel(xlabel)
    ax.set_ylabel("Entries")
    ax.set_title(xlabel, loc="left")
    ax.legend()
    plt.tight_layout()
    plt.show()

# === Electron momentum and transverse momentum, before vs. after the veto
for column, axis_title in (
    ("e_p", r"Electron Momentum $p_e$ [GeV/c]"),
    ("e_pt", r"Electron Transverse Momentum $p_T$ [GeV/c]"),
):
    plot_hist_comparison(df_all, df_all_vetoed, column, axis_title)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# === Settings ===
Stacked = True
Density = True
Bins = 50
Range = [0.0, 0.25]
BDT = -1  # Not applied here, but variable is kept
cut_var = "Ds_diff_D0pi"
plot_var = 'Ds_massDifference_0'
pdg_var = 'Ds_mcPDG'
cut_low = 0.142
cut_high = 0.15
perBin = ((Range[1] - Range[0]) / Bins) * 1000
print("Width Per Bin: {:.2f} MeV".format(perBin))

# === Inputs ===
# NOTE(review): no cell visible in this notebook creates "All_WCh" or
# "Data_WCh"; they have to be loaded into DataFrames before this cell runs.
# Fail early with an explicit message instead of a bare KeyError.
missing_samples = [key for key in ("All_WCh", "Data_WCh") if key not in DataFrames]
if missing_samples:
    raise KeyError(
        f"DataFrames has no entry for {missing_samples}; "
        "load these samples before running this cell."
    )

# === Apply sideband cut to both data and MC ===
sideband_query = f"{cut_var} <= @cut_low or {cut_var} >= @cut_high"
df_MC = DataFrames["All_WCh"].query(sideband_query)
df_data = DataFrames["Data_WCh"].query(sideband_query)

# === Extract variable ===
mc_vals = df_MC[plot_var]
data_vals = df_data[plot_var]

# === Histogram counts ===
# Always start from raw counts: the Poisson error is sqrt(counts), and any
# normalisation applied afterwards scales value and error by the same factor.
counts_mc, edges = np.histogram(mc_vals, bins=Bins, range=Range)
counts_data, _ = np.histogram(data_vals, bins=Bins, range=Range)

bin_centers = 0.5 * (edges[1:] + edges[:-1])
bin_widths = np.diff(edges)

hist_mc = counts_mc.astype(float)
hist_data = counts_data.astype(float)

# === Statistical uncertainties ===
err_mc = np.sqrt(hist_mc)
err_data = np.sqrt(hist_data)

if Density:
    # np.histogram(density=True) divides the counts by
    # (number of in-range entries * bin width). Apply that same factor to the
    # values and to their errors; an empty sample is left at zero.
    norm_mc = counts_mc.sum() * bin_widths
    norm_data = counts_data.sum() * bin_widths
    hist_mc = np.divide(hist_mc, norm_mc, out=np.zeros_like(hist_mc), where=norm_mc > 0)
    err_mc = np.divide(err_mc, norm_mc, out=np.zeros_like(err_mc), where=norm_mc > 0)
    hist_data = np.divide(hist_data, norm_data, out=np.zeros_like(hist_data), where=norm_data > 0)
    err_data = np.divide(err_data, norm_data, out=np.zeros_like(err_data), where=norm_data > 0)

# === Compute pull ===
# Bins with zero combined uncertainty (both histograms empty) get pull = 0.
sigma = np.sqrt(err_data**2 + err_mc**2)
pull = np.divide(hist_data - hist_mc, sigma, out=np.zeros_like(sigma), where=sigma > 0)

# === Plot main comparison ===
plt.figure()
plt.hist(mc_vals, bins=Bins, range=Range, histtype='step', linewidth=2,
         label='MC', color="#007C91", density=Density)
plt.hist(data_vals, bins=Bins, range=Range, histtype='step', linewidth=2,
         label='Data', color="#2E2E2E", density=Density)

plt.xlabel(r'$\Delta m_{e}(D_s^{+} - D^{0})\;[GeV/c^{2}]$')
plt.ylabel(r'$Entries/(\;{:.2f}\;MeV/c^2)$'.format(perBin))
# The window shown in the title is taken from cut_low / cut_high
plt.title(r'$WC:$' + '\n' +
          rf'$\Delta m_{{\pi}}(D_s^{{+}} - D^{{0}}) \notin [{cut_low},\; {cut_high}] \; \mathrm{{GeV}}/c^{{2}}$', loc="left")
plt.title(r'$\int\mathcal{L}dt\approx\;1444$ fb$^{-1}$', loc="right")
plt.legend()
plt.show()

# === Plot pull ===
plt.figure()
plt.axhline(0, color='black', linewidth=1)
plt.bar(bin_centers, pull, width=bin_widths, color='gray', edgecolor='black')
plt.xlabel(r'$\Delta m_{e}(D_s^{+} - D^{0})\;[GeV/c^{2}]$')
plt.ylabel("Pull")
plt.title("Pull: (Data - MC) / Uncertainty")
plt.show()
