In [None]:
# ! pip install --upgrade pip
# ! pip install --user dask[dataframe]
# ! pip install --user numpy scipy matplotlib seaborn boost_histogram pandas uproot awkward-pandas

In [None]:
import uproot
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import seaborn as sns

import sys

In [None]:
# Global matplotlib font sizes applied to every figure drawn in this notebook.
for _rc_key, _rc_value in (
    ("axes.labelsize", 14),
    ("xtick.labelsize", 12),
    ("ytick.labelsize", 12),
    ("legend.fontsize", 12),
    ("figure.titlesize", 16),
):
    plt.rcParams[_rc_key] = _rc_value

In [None]:
# Make the analysis helper modules importable.  The membership check keeps
# sys.path free of duplicate entries when this cell is re-run.
_helper_dir = "/home/belle2/amubarak/Ds2D0enue_Analysis/07-Python_Functions/"
if _helper_dir not in sys.path:
    sys.path.append(_helper_dir)

# Prep-Work

### Import Data

In [None]:
# # In this notebook we only process the main signal and the generic events,
# # for illustration purposes.
# # You can add other backgrounds after if you wish.
# samples = ["Signal","All","ccbar",]

# DataFrames = {}  # define empty dictionary to hold dataframes
# Date = "0429"
# Attempt = "0"

# # Signal:
# DataFrames[samples[0]] =  uproot.concatenate("/home/belle2/amubarak/C01-Simulated_Events/Ds2D0enu-Signal.root:Dstree",library='pd')
# # Background
# for s in samples[1:]: # loop over samples
#     DataFrames[s] =  uproot.concatenate("/group/belle2/users2022/amubarak/02-Grid/Sample_KEKCC/Ds2D0e-Generic_Ds_" + Date +"25_"+ Attempt +"_"+ s +".root:Dstree",library='pd')

In [None]:
# Load the signal MC and every generic-MC category into a dict of DataFrames,
# then build the merged frames ("All", "uds") that the plotting cells index.
samples = ["Signal","BB","ccbar","ddbar","ssbar","taupair","uubar"]

DataFrames = {}  # sample name -> pandas DataFrame read from the "Dstree" tree
Date = "0419"
Attempt = "0"

# Signal:
DataFrames[samples[0]] = uproot.concatenate("/home/belle2/amubarak/C01-Simulated_Events/Ds2D0enu-Signal.root:Dstree",library='pd')

# Background: one ROOT file per generic category, named after Date/Attempt.
for s in samples[1:]:
    DataFrames[s] = uproot.concatenate(f"/group/belle2/users2022/amubarak/02-Grid/Sample_Grid/Ds2D0e-Generic_Ds_{Date}25_{Attempt}_{s}.root:Dstree",library='pd')

# Every entry after "Signal" is a background category.
background_samples = list(samples[1:])

# All generic categories merged into one frame.
DataFrames["All"] = pd.concat([DataFrames[s] for s in background_samples], ignore_index=True)

# Light-quark continuum merged into one frame; later cells read DataFrames["uds"].
DataFrames["uds"] = pd.concat([DataFrames[s] for s in ("uubar", "ddbar", "ssbar")], ignore_index=True)

In [None]:
# Show every branch name available in the merged generic-MC frame.
list(DataFrames["All"].columns)

### Setup
The code below will be used to apply cuts to the data.

In [None]:
# Electron ID
#-------------------
# DataFrames["Signal"] = DataFrames["Signal"][DataFrames["Signal"]['e_electronID']>=0.95]
# DataFrames["ccbar"] = DataFrames["ccbar"][DataFrames["ccbar"]['e_electronID']>=0.95]
# DataFrames["Signal"] = DataFrames["Signal"][DataFrames["Signal"]['Ds_gammaveto_em_electronID']>=0.95]
# DataFrames["ccbar"] = DataFrames["ccbar"][DataFrames["ccbar"]['Ds_gammaveto_em_electronID']>=0.95]

# Photon Conversion
#-------------------
# DataFrames["Signal"] = DataFrames["Signal"][DataFrames["Signal"]['Ds_gammaveto_M_Correction']>=0.1]
# DataFrames["ccbar"] = DataFrames["ccbar"][DataFrames["ccbar"]['Ds_gammaveto_M_Correction']>=0.1]

# Peaking Background Removal
#----------------------------
# DataFrames["ccbar"] = DataFrames["ccbar"][(DataFrames["ccbar"]['Ds_diff_D0pi']>=0.15)]
# DataFrames["Signal"] = DataFrames["Signal"][(DataFrames["Signal"]['Ds_diff_D0pi']>=0.15)]

# # Vertex Fitting
# #----------------
# DataFrames["Signal"] = DataFrames["Signal"][DataFrames["Signal"]['Ds_chiProb']>=0.01]
# DataFrames["ccbar"] = DataFrames["ccbar"][DataFrames["ccbar"]['Ds_chiProb']>=0.01]

# Dalitz Removal
#----------------------------
# DataFrames["ccbar"] = DataFrames["ccbar"][(DataFrames["ccbar"]['Ds_pi0veto_M_Correction']<=0.08) | (DataFrames["ccbar"]['Ds_pi0veto_M_Correction']>=0.16)]
# DataFrames["Signal"] = DataFrames["Signal"][(DataFrames["Signal"]['Ds_pi0veto_M_Correction']<=0.08) | (DataFrames["Signal"]['Ds_pi0veto_M_Correction']>=0.16)]

# Vertex Fit
#----------------
# DataFrames["Signal"] = DataFrames["Signal"][DataFrames[samples[0]]['Ds_chiProb_Ds_rank']==1]
# DataFrames["ccbar"] = DataFrames["ccbar"][DataFrames[samples[1]]['Ds_chiProb_Ds_rank']==1]

# # D0 Invariant Mass
# #-----------------------
# DataFrames["Signal"] = DataFrames["Signal"][(DataFrames["Signal"]['D0_dM']>=-0.02) & (DataFrames["Signal"]['D0_dM']<=0.02)]
# DataFrames["ccbar"] = DataFrames["ccbar"][(DataFrames["ccbar"]['D0_dM']>=-0.02) & (DataFrames["ccbar"]['D0_dM']<=0.02)]
# DataFrames["All"] = DataFrames["All"][(DataFrames["All"]['D0_dM']>=-0.02) & (DataFrames["All"]['D0_dM']<=0.02)]

In [None]:
# Generic summary: fraction of generic-MC candidates per value of
# `summary_column` among the rows selected by `summary_query`.
# DataFrame.query() rejects an empty expression, so both settings must be
# non-empty; edit them to inspect a different selection or column.
summary_query = 'Ds_extraInfo_BkgBDT>=-1'
summary_column = 'D0_isSignal'
print(DataFrames["All"].query(summary_query)[[summary_column]].value_counts(normalize=True,dropna=False).apply(lambda x: f"{x:.6f}"))

In [None]:
# D0_isSignal fractions in the merged generic MC for each D0-origin category,
# after the BDT (>= 0.531) and Delta-m (>= 0.16) requirements.
generic_all = DataFrames["All"]
passes_cuts_all = (generic_all['Ds_extraInfo_BkgBDT'] >= 0.531) & (generic_all['Ds_diff_D0pi'] >= 0.16)
true_d0_all = abs(generic_all['D0_isSignal']) == 1

for origin_flag in ('Ds_D0_Dstarplus', 'Ds_D0_Dstar0', 'Ds_D0_NoDstarplusDstar0'):
    origin_rows = generic_all[(abs(generic_all[origin_flag]) == 1) & true_d0_all & passes_cuts_all]
    print(abs(origin_rows['D0_isSignal']).value_counts(normalize=True,dropna=False).apply(lambda x: f"{x:.6f}"))

# "Fake D0": flagged as Other, or matched to a D0 (|mcPDG| == 421) with
# D0_isSignal == 0.  The OR is parenthesised explicitly because `&` binds
# tighter than `|`; the cuts then apply to both alternatives.
fake_d0_all = (abs(generic_all['Ds_D0_Other']) == 1) | (
    (abs(generic_all['D0_mcPDG']) == 421) & (abs(generic_all['D0_isSignal']) == 0)
)
FakeD0 = generic_all[fake_d0_all & passes_cuts_all]
print(abs(FakeD0['D0_isSignal']).value_counts(normalize=True,dropna=False).apply(lambda x: f"{x:.6f}"))

# 1-D Histogram

In [None]:
# Normalised distribution of `var` for generic-MC candidates whose Ds candidate
# is truth-matched to |mcPDG| = 423.
var = 'Ds_gammaveto_M_Correction'
# i = 'Ds_massDifference_0'
Range = [0.0, 0.4]
Bins = 50
Density = True
Stacked = False

# Cut thresholds; none of them is applied in this cell.
dM = -1
FD = -1
BS = -1
eID = -1

# Bin width converted from GeV to MeV for the y-axis label.
perBin = ((Range[1] - Range[0])/Bins)*1000
print(f"Width Per Bin: {perBin:.2f} MeV")

label1= r'$m(e_{sig}^{+} e_{ROE}^{-}) \geq 0.06 GeV/c^{2}$'
label2= r'$m(e_{sig}^{+} e_{ROE}^{-}) \leq 0.06 GeV/c^{2}$'
labels = [label1, label2]
colors = ["#007C91"]

generic_frame = DataFrames["All"]
data = [generic_frame.loc[abs(generic_frame['Ds_mcPDG']) == 423, var]]

# factor = 0.7
# plt.hist(DataFrames["Signal"][(DataFrames["Signal"]['Ds_extraInfo_BkgBDT']>=BS)][var], label="Signal", histtype='step', density=Density, bins=Bins, alpha=1, range=Range, weights=factor*np.ones_like(DataFrames["Signal"][(DataFrames["Signal"]['Ds_extraInfo_BkgBDT']>=BS)][var]), ls='--', linewidth=1.5)
step_style = dict(bins=Bins, range=Range, density=Density, stacked=Stacked,
                  histtype='step', linewidth=2, alpha=1)
plt.hist(data, color=colors, label=labels, **step_style)
# plt.axvspan(Range[0],0.16,color='gray',alpha=0.2)
plt.axvline(0.06, color='gray', ls='--')  # marks the 0.06 GeV/c^2 boundary

# Titles
plt.title(r'$D^{*0} \rightarrow D^{0} \; \pi^{0} / \gamma$ (Peak)', loc = "left")
plt.title(r'$\int\mathcal{L}dt\approx\;1444$ fb$^{-1}$', loc = "right")

# Axis labels
plt.ylabel(r'$Entries/(\; {width:.2f}\;MeV/c^2)$'.format(width = perBin))
# plt.xlabel(r'$\Delta m_{\pi}(D_s^{+} - D^{0})\;[GeV/c^{2}]$')
plt.xlabel(r'$m(e_{sig}^{+} e_{ROE}^{-})\;[GeV/c^{2}]$')
# plt.yscale("log")
# plt.xscale("log")
# plt.ylim(0, 30000)
# plt.legend()
plt.show()

In [None]:
# Normalised distribution of `var` in the signal MC, split by the Ds_isSignal
# truth flag (1, 0 or NaN).
Stacked = False
Density = True
Bins = 50

# x-axis label for each variable this cell is used with, so the label always
# follows `var` instead of being edited by hand.
xlabels = {
    'D0_dM': r'$m(K \pi) - m_{PDG}(D^{0}) \;[GeV/c^{2}]$',
    'Ds_diff_D0pi': r'$\Delta m_{\pi}(D_s^{+} - D^{0})\;[GeV/c^{2}]$',
    'Ds_massDifference_0': r'$\Delta m_{e}(D_s^{+} - D^{0})\;[GeV/c^{2}]$',
    'e_pt': r'$p_{t} (e^{+})\;[GeV/c]$',
    'e_cos_theta': r'$cos(\theta) (e^{+})$',
    'e_dz': r'$dz (e^{+})$',
    'e_firstSVDLayer': r'First SVD Layer $(e^{+})$',
    'e_nSVDHits': r'# SVD Hits $(e^{+})$',
    'e_nCDCHits': r'# CDC Hits $(e^{+})$',
}
var = 'Ds_diff_D0pi'  # any key of `xlabels`
Range = [0.1, 0.55]
BD = -1
# Bin width in MeV; only meaningful when `var` is in GeV.
# perBin = ((Range[1] - Range[0])/Bins)
perBin = ((Range[1] - Range[0])/Bins)*1000
print("Width Per Bin: {width:.2f} MeV".format(width = perBin))

label1= r'$isSignal(D_s^{+})=1$'
label2= r'$isSignal(D_s^{+})=0$'
label3= r'$NaN$'

labels=[label1,label2,label3]
colors=["#4C6EB1",'#fd7f6f','purple']

signal_frame = DataFrames["Signal"]
data = [signal_frame.loc[signal_frame['Ds_isSignal'] == 1, var],
        signal_frame.loc[signal_frame['Ds_isSignal'] == 0, var],
        signal_frame.loc[signal_frame['Ds_isSignal'].isna(), var]
       ]

# Drawn in reverse so the isSignal==1 curve is plotted last (on top).
plt.hist(data[::-1], color=colors[::-1], label=labels[::-1], alpha=1, range=Range, linewidth=1.5, stacked=Stacked, density=Density, bins=Bins, histtype='step')
# plt.axvspan(Range[0],-0.02,color='gray',alpha=0.2)
# plt.axvspan(0.02,Range[1],color='gray',alpha=0.2)
# plt.axvline(-0.02,ls='--',color='gray')
# plt.axvline(0.02,ls='--',color='gray')

# Title
#---------
# Signal
plt.title(r'$2M\;Events$', loc = "left")
plt.title(r'$\bf Signal\;Events$', loc = "right")
# # Background
# plt.title(r'$\int\mathcal{L}dt\approx\;100$ fb$^{-1}$', loc = "left")
# plt.title(r'$\bf Generic\;c\bar{c}\;Events$', loc = "right")
# Label
#---------
# plt.ylabel(r'$Entries/(\; {width:.2f}\;MeV/c^2)$'.format(width = perBin))
# plt.ylabel(r'$Entries/(\; {width:.2f}\;MeV/c)$'.format(width = perBin))
plt.ylabel(r'$Entries$')
# Fall back to the raw branch name for variables without a registered label.
plt.xlabel(xlabels.get(var, var))
# plt.yscale("log")
plt.legend()
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Normalised Delta-m distribution in the signal MC, with the Ds_isSignal == 0
# candidates split by whether |Ds_mcPDG| equals 433.

# === USER OPTIONS ===
Density = True
Bins = 50
var = 'Ds_diff_D0pi'
Range = [0.1, 0.55]
perBin = ((Range[1] - Range[0]) / Bins) * 1000
print("Width Per Bin: {:.2f} MeV".format(perBin))

# === COLORS & LABELS ===
label_sig = r'$isSignal(D_s^{+})=1$'
label_0_peak = r'$isSignal=0,\;|PDG|=433$'
label_0_nonpeak = r'$isSignal=0,\;|PDG|\neq433$'
label_nan = r'$NaN$'

color_sig = "#4C6EB1"       # blue
color_0_peak = "#E15759"    # red-orange
color_0_nonpeak = "#FD7F6F" # coral
color_nan = "purple"

# === DATA EXTRACTION ===
df = DataFrames["Signal"]

# Truth-matched candidates
data_sig = df[df['Ds_isSignal'] == 1][var]

# isSignal == 0, split on |Ds_mcPDG| == 433
df_is0 = df[df['Ds_isSignal'] == 0]
is_433 = abs(df_is0['Ds_mcPDG']) == 433
data_0_nonpeak = df_is0[abs(df_is0['Ds_mcPDG']) != 433][var]
data_0_peak = df_is0[is_433][var]

# Candidates with no isSignal value
data_nan = df[df['Ds_isSignal'].isna()][var]

# === PLOTTING ===
# Options shared by all three hist calls.
step_kwargs = dict(bins=Bins, range=Range, density=Density,
                   histtype='step', linewidth=1.5)

# 1. Signal (unstacked)
plt.hist(data_sig, color=color_sig, label=label_sig, **step_kwargs)

# 2. isSignal==0 broken into two (stacked on each other)
plt.hist([data_0_nonpeak, data_0_peak],
         color=[color_0_nonpeak, color_0_peak],
         label=[label_0_nonpeak, label_0_peak],
         stacked=True,
         **step_kwargs)

# 3. NaN (unstacked)
plt.hist(data_nan, color=color_nan, label=label_nan, **step_kwargs)

# === LABELS & TITLES ===
plt.title(r'$2M\;Events$', loc="left")
plt.title(r'$\bf Signal\;Events$', loc="right")
plt.ylabel(r'$Entries$')
# x-axis label matches `var` (Ds_diff_D0pi); update it if `var` changes.
plt.xlabel(r'$\Delta m_{\pi}(D_s^{+} - D^{0})\;[GeV/c^{2}]$')
plt.legend()
plt.tight_layout()
plt.show()


In [None]:
# Ds_mcPDG composition of signal-MC candidates with Ds_isSignal == 0 at
# Ds_diff_D0pi <= 0.25.
low_dm_unmatched = DataFrames["Signal"].query('Ds_isSignal==0 and Ds_diff_D0pi<=0.25')
mcpdg_fractions = low_dm_unmatched[['Ds_mcPDG']].value_counts(normalize=True, dropna=False)
print(mcpdg_fractions.apply(lambda frac: f"{frac:.6f}"))

In [None]:
# Normalised distribution of an electron-candidate variable in the merged
# generic MC, split by the |e_mcPDG| of the matched particle (11, 211 or NaN).
Stacked = False
Density = True
Bins = 50
# i = 'e_pt'
# i = 'e_cos_theta'
# i = 'e_firstPXDLayer'
# i = 'e_firstSVDLayer'
# i = 'e_dr'
# i = 'e_dz'
# i = 'e_omega'
# i = 'e_nSVDHits'
i = 'e_nCDCHits'
Range = [0,50]
# Bin width in the variable's own units.  A hit count is not an energy, so no
# GeV->MeV factor is applied; use the scaled line below for e_pt.
# perBin = ((Range[1] - Range[0])/Bins)*1000
perBin = ((Range[1] - Range[0])/Bins)
print("Width Per Bin: {width:.3f}".format(width = perBin))

label1= r'$mcPDG(e^{+})=11$'
label2= r'$mcPDG(e^{+})=211$'
label3= r'$NaN$'

labels=[label1,label2,label3]
colors = ['#800000', '#7B1FA2', '#B8860B']

generic_frame = DataFrames["All"]
abs_e_pdg = abs(generic_frame['e_mcPDG'])
data=[
      generic_frame[abs_e_pdg == 11][i],
      generic_frame[abs_e_pdg == 211][i],
      generic_frame[abs_e_pdg.isna()][i]
     ]

# plt.hist(DataFrames["Signal"][i], label="Signal", color='#7eb0d5',density=Density, bins=Bins, alpha=0.8, range=Range)
plt.hist(data, color=colors, label=labels, density=Density, stacked=Stacked, bins=Bins, alpha=1, histtype='step', linewidth=2, range=Range)

# Title
#--------
plt.title(r'$D_s^{+} \rightarrow [D^{0} \rightarrow K^{-} \pi^{+}] e^{+} \nu_{e}$', loc = "left")
# plt.title(r'$\int\mathcal{L}dt\approx\;1444$ fb$^{-1}$', loc = "right")
# Label
#-------
# plt.ylabel(r'$Entries/(\; {width:.2f}\;MeV/c)$'.format(width = perBin))
# plt.ylabel(r'$Entries/({width:.3f})$'.format(width = perBin))
plt.ylabel(r'$Entries$')
# plt.xlabel(r'$p_{t}(e^{+})[GeV/c]$')
# plt.xlabel(r'$cos(\theta)(e^{+})$')
plt.xlabel(r'# CDC Hits $(e^{+})$')
# plt.xlabel(r'$dz(e^{+}) [cm]$')
# plt.xlabel(r'$\omega (e^{+}) [cm^{-1}]$')
# plt.xlabel(r'First SVD Layer $(e^{+})$')
plt.yscale("log")
plt.legend()
plt.show()

In [None]:
import matplotlib.pyplot as plt

# One 2-D histogram of e_dz versus e_dr per |e_mcPDG| category of the merged
# generic MC.  `categories`, `labels` and `colors` are reused by the next cells.
x = 'e_dr'
y = 'e_dz'

labels = [r'$mcPDG(e^{+})=11$', r'$mcPDG(e^{+})=211$', r'$NaN$']
colors = ['#800000', '#7B1FA2', '#B8860B']
categories = [
    DataFrames["All"][(abs(DataFrames["All"]['e_mcPDG'])==11)],
    DataFrames["All"][(abs(DataFrames["All"]['e_mcPDG'])==211)],
    DataFrames["All"][(abs(DataFrames["All"]['e_mcPDG']).isna())]
]

for category_frame, label in zip(categories, labels):
    # A new figure per category: plt.show() ends the current figure, so a single
    # figure created before the loop would size only the first panel.
    plt.figure(figsize=(7, 6))
    plt.hist2d(category_frame[x], category_frame[y], bins=50, range=[[0, 0.3], [-3, 3]], cmap='viridis', cmin=1)
    plt.title(label)
    plt.xlabel(r'$d_{r}(e^{+})\;[\mathrm{cm}]$')
    plt.ylabel(r'$d_{z}(e^{+})\;[\mathrm{cm}]$')
    plt.colorbar(label='Entries')
    plt.grid(False)
    plt.tight_layout()
    plt.show()

In [None]:
# One 2-D histogram of e_dr versus e_pt per category; `categories` and `labels`
# come from the e_dr/e_dz cell above.
x = 'e_pt'
y = 'e_dr'

for category_frame, label in zip(categories, labels):
    # A new figure per category: plt.show() ends the current figure, so a single
    # figure created before the loop would size only the first panel.
    plt.figure(figsize=(7, 6))
    plt.hist2d(category_frame[x], category_frame[y], bins=50, range=[[0, 0.5], [0, 0.3]], cmap='plasma', cmin=1)
    plt.title(label)
    plt.xlabel(r'$p_{T}(e^{+})\;[\mathrm{GeV}/c]$')
    plt.ylabel(r'$d_{r}(e^{+})\;[\mathrm{cm}]$')
    plt.colorbar(label='Entries')
    plt.grid(False)
    plt.tight_layout()
    plt.show()


In [None]:
# One 2-D histogram of e_pt versus e_cos_theta per category; `categories` and
# `labels` come from the e_dr/e_dz cell above.
x = 'e_cos_theta'
y = 'e_pt'

for category_frame, label in zip(categories, labels):
    # A new figure per category: plt.show() ends the current figure, so a single
    # figure created before the loop would size only the first panel.
    plt.figure(figsize=(7, 6))
    plt.hist2d(category_frame[x], category_frame[y], bins=50, range=[[-1, 1], [0, 0.5]], cmap='inferno', cmin=1)
    plt.title(label)
    plt.xlabel(r'$\cos(\theta)(e^{+})$')
    plt.ylabel(r'$p_{T}(e^{+})\;[\mathrm{GeV}/c]$')
    plt.colorbar(label='Entries')
    plt.grid(False)
    plt.tight_layout()
    plt.show()


In [None]:
# Stacked Delta-m distribution of the generic MC, one component per sample
# group (ccbar, uds, BB, taupair), after the BDT requirement `BD`.
Stacked = True
Density = False
Bins = 50
i = 'Ds_diff_D0pi'
# i = 'Ds_massDifference_0'
Range = [0.1, 0.55]
BD = -1
perBin = ((Range[1] - Range[0])/Bins)*1000
print("Width Per Bin: {width:.2f} MeV".format(width = perBin))

# The light-quark samples are loaded separately (uubar, ddbar, ssbar); build the
# merged "uds" frame once if it is not there, so the lookup below cannot fail.
if "uds" not in DataFrames:
    DataFrames["uds"] = pd.concat([DataFrames[q] for q in ("uubar", "ddbar", "ssbar")], ignore_index=True)

label1= r'$c \bar{c}$'
label2= r'$u \bar{u}, \; d \bar{d}, \;s \bar{s}$'
label3= r'$BB$'
label4= r'$\tau^{+} \tau^{-}$'

labels=[label1,label2,label3,label4]
# colors=['C2','C1','C3','C4','C5']
# Same order as `labels`.
data = [
    DataFrames[name][DataFrames[name]['Ds_extraInfo_BkgBDT'] >= BD][i]
    for name in ("ccbar", "uds", "BB", "taupair")
]

# factor = 0.5
# plt.hist(DataFrames["Signal"][(DataFrames["Signal"]['Ds_extraInfo_BkgBDT']>=BD)][i], label="Signal", histtype='step', density=Density, bins=Bins, alpha=1, range=Range, weights=factor*np.ones_like(DataFrames["Signal"][(DataFrames["Signal"]['Ds_extraInfo_BkgBDT']>=BD)][i]), ls='--', linewidth=1.5)
# Reversed so the first entry (ccbar) is stacked last, i.e. on top.
plt.hist(data[::-1], label=labels[::-1], density=Density, stacked=Stacked, bins=Bins, alpha=1, histtype='step', linewidth=1.5, range=Range)
# plt.axvspan(Range[0],0.15,color='gray',alpha=0.2)
# plt.axvline(0.15,ls='--',color='gray')

# Title
#--------
plt.title(r'$D_s^{+} \rightarrow [D^{0} \rightarrow K^{-} \pi^{+}] e^{+} \nu_{e}$', loc = "left")
plt.title(r'$\int\mathcal{L}dt\approx\;1444$ fb$^{-1}$', loc = "right")
# Label
#-------
plt.ylabel(r'$Entries/(\; {width:.2f}\;MeV/c^2)$'.format(width = perBin))
plt.xlabel(r'$\Delta m(D_s^{+} - D^{0})\;[GeV/c^{2}]$')
# plt.xlabel(r'$m(K \pi) - m_{PDG}(D^{0}) \;[GeV/c^{2}]$')
# plt.yscale("log")
# plt.xscale("log")
# plt.ylim(0, 25000)
plt.legend()
plt.show()

In [None]:
# Number of candidates passing the BDT requirement `BD` in each sample group.
for sample_name in ("ccbar", "uds", "BB", "taupair"):
    sample_frame = DataFrames[sample_name]
    print(len(sample_frame[sample_frame['Ds_extraInfo_BkgBDT'] >= BD]))

In [None]:
# Total number of candidates passing the BDT requirement `BD`, summed over the
# four sample groups.
sum(
    len(DataFrames[sample_name][DataFrames[sample_name]['Ds_extraInfo_BkgBDT'] >= BD])
    for sample_name in ("ccbar", "BB", "uds", "taupair")
)

In [None]:
# Signal-MC candidates passing BDT >= 0.531, as a percentage of 2,000,000.
signal_frame = DataFrames["Signal"]
n_signal_pass = len(signal_frame[signal_frame['Ds_extraInfo_BkgBDT'] >= 0.531])
(n_signal_pass/2000000)*100

In [None]:
# D0_isSignal fractions (NaN included) in each sample group after the loosest
# BDT requirement.
for sample_name in ("ccbar", "uds", "BB", "taupair"):
    selected = DataFrames[sample_name].query('Ds_extraInfo_BkgBDT>=-1')
    fractions = selected[['D0_isSignal']].value_counts(normalize=True, dropna=False)
    print(fractions.apply(lambda frac: f"{frac:.6f}"))

In [None]:
# Delta-m distribution of the generic MC after the BDT requirement `BS`, split
# by the generator-level mother of the D0 candidate.
Stacked = False
Density = False
Bins = 50
# var = 'Ds_diff_D0pi'
var = 'Ds_massDifference_0'
Range = [0.0, 0.25]
BS = 0.849
Samples = "All"
perBin = ((Range[1] - Range[0])/Bins)*1000
print("Width Per Bin: {width:.2f} MeV".format(width = perBin))

label1= r'$Other$'
label2= r'$Prompt \; D^{0}$'
label3= r'$D^{*0} \rightarrow D^{0} X$'
label4= r'$D^{*+} \rightarrow D^{0} X$'

labels=[label1,label2,label3,label4]
colors=['C5','C4','C1','C2',]

frame = DataFrames[Samples]
# NOTE(review): this cell cuts on "Ds_BkgBDT" while the other cells use
# 'Ds_extraInfo_BkgBDT' -- confirm which branch is intended.
passes_bdt = frame["Ds_BkgBDT"] >= BS
true_d0 = abs(frame['D0_isSignal']) == 1
mother_pdg = abs(frame['D0_genMotherPDG'])
data=[
      # Other: D0_isSignal is NaN or 0
      frame[(frame['D0_isSignal'].isna() | (abs(frame['D0_isSignal']) == 0)) & passes_bdt][var],
      # True D0 whose mother is neither |413| nor |423|
      frame[((mother_pdg != 413) & (mother_pdg != 423)) & true_d0 & passes_bdt][var],
      # True D0 with mother |423|
      frame[(mother_pdg == 423) & true_d0 & passes_bdt][var],
      # True D0 with mother |413|
      frame[(mother_pdg == 413) & true_d0 & passes_bdt][var],
      ]

# factor = 0.7
# plt.hist(DataFrames["Signal"][(DataFrames["Signal"]["Ds_BkgBDT"]>=BS)][var], label="Signal", histtype='step', density=Density, bins=Bins, alpha=1, range=Range, weights=factor*np.ones_like(DataFrames["Signal"][(DataFrames["Signal"]["Ds_BkgBDT"]>=BS)][var]), ls='--', linewidth=1.5)
plt.hist(data, color=colors, label=labels, density=Density, stacked=Stacked, bins=Bins, alpha=1, histtype='step', linewidth=2, range=Range)
# plt.axvspan(Range[0],0.16,color='gray',alpha=0.2)
# plt.axvline(0.16,ls='--',color='gray')

# Title
#--------
# plt.title(r'$\bf Generic \; Events$', loc = "left")
# The threshold shown is taken from `BS` so it cannot drift from the applied cut.
plt.title(r'$\bf Generic \; Events$' + "\n" + rf"$BDT \geq {BS}$", loc = "left")
plt.title(r'$\int\mathcal{L}dt\approx\;1443.999$ fb$^{-1}$', loc = "right")
# Label
#-------
plt.ylabel(r'$Entries/(\; {width:.2f}\;MeV/c^2)$'.format(width = perBin))
plt.xlabel(r'$\Delta m_{e}(D_s^{+} - D^{0})\;[GeV/c^{2}]$')
# plt.yscale("log")
# plt.xscale("log")
# plt.ylim(0, 30000)
plt.legend()
plt.show()

In [None]:
# Stacked = True
# Density = False
# Bins = 50
# i = 'Ds_diff_D0pi'
# # i = 'Ds_massDifference_0'
# Range = [0.1, 0.55]
# BD = -1
# perBin = ((Range[1] - Range[0])/Bins)*1000
# print("Width Per Bin: {width:.2f} MeV".format(width = perBin))

# label1= r'$c \bar{c}$'
# label2= r'$charged$'
# label3= r'$u \bar{u}, \; d \bar{d}, \;s \bar{s}$'
# label4= r'$mixed$'

# labels=[label1,label2,label3,label4]
# # colors=['C2','C1','C3','C4','C5']
# data=[DataFrames["ccbar"][(DataFrames["ccbar"]['Ds_extraInfo_BkgBDT']>=BD)][i],
#       DataFrames["charged"][(DataFrames["charged"]['Ds_extraInfo_BkgBDT']>=BD)][i],
#       DataFrames["uds"][(DataFrames["uds"]['Ds_extraInfo_BkgBDT']>=BD)][i],
#       DataFrames["mixed"][(DataFrames["mixed"]['Ds_extraInfo_BkgBDT']>=BD)][i],
#       ]

# factor = 3
# plt.hist(DataFrames["Signal"][(DataFrames["Signal"]['Ds_extraInfo_BkgBDT']>=BD)][i], label="Signal", histtype='step', density=Density, bins=Bins, alpha=1, range=Range, weights=factor*np.ones_like(DataFrames["Signal"][(DataFrames["Signal"]['Ds_extraInfo_BkgBDT']>=BD)][i]), ls='--', linewidth=1.5)
# plt.hist(data[::-1], label=labels[::-1], density=Density, stacked=Stacked, bins=Bins, alpha=1, histtype='step', linewidth=1.5, range=Range)
# # plt.axvspan(Range[0],0.15,color='gray',alpha=0.2)
# # plt.axvline(0.15,ls='--',color='gray')

# # Title
# #--------
# # plt.title(r'$BCS: Vertex \; Fit \; \chi^{2}$', loc = "left")
# plt.title(r'$D_s^{+} \rightarrow D^{0} e^{+} \nu_{e}$', loc = "left")
# plt.title(r'$\int\mathcal{L}dt\approx\;1443.999$ fb$^{-1}$', loc = "right")
# # Label
# #-------
# plt.ylabel(r'$Entries/(\; {width:.2f}\;MeV/c^2)$'.format(width = perBin))
# plt.xlabel(r'$\Delta m(D_s^{+} - D^{0})\;[GeV/c^{2}]$')
# # plt.yscale("log")
# # plt.xscale("log")
# # plt.ylim(0, 25000)
# plt.legend()
# plt.show()

In [None]:
# Distribution of `var` in the generic MC for candidates whose electron is
# matched to |e_mcPDG| = 11, split by D0 origin, after the BDT requirement `BS`.
var = 'Ds_useAlternativeDaughterHypothesis_massDifference_0_1_K'
# var = 'Ds_diff_D0pi'
# i = 'Ds_massDifference_0'
Range = [0.3, 0.8]
Bins = 50
Density = False
Stacked = False
Samples = "All"

# Cut settings: only BS is applied in this cell.
dM = -1
FD = -1
BS = 0.531
i = 0

perBin = ((Range[1] - Range[0])/Bins)*1000
print(f"Width Per Bin: {perBin:.2f} MeV")

label1= r'$Other$'
label2= r'$D^{0}$'
# label3= r'$D^{*0} \rightarrow D^{0} \; \pi^{0} / \gamma$'
label4= r'$D^{*+} \rightarrow D^{0} \; \pi^{+}$'
labels = [label1, label2, label4]
colors = ['C5', 'C4', 'C2']

# Masks shared by all three components.
frame = DataFrames[Samples]
matched_electron = abs(frame['e_mcPDG']) == 11
passes_bdt = frame['Ds_extraInfo_BkgBDT'] >= BS
abs_d0_flag = abs(frame['D0_isSignal'])
true_d0 = abs_d0_flag == 1

data = [
    # Other: D0_isSignal is NaN or 0
    frame[matched_electron & (abs_d0_flag.isna() | (abs_d0_flag == 0)) & passes_bdt][var],
    # True D0 flagged Ds_D0_NoDstarplusDstar0
    frame[matched_electron & (abs(frame['Ds_D0_NoDstarplusDstar0']) == 1) & true_d0 & passes_bdt][var],
    # (Ds_D0_Dstar0 component intentionally left out)
    # True D0 flagged Ds_D0_Dstarplus
    frame[matched_electron & (abs(frame['Ds_D0_Dstarplus']) == 1) & true_d0 & passes_bdt][var],
]

# factor = 0.7
# plt.hist(DataFrames["Signal"][(DataFrames["Signal"]['Ds_extraInfo_BkgBDT']>=BS)][var], label="Signal", histtype='step', density=Density, bins=Bins, alpha=1, range=Range, weights=factor*np.ones_like(DataFrames["Signal"][(DataFrames["Signal"]['Ds_extraInfo_BkgBDT']>=BS)][var]), ls='--', linewidth=1.5)
step_style = dict(bins=Bins, range=Range, density=Density, stacked=Stacked,
                  histtype='step', linewidth=1.5, alpha=1)
plt.hist(data, color=colors, label=labels, **step_style)
# plt.axvspan(Range[0],0.16,color='gray',alpha=0.2)
# plt.axvline(0.16,ls='--',color='gray')

# Titles
# plt.title(r'$BDT \geq 0.531$', loc = "left")
plt.title(r'$\int\mathcal{L}dt\approx\;1444$ fb$^{-1}$', loc = "right")

# Axis labels
plt.ylabel(r'$Entries/(\; {width:.2f}\;MeV/c^2)$'.format(width = perBin))
plt.xlabel(r'$\Delta m(D_s^{+} - D^{0})\;[GeV/c^{2}]$')
# plt.yscale("log")
# plt.xscale("log")
# plt.ylim(0, 30000)
plt.legend()
plt.show()

In [None]:
# D0_genMotherPDG counts (NaN included) for true D0 candidates flagged
# Ds_D0_NoDstarplusDstar0 that pass the BDT requirement `BS`.
frame = DataFrames[Samples]
no_dstar_true_d0 = (
    (abs(frame['Ds_D0_NoDstarplusDstar0']) == 1)
    & (abs(frame['D0_isSignal']) == 1)
    & (frame['Ds_extraInfo_BkgBDT'] >= BS)
)
print(frame[no_dstar_true_d0]['D0_genMotherPDG'].value_counts(dropna=False))

In [None]:
# Yields after the BDT (>= 0.531) and Delta-m (>= 0.16) requirements: signal MC
# first, then each D0-origin category of the ccbar sample.
print(len(DataFrames["Signal"][(DataFrames["Signal"]['Ds_extraInfo_BkgBDT']>=0.531) & (DataFrames["Signal"]['Ds_diff_D0pi']>=0.16)]))

ccbar_frame = DataFrames["ccbar"]
passes_cuts_ccbar = (ccbar_frame['Ds_extraInfo_BkgBDT'] >= 0.531) & (ccbar_frame['Ds_diff_D0pi'] >= 0.16)
true_d0_ccbar = abs(ccbar_frame['D0_isSignal']) == 1

for origin_flag in ('Ds_D0_Dstarplus', 'Ds_D0_Dstar0', 'Ds_D0_NoDstarplusDstar0'):
    print(len(ccbar_frame[(abs(ccbar_frame[origin_flag]) == 1) & true_d0_ccbar & passes_cuts_ccbar]))

# "Fake D0": flagged as Other, or matched to a D0 (|mcPDG| == 421) with
# D0_isSignal == 0.  The OR is parenthesised explicitly because `&` binds
# tighter than `|`; the cuts then apply to both alternatives.
fake_d0_ccbar = (abs(ccbar_frame['Ds_D0_Other']) == 1) | (
    (abs(ccbar_frame['D0_mcPDG']) == 421) & (abs(ccbar_frame['D0_isSignal']) == 0)
)
FakeD0 = ccbar_frame[fake_d0_ccbar & passes_cuts_ccbar]

print(len(FakeD0))

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Delta-m distribution of the generic MC split by D0 origin, with an individual
# scale factor applied to each component through per-entry weights.

# Configuration
Stacked = False
Density = False
Bins = 50
var = 'Ds_diff_D0pi'
Range = [0.1, 0.55]
BS = -1  # BDT Cut
Samples = "All"
# Upper y-axis limit; set to None to scale to the tallest drawn bin instead.
y_max_fixed = 20000

# Compute bin width in MeV
perBin = ((Range[1] - Range[0]) / Bins) * 1000
print(f"Width Per Bin: {perBin:.2f} MeV")

# Define Labels and Colors (same order as `data` and `background_factors`)
labels = [
    r'$D^{*+} \rightarrow D^{0} \pi^{+}$',
    r'$D^{*0} \rightarrow D^{0} \gamma, D^{0} \pi^{0}$',
    r'$D^{0}$',
    r'$Other$'
]
colors = ['C2', 'C1', 'C4', 'C5']

# Define Scaling Factors for Each Background Component
background_factors = [1.1, 0.6, 1.3, 0.9]  # Adjust as needed

# Select Data for Background Categories
frame = DataFrames[Samples]
passes_bdt = frame['Ds_extraInfo_BkgBDT'] >= BS
true_d0 = abs(frame['D0_isSignal']) == 1
# Other: flagged Ds_D0_Other, or |D0_mcPDG| == 421 with D0_isSignal == 0.
other_d0 = (abs(frame['Ds_D0_Other']) == 1) | (
    (abs(frame['D0_mcPDG']) == 421) & (abs(frame['D0_isSignal']) == 0)
)
data = [
    frame[(abs(frame['Ds_D0_Dstarplus']) == 1) & true_d0 & passes_bdt][var],
    frame[(abs(frame['Ds_D0_Dstar0']) == 1) & true_d0 & passes_bdt][var],
    frame[(abs(frame['Ds_D0_NoDstarplusDstar0']) == 1) & true_d0 & passes_bdt][var],
    frame[other_d0 & passes_bdt][var]
]

# One constant weight per entry, equal to the component's scale factor.
background_weights = [np.full(len(d), factor) for d, factor in zip(data, background_factors)]

# Compute Weights for Signal (only used by the commented-out overlay below)
factor = 0.02  # Signal Scaling Factor
signal_data = DataFrames["Signal"][(DataFrames["Signal"]['Ds_extraInfo_BkgBDT'] >= BS)][var]
signal_weights = np.full(len(signal_data), factor)

# # Plot Signal (Dashed Line)
# plt.hist(signal_data, label="Signal", histtype='step', density=Density,
#          bins=Bins, alpha=1, range=Range, weights=signal_weights, ls='--', linewidth=1.5)

# Plot Backgrounds with Individual Scaling Factors.  The returned bin heights
# already include weights, density and stacking, i.e. they are what is drawn.
bin_heights, _, _ = plt.hist(data, color=colors, label=labels, density=Density, stacked=Stacked,
                             bins=Bins, alpha=1, histtype='step', linewidth=1.5, range=Range,
                             weights=background_weights)

# Titles and Labels
# plt.title(r'$BDT \geq 0.531$', loc="left")
plt.title(r'$\int\mathcal{L}dt\approx\;1443.999$ fb$^{-1}$', loc="right")
plt.ylabel(f'Entries/({perBin:.2f} MeV/$c^2$)')
plt.xlabel(r'$\Delta m(D_s^{+} - D^{0})$ [GeV/$c^2$]')

# y-axis limit: the fixed value when given, otherwise 10% above the tallest bin.
if y_max_fixed is not None:
    plt.ylim(0, y_max_fixed)
else:
    plt.ylim(0, np.max(bin_heights) * 1.1)

# Log Scale (Optional)
# plt.yscale("log")
plt.legend()
plt.show()

In [None]:
# Delta-m distribution of true D0 candidates flagged Ds_D0_Dstar0 in the generic
# MC, with the BDT requirement `BS` and with the loosest requirement (>= -1).
var = 'Ds_diff_D0pi'
# i = 'Ds_massDifference_0'
Range = [0.1, 0.55]
Bins = 50
Density = False
Stacked = False
Samples = "All"

# Cut settings: only BS is applied in this cell.
dM = -1
FD = -1
BS = 0.531
i = 0

perBin = ((Range[1] - Range[0])/Bins)*1000
print(f"Width Per Bin: {perBin:.2f} MeV")

label1= r'$BDT \geq 0.531$'
label2= r'$No \; BDT \; Cut$'
labels = [label1, label2]
colors = ["#1f77b4", "#2ca02c"]

frame = DataFrames[Samples]
dstar0_true_d0 = (abs(frame['Ds_D0_Dstar0']) == 1) & (abs(frame['D0_isSignal']) == 1)
data = [
    frame[dstar0_true_d0 & (frame['Ds_extraInfo_BkgBDT'] >= threshold)][var]
    for threshold in (BS, -1)
]

# factor = 0.7
# plt.hist(DataFrames["Signal"][(DataFrames["Signal"]['Ds_extraInfo_BkgBDT']>=BS)][var], label="Signal", histtype='step', density=Density, bins=Bins, alpha=1, range=Range, weights=factor*np.ones_like(DataFrames["Signal"][(DataFrames["Signal"]['Ds_extraInfo_BkgBDT']>=BS)][var]), ls='--', linewidth=1.5)
step_style = dict(bins=Bins, range=Range, density=Density, stacked=Stacked,
                  histtype='step', linewidth=1.5, alpha=1)
plt.hist(data, color=colors, label=labels, **step_style)
# plt.axvspan(Range[0],0.16,color='gray',alpha=0.2)
# plt.axvline(0.16,ls='--',color='gray')

# Titles
plt.title(r'$D^{*0} \rightarrow D^{0} \; \pi^{0} / \gamma$', loc = "left")
plt.title(r'$\int\mathcal{L}dt\approx\;1444$ fb$^{-1}$', loc = "right")

# Axis labels
plt.ylabel(r'$Entries/(\; {width:.2f}\;MeV/c^2)$'.format(width = perBin))
plt.xlabel(r'$\Delta m(D_s^{+} - D^{0})\;[GeV/c^{2}]$')
# plt.yscale("log")
# plt.xscale("log")
# plt.ylim(0, 30000)
plt.legend()
plt.show()

# Shift

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# 2-D histogram of the Delta-m shift (Ds_diff_D0pi - Ds_massDifference_0) versus
# electron-candidate momentum for the merged generic MC.
df = DataFrames["All"]

# Compute the Δm shift; stored as a new column on the shared "All" frame.
df["delta_m_shift"] = df["Ds_diff_D0pi"] - df["Ds_massDifference_0"]

# Set ranges and binning
p_min, p_max = 0.0, 0.7
shift_min, shift_max = 0.0, 0.3

n_bins_p = 70  # momentum bins (10 MeV wide)
n_bins_shift = 70  # shift axis bins

# Create the 2D histogram
# plt.figure(figsize=(8, 6))
hist = plt.hist2d(
    df["e_p"], df["delta_m_shift"],
    bins=[n_bins_p, n_bins_shift],
    range=[[p_min, p_max], [shift_min, shift_max]],
    cmap="viridis"
)

# Add colorbar and labels
plt.colorbar(label="Entries")
plt.xlabel("Electron Candidate Momentum $p_{lab}(e^{+})$ [GeV/$c$]", fontsize=12)
plt.ylabel(r"Shift in $\Delta m$ ($\Delta m_\pi - \Delta m_e$) [GeV/$c^2$]", fontsize=12)
# The frame plotted is the merged generic MC ("All"), not the signal sample.
plt.title(r"Generic MC: $\Delta m$ Shift vs Electron Momentum", fontsize=14)
# plt.tight_layout()
plt.show()



In [None]:
import numpy as np
import matplotlib.pyplot as plt

# 2-D histogram of the Delta-m shift versus electron-candidate momentum for
# generic-MC candidates ABOVE the dielectron-mass cut, with D0_isSignal NaN or 0
# and Ds_gammaveto_em_electronID >= 0.1.
df = DataFrames["All"]

# Branch holding the dielectron invariant mass, and the cut applied to it.
dielectron_mass_var = 'Ds_gammaveto_M_Correction'
dielectron_mass_cut = 0.06  # GeV/c^2

# Select events above the cut (the complement of the cell that keeps < cut).
mask_dielectron_peak = ((abs(df['D0_isSignal']).isna()) | ((abs(df['D0_isSignal'])==0))) & (df[dielectron_mass_var] > dielectron_mass_cut) & (df['Ds_gammaveto_em_electronID']>=0.1)
# .copy() so the new column below is not written into the shared frame.
df_selected = df[mask_dielectron_peak].copy()

# Compute the Δm shift on the selected rows
df_selected["delta_m_shift"] = df_selected["Ds_diff_D0pi"] - df_selected["Ds_massDifference_0"]

# Set ranges and binning
p_min, p_max = 0.0, 0.7  # GeV/c
shift_min, shift_max = 0.0, 0.3  # GeV/c^2

n_bins_p = 70  # momentum bins (10 MeV wide)
n_bins_shift = 70  # shift axis bins

# Create the 2D histogram
plt.figure(figsize=(8, 6))
hist = plt.hist2d(
    df_selected["e_p"], df_selected["delta_m_shift"],
    bins=[n_bins_p, n_bins_shift],
    range=[[p_min, p_max], [shift_min, shift_max]],
    cmap="viridis"
)

# Add colorbar and labels
plt.colorbar(label="Entries")
plt.xlabel("Electron Candidate Momentum $p_{lab}(e^{+})$ [GeV/$c$]", fontsize=13)
plt.ylabel(r"Shift in $\Delta m$ ($\Delta m_\pi - \Delta m_e$) [GeV/$c^2$]", fontsize=13)
# The title states the selection actually applied, so it cannot be confused
# with the complementary (< cut) plot.
plt.title(rf"$m(e_{{sig}}^{{+}} e_{{ROE}}^{{-}}) > {dielectron_mass_cut}$ GeV/$c^2$: " + r"$\Delta m$ Shift vs Electron Momentum", fontsize=14)
plt.tight_layout()
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# 2-D histogram of the Delta-m shift versus electron-candidate momentum for
# generic-MC candidates BELOW the dielectron-mass cut, with D0_isSignal NaN or 0
# and Ds_gammaveto_em_electronID >= 0.1.
df = DataFrames["All"]

# Branch holding the dielectron invariant mass, and the cut applied to it.
dielectron_mass_var = 'Ds_gammaveto_M_Correction'
dielectron_mass_cut = 0.06  # GeV/c^2

# Select events below the cut (the low-mass dielectron region).
mask_dielectron_peak = ((abs(df['D0_isSignal']).isna()) | ((abs(df['D0_isSignal'])==0))) & (df[dielectron_mass_var] < dielectron_mass_cut) & (df['Ds_gammaveto_em_electronID']>=0.1)
# .copy() so the new column below is not written into the shared frame.
df_selected = df[mask_dielectron_peak].copy()

# Compute the Δm shift on the selected rows
df_selected["delta_m_shift"] = df_selected["Ds_diff_D0pi"] - df_selected["Ds_massDifference_0"]

# Set ranges and binning
p_min, p_max = 0.0, 0.7  # GeV/c
shift_min, shift_max = 0.0, 0.3  # GeV/c^2

n_bins_p = 70  # momentum bins (10 MeV wide)
n_bins_shift = 70  # shift axis bins

# Create the 2D histogram
plt.figure(figsize=(8, 6))
hist = plt.hist2d(
    df_selected["e_p"], df_selected["delta_m_shift"],
    bins=[n_bins_p, n_bins_shift],
    range=[[p_min, p_max], [shift_min, shift_max]],
    cmap="viridis"
)

# Add colorbar and labels
plt.colorbar(label="Entries")
plt.xlabel("Electron Candidate Momentum $p_{lab}(e^{+})$ [GeV/$c$]", fontsize=13)
plt.ylabel(r"Shift in $\Delta m$ ($\Delta m_\pi - \Delta m_e$) [GeV/$c^2$]", fontsize=13)
# The title states the selection actually applied, so it cannot be confused
# with the complementary (> cut) plot.
plt.title(rf"Photon Conversion Region ($m(e_{{sig}}^{{+}} e_{{ROE}}^{{-}}) < {dielectron_mass_cut}$ GeV/$c^2$):" + "\n" + r"$\Delta m$ Shift vs Electron Momentum", fontsize=14)
plt.tight_layout()
plt.show()