In [None]:
import uproot

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import seaborn as sns

import sys

In [None]:
# Notebook-wide matplotlib font sizes, set one rc group at a time.
plt.rc("axes", labelsize=14)
plt.rc("xtick", labelsize=12)
plt.rc("ytick", labelsize=12)
plt.rc("legend", fontsize=12)
plt.rc("figure", titlesize=16)

In [None]:
# Extra import location for this analysis (presumably its shared helper
# functions, judging by the directory name -- TODO confirm).
# Only add it when missing, so re-running this cell does not pile up
# duplicate entries on sys.path.
helper_dir = "/home/belle2/amubarak/Ds2D0enue_Analysis/07-Python_Functions/"
if helper_dir not in sys.path:
    sys.path.append(helper_dir)

# Prep-Work

### Import Data

In [None]:
# # In this notebook we only process the main signal and the generic events,
# # for illustration purposes.
# # You can add other backgrounds after if you wish.
# samples = ["Signal","All","ccbar"]

# DataFrames = {}  # define empty dictionary to hold dataframes
# Date = "0406"
# Attempt = "0"

# # Signal:
# DataFrames["Signal"] =  uproot.concatenate("/home/belle2/amubarak/C01-Simulated_Events/Ds2D0enu-Signal.root:Dstree",library='pd')
# # Background
# for s in samples[1:]: # loop over samples
#     DataFrames[s] =  uproot.concatenate("/group/belle2/users2022/amubarak/02-Grid/Completed/Ds2D0e-Generic_Ds_" + Date +"25_"+ Attempt +"_"+ s +".root:Dstree",library='pd')

In [None]:
# Sample names. The first entry is the signal sample; every other entry is
# read from its own TopoAna_<name>.root file below.
samples = ["Signal","All","BB","ccbar","ddbar","ssbar","taupair","uubar","uds"]
signal_name, *background_names = samples

signal_file = "/home/belle2/amubarak/C01-Simulated_Events/Ds2D0enu-Signal.root:Dstree"
background_template = "/group/belle2/users2022/amubarak/TopoAna/Completed_TopoAna/TopoAna_{}.root:Dstree"

# One pandas DataFrame per sample, keyed by sample name (signal first).
DataFrames = {signal_name: uproot.concatenate(signal_file, library='pd')}
for background_name in background_names:
    DataFrames[background_name] = uproot.concatenate(
        background_template.format(background_name), library='pd'
    )

In [None]:
# Raise pandas' row/column display limits to 200000 so that long listings
# (column lists, value_counts tables) are printed without truncation.
for display_option in ("display.max_rows", "display.max_columns"):
    pd.set_option(display_option, 200000)

The cell below lists the variables (columns) available in the ccbar sample.

In [None]:
# Display every column name of the ccbar DataFrame as a plain Python list.
list(DataFrames["ccbar"].columns)

### Setup
The cell below holds the optional selection cuts applied to the data (all of them are currently commented out).  
Plot ranges are set separately in each plotting cell.

In [None]:
# Electron ID
#-------------------
# DataFrames["Signal"] = DataFrames["Signal"][DataFrames["Signal"]['e_electronID']>=0.95]
# DataFrames["ccbar"] = DataFrames["ccbar"][DataFrames["ccbar"]['e_electronID']>=0.95]
# DataFrames["Signal"] = DataFrames["Signal"][DataFrames["Signal"]['Ds_gammaveto_em_electronID']>=0.95]
# DataFrames["ccbar"] = DataFrames["ccbar"][DataFrames["ccbar"]['Ds_gammaveto_em_electronID']>=0.95]

# Fake D0 Suppression
#------------------------
# DataFrames["Signal"] = DataFrames["Signal"][DataFrames["Signal"]['Ds_extraInfo_FastBDT']>=0.58]
# DataFrames["All"] = DataFrames["All"][DataFrames["All"]['Ds_extraInfo_FastBDT']>=0.58]

# Peaking Background Removal
#----------------------------
# DataFrames["ccbar"] = DataFrames["ccbar"][(DataFrames["ccbar"]['Ds_diff_D0pi']>=0.15)]
# DataFrames["Signal"] = DataFrames["Signal"][(DataFrames["Signal"]['Ds_diff_D0pi']>=0.15)]

# Photon Conversion
#-------------------
# DataFrames["Signal"] = DataFrames["Signal"][DataFrames["Signal"]['Ds_gammaveto_M_Correction']<=0.1]
# DataFrames["All"] = DataFrames["All"][DataFrames["All"]['Ds_gammaveto_M_Correction']<=0.1]

# # Vertex Fitting
# #----------------
# DataFrames["Signal"] = DataFrames["Signal"][DataFrames["Signal"]['Ds_chiProb']>=0.01]
# DataFrames["ccbar"] = DataFrames["ccbar"][DataFrames["ccbar"]['Ds_chiProb']>=0.01]

# Vertex Fit
#----------------
# DataFrames["Signal"] = DataFrames["Signal"][DataFrames["Signal"]['Ds_chiProb_Ds_rank']==1]
# DataFrames["All"] = DataFrames["All"][DataFrames["All"]['Ds_chiProb_Ds_rank']==1]

In [None]:
# Replace missing electronID values in the "All" sample with 10.
# NaN compares False against any threshold, so without this fill those rows
# would be silently dropped by every later `>= cut` selection on this column.
eid_column = 'Ds_gammaveto_em_electronID'
DataFrames["All"][eid_column] = DataFrames["All"][eid_column].fillna(10)

In [None]:
# Fraction of each |pi_mcPDG| value (NaN included) among "All" candidates
# with Ds_extraInfo_BkgBDT >= 0.531.
bdt_pass = DataFrames["All"].loc[DataFrames["All"]['Ds_extraInfo_BkgBDT'] >= 0.531]
print(bdt_pass['pi_mcPDG'].abs().value_counts(normalize=True, dropna=False))

In [None]:
# Raw (signed) Ds_mcPDG counts in the ccbar sample, NaN included.
ds_pdg_counts = DataFrames["ccbar"]['Ds_mcPDG'].value_counts(dropna=False)
print(ds_pdg_counts)

In [None]:
# Number of missing (NaN) entries in each column of the ccbar sample.
DataFrames["ccbar"].isnull().sum(axis=0)

# Truth-matching

In [None]:
# |Ds_mcPDG| fractions in the "All" sample, printed with six fixed decimals
# so that very small fractions are not shown in scientific notation.
ds_pdg_fractions = DataFrames["All"]['Ds_mcPDG'].abs().value_counts(normalize=True, dropna=False)
print(ds_pdg_fractions.map("{:.6f}".format))

In [None]:
# Normalised |PDG| breakdown (NaN included) of the Ds candidate's matched
# particle and of its generator-level mother, for the "All" sample.
all_events = DataFrames["All"]
for pdg_column in ('Ds_mcPDG', 'Ds_genMotherPDG'):
    print(all_events[pdg_column].abs().value_counts(normalize=True, dropna=False))

In [None]:
# Same breakdown as for the Ds candidate, now for the D0 daughter.
all_events = DataFrames["All"]
for pdg_column in ('D0_mcPDG', 'D0_genMotherPDG'):
    print(all_events[pdg_column].abs().value_counts(normalize=True, dropna=False))

In [None]:
# For each D0-origin flag, print the normalised |PDG| breakdown of the listed
# generator-level mother column among the ccbar candidates carrying that flag.
# NOTE(review): the NoDstarplusDstar0 category looks at the *electron's*
# mother while the others look at the D0's mother -- confirm this is intended.
ccbar = DataFrames["ccbar"]
category_to_mother = [
    ('Ds_D0_Dstarplus', 'D0_genMotherPDG'),
    ('Ds_D0_Dstar0', 'D0_genMotherPDG'),
    ('Ds_D0_NoDstarplusDstar0', 'e_genMotherPDG'),
    ('Ds_D0_Other', 'D0_genMotherPDG'),
]
for flag_column, mother_column in category_to_mother:
    flagged = ccbar[ccbar[flag_column].abs() == 1]
    print(flagged[[mother_column]].abs().value_counts(normalize=True, dropna=False))

In [None]:
# Number of ccbar candidates carrying the D*+ flag and the D*0 flag.
ccbar = DataFrames["ccbar"]
for flag_column in ('Ds_D0_Dstarplus', 'Ds_D0_Dstar0'):
    print(len(ccbar[ccbar[flag_column].abs() == 1]))

# Single Variable  
The cells below examine individual variables in detail.

In [None]:
import matplotlib.pyplot as plt

# ----------------------------------------------------------------------
# Settings
# ----------------------------------------------------------------------
Bins = 50
Range = [0.1, 0.55]
Samples = "All"
BS = 0.531                                   # lower threshold on Ds_extraInfo_BkgBDT
var = 'Ds_diff_D0pi'
perBin = ((Range[1] - Range[0])/Bins)*1000   # bin width; x axis is in GeV, label in MeV

# One figure is drawn per background category, titled with these labels.
bg_labels = [
    r'$D^{*+} \rightarrow D^{0} \; \pi^{+}$',
    r'$D^{*0} \rightarrow D^{0} \; \pi^{0} / \gamma$',
    r'$D^{0}$',
    r'$Other$'
]

frame = DataFrames[Samples]
d0_flag = frame['D0_isSignal'].abs()
bkg_cut = frame['Ds_extraInfo_BkgBDT'] >= BS

# Category masks, in the same order as bg_labels. The last one collects the
# candidates whose D0_isSignal is missing or zero.
bg_conditions = [
    (frame['Ds_D0_Dstarplus'].abs() == 1) & (d0_flag == 1),
    (frame['Ds_D0_Dstar0'].abs() == 1) & (d0_flag == 1),
    (frame['Ds_D0_NoDstarplusDstar0'].abs() == 1) & (d0_flag == 1),
    frame['D0_isSignal'].isna() | (d0_flag == 0),
]

# Electron truth-match categories (only these three are drawn; candidates
# matched to any other particle type do not appear in the histograms).
tm_labels = [r'Truth-match Failed', r'$e^{-}$', r'$\pi^{+}$']
tm_colors = ["#007C91", "#4C6EB1", "#2E2E2E"]

for category_label, category_mask in zip(bg_labels, bg_conditions):
    selected = frame[category_mask & bkg_cut]
    e_pdg = selected['e_mcPDG']

    # Split by the matched identity of the electron candidate.
    per_truth = [
        selected.loc[e_pdg.isna(), var],          # no match (NaN PDG code)
        selected.loc[e_pdg.abs() == 11, var],     # |PDG| 11: electron
        selected.loc[e_pdg.abs() == 211, var],    # |PDG| 211: charged pion
    ]

    fig, ax = plt.subplots()
    ax.hist(
        per_truth,
        bins=Bins,
        range=Range,
        histtype='step',
        linewidth=1.5,
        stacked=False,
        label=tm_labels,
        color=tm_colors,
    )

    ax.set_title(category_label, loc='left')
    ax.set_title(r'$\int\mathcal{L}dt\approx\;1444$ fb$^{-1}$', loc='right')
    ax.set_xlabel(r'$\Delta m(D_s^{+} - D^{0})\;[GeV/c^{2}]$')
    ax.set_ylabel(r'$Entries/(\;{:.2f}\;MeV/c^2)$'.format(perBin))

    # Only the D*+ figure (first category) gets a fixed y range; adjust as needed.
    if category_label == bg_labels[0]:
        ax.set_ylim(0, 2000)

    ax.legend()
    plt.show()



In [None]:
# --- Plot settings ------------------------------------------------------
Samples = "All"
var = 'Ds_diff_D0pi'
# var = 'Ds_massDifference_0'
# var = "Ds_gammaveto_M_Correction"
Range = [0.1, 0.55]
Bins = 50
Stacked = True
Density = False

# --- Selection thresholds -----------------------------------------------
dM = 0.06     # upper cut on Ds_gammaveto_M_Correction
BS = -1       # lower cut on Ds_extraInfo_BkgBDT
eID = 0.1     # lower cut on Ds_gammaveto_em_electronID
FD = -1       # not referenced anywhere in this cell
i = 0         # not referenced anywhere in this cell

perBin = ((Range[1] - Range[0])/Bins)*1000
print(f"Width Per Bin: {perBin:.2f} MeV")

# Categories, listed in drawing order.
labels = [
    r'$Other$',
    r'$D^{0}$',
    r'$D^{*0} \rightarrow D^{0} \; \pi^{0} / \gamma$',
    r'$D^{*+} \rightarrow D^{0} \; \pi^{+}$',
]
colors = ['C5', 'C4', 'C1', 'C2']

frame = DataFrames[Samples]
d0_flag = frame['D0_isSignal'].abs()

# Cuts shared by every category; evaluated once and reused.
common_cuts = (
    (frame['Ds_extraInfo_BkgBDT'] >= BS)
    & (frame["Ds_gammaveto_M_Correction"] <= dM)
    & (frame['Ds_gammaveto_em_electronID'] >= eID)
)
category_masks = [
    d0_flag.isna() | (d0_flag == 0),
    (frame['Ds_D0_NoDstarplusDstar0'].abs() == 1) & (d0_flag == 1),
    (frame['Ds_D0_Dstar0'].abs() == 1) & (d0_flag == 1),
    (frame['Ds_D0_Dstarplus'].abs() == 1) & (d0_flag == 1),
]
data = [frame.loc[category & common_cuts, var] for category in category_masks]

fig, ax = plt.subplots()
ax.hist(data, color=colors, label=labels, density=Density, stacked=Stacked,
        bins=Bins, alpha=1, histtype='step', linewidth=1.5, range=Range)

# Titles. NOTE(review): the left title hard-codes 0.06 -- keep it in sync with dM.
ax.set_title(r'$m(e_{sig}^{+} e_{ROE}^{-}) \leq 0.06 GeV/c^2$', loc="left")
ax.set_title(r'$\int\mathcal{L}dt\approx\;1444$ fb$^{-1}$', loc="right")

# Axis labels
ax.set_ylabel(r'$Entries/(\; {width:.2f}\;MeV/c^2)$'.format(width = perBin))
ax.set_xlabel(r'$\Delta m(D_s^{+} - D^{0})\;[GeV/c^{2}]$')
# ax.set_yscale("log")
# ax.set_ylim(0, 3600)
ax.legend(loc='upper right')
plt.show()

In [None]:
# Shape comparison of the background categories in Ds_massDifference_0 after
# requiring Ds_diff_D0pi >= dM. Each category is normalised separately
# (Density=True, Stacked=False).

# --- Plot settings ------------------------------------------------------
Samples = "All"
var = 'Ds_massDifference_0'
# var = 'Ds_diff_D0pi'
Range = [0.0, 0.25]
Bins = 50
Stacked = False
Density = True

# --- Selection thresholds -----------------------------------------------
dM = 0.162    # lower cut on Ds_diff_D0pi
BS = -1       # lower cut on Ds_extraInfo_BkgBDT
eID = -1      # lower cut on Ds_gammaveto_em_electronID
FD = -1       # not referenced anywhere in this cell
i = 0         # not referenced anywhere in this cell

perBin = ((Range[1] - Range[0])/Bins)*1000
print("Width Per Bin: {width:.2f} MeV".format(width = perBin))

# Categories, listed in drawing order.
labels = [
    r'$Other$',
    r'$D^{0}$',
    r'$D^{*0} \rightarrow D^{0} \; \pi^{0} / \gamma$',
    r'$D^{*+} \rightarrow D^{0} \; \pi^{+}$',
]
colors = ['C5', 'C4', 'C1', 'C2']

frame = DataFrames[Samples]
d0_flag = frame['D0_isSignal'].abs()

# Cuts shared by every category; evaluated once and reused.
common_cuts = (
    (frame['Ds_extraInfo_BkgBDT'] >= BS)
    & (frame['Ds_diff_D0pi'] >= dM)
    & (frame['Ds_gammaveto_em_electronID'] >= eID)
)
category_masks = [
    d0_flag.isna() | (d0_flag == 0),
    (frame['Ds_D0_NoDstarplusDstar0'].abs() == 1) & (d0_flag == 1),
    (frame['Ds_D0_Dstar0'].abs() == 1) & (d0_flag == 1),
    (frame['Ds_D0_Dstarplus'].abs() == 1) & (d0_flag == 1),
]
data = [frame.loc[category & common_cuts, var] for category in category_masks]

fig, ax = plt.subplots()
ax.hist(data, color=colors, label=labels, density=Density, stacked=Stacked,
        bins=Bins, alpha=1, histtype='step', linewidth=1.5, range=Range)

ax.set_title(r'$\int\mathcal{L}dt\approx\;1444$ fb$^{-1}$', loc="right")

# With density=True matplotlib draws a probability density (counts divided by
# the total and by the bin width in x units), so the axis must not be
# labelled as "Entries per bin".
if Density:
    ax.set_ylabel(r'$Probability\;density\;[(GeV/c^{2})^{-1}]$')
else:
    ax.set_ylabel(r'$Entries/(\; {width:.2f}\;MeV/c^2)$'.format(width = perBin))
ax.set_xlabel(r'$\Delta m(D_s^{+} - D^{0})\;[GeV/c^{2}]$')
# ax.set_yscale("log")
ax.set_ylim(0, 10)
ax.legend(loc='upper right')
plt.show()

In [None]:
# Distribution of the gamma-veto electron's electronID in the "All" sample,
# with the region below the 0.1 threshold shaded.
Bins=50
Density = False
Stacked = False
Range = [0.0,1.0]
var = 'Ds_gammaveto_em_electronID'
# var = 'Ds_massDifference_0'
eID_cut = 0.1   # threshold marked on the plot

# electronID is plotted on [0, 1] with a unitless axis label, so the bin
# width is reported as a plain number (no x1000 GeV -> MeV conversion).
perBin = (Range[1] - Range[0])/Bins
print("Width Per Bin: {width:.2f}".format(width = perBin))

# NOTE(review): this label names the D*+ category although the whole "All"
# sample is drawn; it is currently invisible because the legend is disabled.
labels=[r'$D^{*+} \rightarrow D^{0} \; \pi^{+}$']
colors=["#4C6EB1"]

# Values outside Range (for instance a fill value of 10 used for missing
# entries) are not drawn.
data = [DataFrames["All"][var]]

fig, ax = plt.subplots()
ax.hist(data, color=colors, label=labels, alpha=1, range=Range, linewidth=1.5,
        stacked=Stacked, density=Density, bins=Bins, histtype='step')
ax.axvspan(Range[0], eID_cut, color='gray', alpha=0.2)
ax.axvline(eID_cut, ls='--', color='gray')

# Title
ax.set_title(r'$\int\mathcal{L}dt\approx\;1444$ fb$^{-1}$', loc = "right")
# Labels
ax.set_ylabel(r'$Entries$')
ax.set_xlabel(r'$electronID(e_{ROE}^{-})$')
# ax.set_yscale("log")
# ax.legend()
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Delta-m distribution of the ccbar background components after the BDT cut,
# each rescaled by its own factor, overlaid with the (scaled) signal.

# Configuration
Stacked = False
Density = False
Bins = 50
var = 'Ds_diff_D0pi'
Range = [0.16, 0.55]
BS = 0.531  # BDT Cut
Samples = "ccbar"

# Compute bin width in MeV
perBin = ((Range[1] - Range[0]) / Bins) * 1000
print(f"Width Per Bin: {perBin:.2f} MeV")

# Define Labels and Colors (same order as the category masks below)
labels = [
    r'$D^{*+} \rightarrow D^{0} \pi^{+}$',
    r'$D^{*0} \rightarrow D^{0} \gamma, D^{0} \pi^{0}$',
    r'$D^{0}$',
    r'$Other$'
]
colors = ['C2', 'C1', 'C4', 'C5']

# Scaling factor applied to each background component. Adjust as needed
background_factors = [1, 0.58, 1.3, 20.0]

# Select Data for Background Categories
frame = DataFrames[Samples]
d0_flag = frame['D0_isSignal'].abs()
passes_bdt = frame['Ds_extraInfo_BkgBDT'] >= BS
category_masks = [
    (frame['Ds_D0_Dstarplus'].abs() == 1) & (d0_flag == 1),
    (frame['Ds_D0_Dstar0'].abs() == 1) & (d0_flag == 1),
    (frame['Ds_D0_NoDstarplusDstar0'].abs() == 1) & (d0_flag == 1),
    # "Other": flagged as such, or matched to a D0 (|PDG| 421) with D0_isSignal == 0.
    (frame['Ds_D0_Other'].abs() == 1) | ((frame['D0_mcPDG'].abs() == 421) & (d0_flag == 0)),
]
data = [frame.loc[category & passes_bdt, var] for category in category_masks]

# One constant weight per entry implements the per-component scaling.
background_weights = [np.full(len(d), factor) for d, factor in zip(data, background_factors)]

# Signal, scaled by its own factor
factor = 0.02  # Signal Scaling Factor
signal_data = DataFrames["Signal"][(DataFrames["Signal"]['Ds_extraInfo_BkgBDT'] >= BS)][var]
signal_weights = np.full(len(signal_data), factor)

fig, ax = plt.subplots()

# Plot Signal (Dashed Line)
ax.hist(signal_data, label="Signal", histtype='step', density=Density,
        bins=Bins, alpha=1, range=Range, weights=signal_weights, ls='--', linewidth=1.5)

# Plot Backgrounds with Individual Scaling Factors
ax.hist(data, color=colors, label=labels, density=Density, stacked=Stacked,
        bins=Bins, alpha=1, histtype='step', linewidth=1.5, range=Range,
        weights=background_weights)

# Titles and Labels. The left title is built from BS so it cannot drift away
# from the cut that is actually applied.
ax.set_title(rf'$BDT \geq {BS}$', loc="left")
ax.set_title(r'$\int\mathcal{L}dt\approx\;1443.999$ fb$^{-1}$', loc="right")
ax.set_ylabel(f'Entries/({perBin:.2f} MeV/$c^2$)')
ax.set_xlabel(r'$\Delta m(D_s^{+} - D^{0})$ [GeV/$c^2$]')

# Fixed y range. The earlier "auto-adjust" limit was dropped: it was always
# overridden by this call, and it was computed from the unweighted sum of all
# components, which does not match the overlaid, weighted histograms.
# ax.set_yscale("log")
ax.set_ylim(0, 4000)
ax.legend()
plt.show()

In [None]:
# Combinatorial vs. peaking split of the D*0 and D*+ categories: within each
# category, candidates are separated by whether |Ds_mcPDG| equals the D* code
# (423 for D*0, 413 for D*+). Each pair is drawn as its own stacked histogram.

# Both pairs are drawn stacked. This flag is now the single source of truth;
# previously it was set to False while the hist calls hard-coded stacked=True.
Stacked = True
Density = False
Bins = 50
Range = [0.1, 0.55]
Op = -1     # lower cut on Ds_extraInfo_BkgBDT
dM = -1     # lower cut on Ds_gammaveto_M_Correction
# Column to plot. Other columns tried with this cell: 'e_cos_theta',
# 'Ds_vpho_CMS_daughterAngle', 'Ds_chiProb_noIP', 'Ds_chiProb',
# 'Ds_extraInfo_FastBDT', 'D0_chiProb', 'Ds_Ds_starminusDs_M_Correction',
# "Ds_gammaveto_M_Correction", "Ds_L_diff", 'e_pt'
# (adjust Range and the axis labels accordingly).
i = 'Ds_diff_D0pi'
perBin = ((Range[1] - Range[0])/Bins)*1000
# perBin = ((Range[1] - Range[0])/Bins)
print("Width Per Bin: {width:.2f} MeV".format(width = perBin))

colors1 = ["#007C91", "#D1495B"]   # teal, muted red
colors2 = ['C1', "#D1495B"]        # default-cycle colour C1, muted red
colors3 = ['C2', "#007C91"]        # default-cycle colour C2, teal

frame = DataFrames["All"]
ds_pdg = frame['Ds_mcPDG'].abs()
# Cuts shared by every selection below; evaluated once and reused.
base_cuts = (frame['Ds_gammaveto_M_Correction'] >= dM) & (frame['Ds_extraInfo_BkgBDT'] >= Op)
is_dstar0 = frame['Ds_D0_Dstar0'].abs() == 1
is_dstarplus = frame['Ds_D0_Dstarplus'].abs() == 1

# D0 / Other (only used by the optional hist call below)
labels1 = [r'$D^{0}$', r'$Other$']
data1 = [
    frame.loc[(frame['Ds_D0_NoDstarplusDstar0'].abs() == 1) & base_cuts, i],
    frame.loc[(frame['Ds_D0_Other'].abs() == 1) & base_cuts, i],
]
# D*0: candidates with a missing Ds_mcPDG count as combinatorial (NaN != 423).
labels2 = [r'$D^{*0} \; (Comb.)$', r'$D^{*0} \; (Peak)$']
data2 = [
    frame.loc[(ds_pdg != 423) & is_dstar0 & base_cuts, i],
    frame.loc[(ds_pdg == 423) & is_dstar0 & base_cuts, i],
]
# D*+: same split with code 413.
labels3 = [r'$D^{*+} \; (Comb.)$', r'$D^{*+} \; (Peak)$']
data3 = [
    frame.loc[(ds_pdg != 413) & is_dstarplus & base_cuts, i],
    frame.loc[(ds_pdg == 413) & is_dstarplus & base_cuts, i],
]

fig, ax = plt.subplots()
# ax.hist(data1, color=colors1, label=labels1, density=Density, stacked=Stacked, bins=Bins, alpha=1, histtype='step', linewidth=1.5, range=Range)
ax.hist(data2, color=colors2, label=labels2, density=Density, stacked=Stacked,
        bins=Bins, alpha=1, histtype='step', linewidth=1.5, range=Range)
ax.hist(data3, color=colors3, label=labels3, density=Density, stacked=Stacked,
        bins=Bins, alpha=1, histtype='step', linewidth=1.5, range=Range)

# Title
# ax.set_title(r'$BDT \; \geq 0.531$', loc = "left")
ax.set_title(r'$\int\mathcal{L}dt\approx\;1444$ fb$^{-1}$', loc = "right")
# Labels (valid for i = 'Ds_diff_D0pi')
ax.set_ylabel(r'$Entries/(\; {width:.2f}\;MeV/c^2)$'.format(width = perBin))
ax.set_xlabel(r'$\Delta m(D_s^{+} - D^{0})\;[GeV/c^{2}]$')
# ax.set_yscale("log")
# ax.set_xscale("log")
ax.set_ylim(0, 30000)
ax.legend()
plt.show()

# Combinatorial

In [None]:
# Mass of the D*0 candidate (column Ds_Dstar0Mode1_M) for candidates flagged
# Ds_D0_Dstar0 whose |Ds_mcPDG| is not 423, after the BDT cut.
var = 'Ds_Dstar0Mode1_M'
Range = [1.85, 2.15]
Bins = 50
Op = 0.58          # lower cut on Ds_extraInfo_BkgBDT
Stacked = False
Density = False
perBin = ((Range[1] - Range[0])/Bins)*1000
print(f"Width Per Bin: {perBin:.2f} MeV")

labels = [r'$D^{*0} \rightarrow D^{0} \gamma, D^{0} \pi^{0}$']
colors = ['C1']

frame = DataFrames["All"]
selection = (
    (frame['Ds_D0_Dstar0'].abs() == 1)
    & (frame['Ds_mcPDG'].abs() != 423)
    & (frame['Ds_extraInfo_BkgBDT'] >= Op)
)
data = [frame.loc[selection, var]]

fig, ax = plt.subplots()
ax.hist(data, color=colors, label=labels, alpha=1, range=Range,
        stacked=Stacked, density=Density, bins=Bins, histtype='step')

# Titles
ax.set_title(r'$D^{*0} \rightarrow D_{sig}^{0} \gamma_{ROE}$', loc = "left")
ax.set_title(r'$\bf Generic \; Events$', loc = "right")
# Labels
ax.set_ylabel(r'$Entries$')
ax.set_xlabel(r'$m(D^{*0}) \; [GeV/c^{2}]$')
# ax.set_yscale("log")
ax.legend()
plt.show()

In [None]:
# Mass of the D*0 candidate (column Ds_Dstar0Mode2_M) for candidates flagged
# Ds_D0_Dstar0 whose |Ds_mcPDG| is not 423, after the BDT cut.
var = 'Ds_Dstar0Mode2_M'
Range = [1.95, 2.125]
Bins = 50
Op = 0.58          # lower cut on Ds_extraInfo_BkgBDT
Stacked = False
Density = False
perBin = ((Range[1] - Range[0])/Bins)*1000
print(f"Width Per Bin: {perBin:.2f} MeV")

labels = [r'$D^{*0} \rightarrow D^{0} \gamma, D^{0} \pi^{0}$']
colors = ['C1']

frame = DataFrames["All"]
selection = (
    (frame['Ds_D0_Dstar0'].abs() == 1)
    & (frame['Ds_mcPDG'].abs() != 423)
    & (frame['Ds_extraInfo_BkgBDT'] >= Op)
)
data = [frame.loc[selection, var]]

fig, ax = plt.subplots()
ax.hist(data, color=colors, label=labels, alpha=1, range=Range,
        stacked=Stacked, density=Density, bins=Bins, histtype='step')

# Titles
ax.set_title(r'$D^{*0} \rightarrow D_{sig}^{0} \pi_{ROE}^{0}$', loc = "left")
ax.set_title(r'$\bf Generic \; Events$', loc = "right")
# Labels
ax.set_ylabel(r'$Entries$')
ax.set_xlabel(r'$m(D^{*0}) \; [GeV/c^{2}]$')
# ax.set_yscale("log")
ax.legend()
plt.show()

In [None]:
# Mass of the D*+ candidate (column Ds_Dstarplus_M) for candidates flagged
# Ds_D0_Dstarplus whose |Ds_mcPDG| is not 413, after the BDT cut.
var = 'Ds_Dstarplus_M'
Range = [1.95, 2.1]
Bins = 50
Op = 0.58          # lower cut on Ds_extraInfo_BkgBDT
Stacked = False
Density = False
perBin = ((Range[1] - Range[0])/Bins)*1000
print(f"Width Per Bin: {perBin:.2f} MeV")

labels = [r'$D^{*+} \rightarrow D^{0} \pi^{+}$']
colors = ['C2']

frame = DataFrames["All"]
selection = (
    (frame['Ds_D0_Dstarplus'].abs() == 1)
    & (frame['Ds_mcPDG'].abs() != 413)
    & (frame['Ds_extraInfo_BkgBDT'] >= Op)
)
data = [frame.loc[selection, var]]

fig, ax = plt.subplots()
ax.hist(data, color=colors, label=labels, alpha=1, range=Range,
        stacked=Stacked, density=Density, bins=Bins, histtype='step')

# Titles
ax.set_title(r'$D^{*+} \rightarrow D_{sig}^{0} \pi_{ROE}^{+}$', loc = "left")
ax.set_title(r'$\bf Generic \; Events$', loc = "right")
# Labels
ax.set_ylabel(r'$Entries$')
ax.set_xlabel(r'$m(D^{*+}) \; [GeV/c^{2}]$')
# ax.set_yscale("log")
ax.legend()
plt.show()

# 2-D Histogram

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import gridspec
import matplotlib.colors as colors

def plot_2d_with_marginals(df, xvar, yvar, xrange=None, yrange=None, bins=50, title=None,
                           xlabel=r'$\Delta m_{e}(D_s^{+} - D^{0})\;[GeV/c^{2}]$',
                           ylabel=r'$\Delta m_{\pi}(D_s^{+} - D^{0})\;[GeV/c^{2}]$'):
    """Draw a 2-D histogram of two columns together with their 1-D marginals.

    The figure has the 2-D histogram in the main panel, the x marginal above
    it, the y marginal to its right and the colour bar in the top-right corner.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding both columns. Its index may be arbitrary (for example
        after a selection has been applied).
    xvar, yvar : str
        Names of the columns shown on the x and y axes.
    xrange, yrange : (low, high) or None
        Inclusive limits. Rows outside a given range are dropped before
        plotting; ``None`` applies no range cut on that variable.
    bins : int
        Binning, passed unchanged to the 2-D and both 1-D histograms.
    title : str or None
        Optional figure title.
    xlabel, ylabel : str
        Axis labels of the main panel. The defaults are the labels this
        function has always used.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    x = df[xvar]
    y = df[yvar]

    # Build the selection from the columns themselves so that it always
    # carries df's own index. (A fresh pd.Series([True] * n) has a 0..n-1
    # index, which cannot be aligned with a frame whose index has gaps.)
    # Rows with a NaN in either column are dropped explicitly; the range
    # comparisons reject them anyway, this also covers the "no range" case.
    keep = x.notna() & y.notna()
    if xrange is not None:
        keep = keep & (x >= xrange[0]) & (x <= xrange[1])
    if yrange is not None:
        keep = keep & (y >= yrange[0]) & (y <= yrange[1])
    x = x[keep]
    y = y[keep]

    # Figure layout: 4x4 grid, main panel in the lower-left 3x3 block.
    fig = plt.figure(figsize=(6, 6))
    gs = gridspec.GridSpec(4, 4, hspace=0.05, wspace=0.05)

    ax_main = fig.add_subplot(gs[1:4, 0:3])
    ax_xhist = fig.add_subplot(gs[0, 0:3], sharex=ax_main)
    ax_yhist = fig.add_subplot(gs[1:4, 3], sharey=ax_main)
    ax_cbar = fig.add_subplot(gs[0, 3])

    # An explicit 2-D range is only passed when both limits are known;
    # otherwise the histogram spans the (already filtered) data.
    hist_range = None
    if xrange is not None and yrange is not None:
        hist_range = [xrange, yrange]

    # 2D histogram (linear colour scale; pass norm=colors.LogNorm() for a log scale)
    counts, xedges, yedges, im = ax_main.hist2d(x, y, bins=bins, range=hist_range, cmap="viridis")
    cbar = fig.colorbar(im, cax=ax_cbar)
    cbar.set_label("Entries")

    # Marginal histograms
    ax_xhist.hist(x, bins=bins, range=xrange, color="steelblue")
    ax_yhist.hist(y, bins=bins, range=yrange, orientation="horizontal", color="steelblue")

    # Axis labels
    ax_main.set_xlabel(xlabel)
    ax_main.set_ylabel(ylabel)
    ax_xhist.set_ylabel("Entries")
    ax_yhist.set_xlabel("Entries")

    # The marginals share their data axis with the main panel, so hide the
    # duplicated tick labels.
    plt.setp(ax_xhist.get_xticklabels(), visible=False)
    plt.setp(ax_yhist.get_yticklabels(), visible=False)

    # Optional title
    if title:
        fig.suptitle(title, fontsize=14)

    fig.tight_layout(rect=[0, 0, 1, 0.96])  # Leave space for title
    plt.show()


In [None]:
# Delta-m under the two mass hypotheses for the generic ("All") sample.
plot_2d_with_marginals(
    DataFrames["All"],
    "Ds_massDifference_0",
    "Ds_diff_D0pi",
    xrange=(0.0, 0.25),
    yrange=(0.1, 0.55),
    bins=60,
    title="Background",
)

In [None]:
# Same view for the signal sample (note the slightly wider y range, up to 0.60).
plot_2d_with_marginals(
    DataFrames["Signal"],
    "Ds_massDifference_0",
    "Ds_diff_D0pi",
    xrange=(0.0, 0.25),
    yrange=(0.1, 0.60),
    bins=60,
    title="Signal",
)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Correlation between the two Delta-m definitions in the signal sample
# (DataFrame.corr() with its default settings).
signal_pair = DataFrames["Signal"][["Ds_massDifference_0", "Ds_diff_D0pi"]]
corr = signal_pair.corr()

# Axis tick labels, in the same order as the columns above.
new_labels = [
    r'$\Delta m_{e}(D_s^{+} - D^{0})\;[\mathrm{GeV}/c^{2}]$',
    r'$\Delta m_{\pi}(D_s^{+} - D^{0})\;[\mathrm{GeV}/c^{2}]$'
]

fig, ax = plt.subplots(figsize=(8, 6))
heatmap = sns.heatmap(corr, annot=True, cmap="coolwarm", vmin=-1, vmax=1, ax=ax)
heatmap.set_xticklabels(new_labels, rotation=45, ha='right')
heatmap.set_yticklabels(new_labels, rotation=0)
heatmap.set_title('Signal Correlation Heatmap', fontdict={'fontsize': 18}, pad=16)

fig.tight_layout()
plt.show()


In [None]:
# Same signal correlation heatmap, keeping the raw column names as tick labels.
fig, ax = plt.subplots(figsize=(8, 6))
signal_corr = DataFrames["Signal"][["Ds_massDifference_0","Ds_diff_D0pi"]].corr()
heatmap = sns.heatmap(signal_corr, annot=True, cmap="coolwarm", vmin=-1, vmax=1, ax=ax)
heatmap.set_title('Signal Correlation Heatmap', fontdict={'fontsize':18}, pad=16)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Correlation between the two Delta-m definitions in the generic ("All")
# sample (DataFrame.corr() with its default settings).
background_pair = DataFrames["All"][["Ds_massDifference_0", "Ds_diff_D0pi"]]
corr = background_pair.corr()

# Axis tick labels, in the same order as the columns above.
new_labels = [
    r'$\Delta m_{e}(D_s^{+} - D^{0})\;[\mathrm{GeV}/c^{2}]$',
    r'$\Delta m_{\pi}(D_s^{+} - D^{0})\;[\mathrm{GeV}/c^{2}]$'
]

fig, ax = plt.subplots(figsize=(8, 6))
heatmap = sns.heatmap(corr, annot=True, cmap="coolwarm", vmin=-1, vmax=1, ax=ax)
heatmap.set_xticklabels(new_labels, rotation=45, ha='right')
heatmap.set_yticklabels(new_labels, rotation=0)
heatmap.set_title('Background Correlation Heatmap', fontdict={'fontsize': 18}, pad=16)

fig.tight_layout()
plt.show()


In [None]:
# Same background correlation heatmap, keeping the raw column names as tick labels.
fig, ax = plt.subplots(figsize=(8, 6))
background_corr = DataFrames["All"][["Ds_massDifference_0","Ds_diff_D0pi"]].corr()
heatmap = sns.heatmap(background_corr, annot=True, cmap="coolwarm", vmin=-1, vmax=1, ax=ax)
heatmap.set_title('Background Correlation Heatmap', fontdict={'fontsize':18}, pad=16)

In [None]:
from sklearn.feature_selection import mutual_info_regression

# Mutual information between the two Delta-m definitions in the "All" sample.
# Rows with a NaN in either column are removed together, because the
# estimator rejects non-finite input.
mi_pair = DataFrames["All"][['Ds_massDifference_0', 'Ds_diff_D0pi']].dropna()
X = mi_pair['Ds_massDifference_0'].values.reshape(-1, 1)
y = mi_pair['Ds_diff_D0pi'].values

# The estimator adds a small random noise to continuous features; fixing
# random_state makes the printed value reproducible from run to run.
mi = mutual_info_regression(X, y, discrete_features=False, random_state=0)
print(f"Mutual Information: {mi[0]:.3f}")

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import spearmanr

# Spearman correlation between the two Delta-m definitions, evaluated in
# slices of the electron column "e_p".
# NOTE(review): the axis text says "energy" while the column is named e_p
# (presumably the momentum) -- confirm which quantity is meant.
df = DataFrames["All"]
x = df["Ds_massDifference_0"]
y = df["Ds_diff_D0pi"]
e_energy = df["e_p"]

# 25 equally spaced edges between the smallest and largest value -> 24 slices.
bins = np.linspace(e_energy.min(), e_energy.max(), 25)
bin_centers = (bins[:-1] + bins[1:]) / 2

spearman_corr = []
for low_edge, high_edge in zip(bins[:-1], bins[1:]):
    in_bin = (e_energy >= low_edge) & (e_energy < high_edge)
    # Slices with 100 entries or fewer are left as gaps (NaN): too few
    # candidates for a stable correlation.
    if in_bin.sum() <= 100:
        spearman_corr.append(np.nan)
        continue
    rho, _pvalue = spearmanr(x[in_bin], y[in_bin])
    spearman_corr.append(rho)

fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(bin_centers, spearman_corr, marker='o', linestyle='-', label="Spearman ρ")
ax.set_xlabel("Electron energy (GeV)", fontsize=12)
ax.set_ylabel("Spearman correlation (ρ)", fontsize=12)
ax.set_title("Spearman correlation: Δm (e vs π hypothesis) vs Electron energy", fontsize=14)
ax.grid(True)
ax.set_ylim(0, 1.05)   # negative correlations would fall outside this window
fig.tight_layout()
ax.legend()
plt.show()


In [None]:
from sklearn.metrics import normalized_mutual_info_score
import numpy as np

# Normalised mutual information between the two Delta-m definitions.
# normalized_mutual_info_score works on discrete labels, so both variables
# are first binned.

# Drop rows with a NaN in either column (together, so x and y stay aligned):
# the bin edges cannot be derived from data that contains NaN.
nmi_pair = DataFrames["All"][["Ds_massDifference_0", "Ds_diff_D0pi"]].dropna()
x = nmi_pair["Ds_massDifference_0"]
y = nmi_pair["Ds_diff_D0pi"]

bins = 200  # number of bins per variable; adjust as needed
x_edges = np.histogram_bin_edges(x, bins=bins)
y_edges = np.histogram_bin_edges(y, bins=bins)

# Digitise against the interior edges only: labels then run 0..bins-1 and the
# largest value lands in the last bin. (Digitising against all edges put it
# in an extra bin of its own.)
x_binned = np.digitize(x, x_edges[1:-1])
y_binned = np.digitize(y, y_edges[1:-1])

nmi = normalized_mutual_info_score(x_binned, y_binned)
print(f"Normalized Mutual Information = {nmi:.3f}")

