In [None]:
import sys

import seaborn as sns

import numpy as np
import pandas as pd
import uproot
import matplotlib.pyplot as plt
from matplotlib import gridspec
import matplotlib.colors as colors

In [None]:
# Notebook-wide matplotlib text sizes: axis labels, tick labels, legend
# entries and figure titles.
plt.rcParams["axes.labelsize"] = 14
plt.rcParams["xtick.labelsize"] = 12
plt.rcParams["ytick.labelsize"] = 12
plt.rcParams["legend.fontsize"] = 12
plt.rcParams["figure.titlesize"] = 18

In [None]:
# Extend the module search path with the analysis function directory
# (presumably the project's shared helper modules - confirm against the imports
# that rely on it).
# The membership check keeps the cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path each time.
PYTHON_FUNCTIONS_DIR = "/home/belle2/amubarak/Ds2D0enue_Analysis/07-Python_Functions/"
if PYTHON_FUNCTIONS_DIR not in sys.path:
    sys.path.append(PYTHON_FUNCTIONS_DIR)

# Prep-Work

### Import Data

In [None]:
# === Branch whitelist ===
# Save_var.txt is read line by line: blank lines and lines starting with '#'
# are skipped; surrounding whitespace, commas and quotes are stripped from the
# remaining entries.
with open("/home/belle2/amubarak/Ds2D0enue_Analysis/03-Grid/Save_var.txt") as f:
    variables_to_load = [
        entry.strip(",").strip('"').strip("'")
        for entry in map(str.strip, f)
        if entry and not entry.startswith("#")
    ]

# Only the main signal and the generic samples are processed here, for
# illustration purposes. Other backgrounds can be added afterwards.
samples = ["Signal", "BB", "ccbar", "ddbar", "ssbar", "taupair", "uubar"]
GenEvents = list(samples)

# Tags used to build the generic-sample file names below.
Date = "0526"
Attempt = "0"

# First entry is the signal sample, the rest are the generic samples.
signal_name, *generic_names = samples

# Sample name -> pandas DataFrame read from the "Dstree" tree.
DataFrames = {
    signal_name: uproot.concatenate(
        "/home/belle2/amubarak/C01-Simulated_Events/Ds2D0enu-Signal.root:Dstree",
        filter_name=variables_to_load,
        library='pd',
    ),
}

# One file per generic sample, named after Date, Attempt and the sample.
for s in generic_names:
    generic_file = f"/group/belle2/users2022/amubarak/02-Grid/Sample_Grid/Ds2D0e-Generic_Ds_{Date}25_{Attempt}_{s}.root:Dstree"
    DataFrames[s] = uproot.concatenate(
        generic_file,
        filter_name=variables_to_load,
        library='pd',
    )

# Samples that are merged into the combined background frame.
background_samples = ["BB", "ccbar", "ddbar", "ssbar", "taupair", "uubar"]

# "All": every background sample stacked into a single frame.
all_background_frames = [DataFrames[name] for name in background_samples]
DataFrames["All"] = pd.concat(all_background_frames, ignore_index=True)

# "uds": the light-quark samples (uubar, ddbar, ssbar) stacked into one frame.
light_quark_frames = [DataFrames[name] for name in ("uubar", "ddbar", "ssbar")]
DataFrames["uds"] = pd.concat(light_quark_frames, ignore_index=True)

In [None]:
# Let pandas display up to 1000 rows and 1000 columns before truncating output.
pd.options.display.max_rows = 1000
pd.options.display.max_columns = 1000

The cell below lists the variables (columns) available in the signal sample.

In [None]:
# Show every column name of the signal DataFrame as a plain Python list.
list(DataFrames["Signal"].columns)

### Setup
The cell below applies selection cuts to the data. Every cut is currently commented out; uncomment the ones you want to apply.

In [None]:
# Electron ID
#-------------------
# DataFrames["Signal"] = DataFrames["Signal"][DataFrames["Signal"]['e_electronID']>=0.95]
# DataFrames["ccbar"] = DataFrames["ccbar"][DataFrames["ccbar"]['e_electronID']>=0.95]
# DataFrames["Signal"] = DataFrames["Signal"][DataFrames["Signal"]['Ds_gammaveto_em_electronID']>=0.95]
# DataFrames["ccbar"] = DataFrames["ccbar"][DataFrames["ccbar"]['Ds_gammaveto_em_electronID']>=0.95]

# Photon Conversion
#-------------------
# DataFrames["Signal"] = DataFrames["Signal"][DataFrames["Signal"]['Ds_gammaveto_M_Correction']>=0.1]
# DataFrames["ccbar"] = DataFrames["ccbar"][DataFrames["ccbar"]['Ds_gammaveto_M_Correction']>=0.1]

# Peaking Background Removal
#----------------------------
# DataFrames["ccbar"] = DataFrames["ccbar"][(DataFrames["ccbar"]['Ds_diff_D0pi']>=0.15)]
# DataFrames["Signal"] = DataFrames["Signal"][(DataFrames["Signal"]['Ds_diff_D0pi']>=0.15)]

# # Vertex Fitting
# #----------------
# DataFrames["Signal"] = DataFrames["Signal"][DataFrames["Signal"]['Ds_chiProb']>=0.01]
# DataFrames["ccbar"] = DataFrames["ccbar"][DataFrames["ccbar"]['Ds_chiProb']>=0.01]

# Dalitz Removal
#----------------------------
# DataFrames["ccbar"] = DataFrames["ccbar"][(DataFrames["ccbar"]['Ds_pi0veto_M_Correction']<=0.08) | (DataFrames["ccbar"]['Ds_pi0veto_M_Correction']>=0.16)]
# DataFrames["Signal"] = DataFrames["Signal"][(DataFrames["Signal"]['Ds_pi0veto_M_Correction']<=0.08) | (DataFrames["Signal"]['Ds_pi0veto_M_Correction']>=0.16)]

# Vertex Fit
#----------------
# DataFrames["Signal"] = DataFrames["Signal"][DataFrames["Signal"]['Ds_chiProb_Ds_rank']==1]
# DataFrames["ccbar"] = DataFrames["ccbar"][DataFrames["ccbar"]['Ds_chiProb_Ds_rank']==1]

# # D0 Invariant Mass
# #-----------------------
# DataFrames["Signal"] = DataFrames["Signal"][(DataFrames["Signal"]['D0_dM']>=-0.02) & (DataFrames["Signal"]['D0_dM']<=0.02)]
# DataFrames["ccbar"] = DataFrames["ccbar"][(DataFrames["ccbar"]['D0_dM']>=-0.02) & (DataFrames["ccbar"]['D0_dM']<=0.02)]

# Comparison

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# === Updated Variables ===
# Branch names to compare; `features` holds the matching x-axis labels in the
# same order and `ranges` the histogram range for each branch.
Variables = [
    'pi_dr',
    'D0_dM',
    'D0_chiProb',
    'D0_flightDistance',
    'D0_useCMSFrame_p',
    'D0_cos_decayAngle_1',
]

features = [
    r'$dr(\pi^{+})\;[\mathrm{cm}]$',
    r'$m(D^{0}) - m_{\mathrm{PDG}}(D^{0})\;[\mathrm{GeV}/c^{2}]$',
    r'p-value of $D^{0}$',
    r'$Flight\;Distance(D^{0})\;[\mathrm{cm}]$',
    r'$p^{*}(D^{0})\;[\mathrm{GeV}/c]$',
    r'$\cos\theta^*_{\mathrm{daughter}_1}$',
]

ranges = {
    'pi_dr': [0, 0.1],
    'D0_dM': [-0.02, 0.02],
    'D0_chiProb': [0, 1],
    'D0_flightDistance': [-0.4, 0.4],
    'D0_useCMSFrame_p': [2.5, 5.0],
    'D0_cos_decayAngle_1': [-1, 1],
}

bins = 50
density = True

# Key of the background frame in `DataFrames` that the signal is compared to.
# Deliberately not named `samples`: that name holds the list of sample names
# in the loading cell and must not be rebound to a string here.
background_key = "All"

# === Fixed Color Scheme ===
# Deliberately not named `colors`: that name is the `matplotlib.colors` module
# imported at the top of the notebook and must not be shadowed by a dict.
hist_colors = {
    'signal': '#007C91',   # Real signal
    'other': '#2E2E2E',    # Everything else
}

# Legend labels for the background categories, in the same order as `bg_masks`.
bg_labels = [
    r'$Other$',
    r'$D^{0}$',
    r'$D^{*0} \rightarrow D^{0} \; \pi^{0} / \gamma$',
    r'$D^{*+} \rightarrow D^{0} \; \pi^{+}$'
]

# Each entry maps a DataFrame to a boolean row selection for one category.
bg_masks = [
    lambda df: df['D0_isSignal'].isna() | (df['D0_isSignal'] == 0),
    lambda df: (df['Ds_D0_NoDstarplusDstar0'] == 1) & (df['D0_isSignal'] == 1),
    lambda df: (df['Ds_D0_Dstar0'] == 1) & (df['D0_isSignal'] == 1),
    lambda df: (df['Ds_D0_Dstarplus'] == 1) & (df['D0_isSignal'] == 1),
]


def _entries_per_bin_label(var_range, n_bins):
    """Return the y-axis label quoting the bin width for `var_range`.

    The width is formatted with two significant digits ('.2g') rather than two
    decimals ('.2f'); with '.2f' the narrow ranges used here (bin widths of
    0.002 and 0.0008) were printed as a meaningless "0.00".
    """
    bin_width = (var_range[1] - var_range[0]) / n_bins
    return r'$Normalized\;Entries/({:.2g})$'.format(bin_width)


def _plot_signal_vs_other(signal_values, other_values, other_label,
                          x_label, var_range, title=None):
    """Overlay the real-signal histogram with one comparison histogram.

    Parameters
    ----------
    signal_values : values histogrammed as "Real Signal".
    other_values : values histogrammed under `other_label`.
    other_label : legend entry for `other_values`.
    x_label : x-axis label.
    var_range : [low, high] histogram range, shared by both histograms.
    title : optional figure title; no title is drawn when None.
    """
    # Identical binning/normalisation for both histograms so they are comparable.
    shared_style = dict(histtype='step', density=density, bins=bins,
                        alpha=1, range=var_range, linewidth=2)

    fig, ax = plt.subplots()
    ax.hist(signal_values, label="Real Signal",
            color=hist_colors['signal'], **shared_style)
    ax.hist(other_values, label=other_label,
            color=hist_colors['other'], **shared_style)

    ax.set_xlabel(x_label)
    ax.set_ylabel(_entries_per_bin_label(var_range, bins))
    ax.legend(loc='upper right')
    if title is not None:
        ax.set_title(title, fontsize=15)

    plt.show()
    # Release the figure explicitly: this cell produces many figures in a loop.
    plt.close(fig)


# === Plotting ===
if background_key in DataFrames and "Signal" in DataFrames:
    signal_df = DataFrames["Signal"]
    background_df = DataFrames[background_key]

    # Row selections do not depend on the plotted variable, so build them once
    # instead of once per variable.
    is_real_signal = signal_df['Ds_isSignal'] == 1
    # NOTE(review): rows where Ds_isSignal is NaN fall in neither selection,
    # whereas the 'Other' background mask does include NaN - confirm intended.
    is_fake_signal = signal_df['Ds_isSignal'] == 0
    bg_selections = [
        (bg_label, mask(background_df))
        for mask, bg_label in zip(bg_masks, bg_labels)
    ]

    for var, label in zip(Variables, features):
        var_range = ranges[var]
        real_signal_data = signal_df.loc[is_real_signal, var]

        # Real signal vs. each background category (one figure per category).
        for bg_label, bg_selection in bg_selections:
            _plot_signal_vs_other(
                real_signal_data,
                background_df.loc[bg_selection, var],
                bg_label,
                label,
                var_range,
            )

        # Real vs Fake Signal, both taken from the signal sample.
        _plot_signal_vs_other(
            real_signal_data,
            signal_df.loc[is_fake_signal, var],
            "Fake Signal",
            label,
            var_range,
            title="Real vs Fake Signal",
        )
else:
    print(f"DataFrames[{background_key!r}] and DataFrames['Signal'] must be defined.")