In [1]:
import numpy as np
import mplhep as hep
import matplotlib.pyplot as plt
import uproot, os, sys
import awkward as ak
# Get the notebook directory
notebook_dir = os.path.dirname(os.path.abspath("__file__"))
# Add the project root to sys.path
sys.path.append(os.path.join(notebook_dir, ".."))
from utils.branches import get_branches
from utils.plot import plot_data
from utils.constants import trigcut, truthpkk
from utils.data_loader import load_data
from matplotlib import rcParams
import matplotlib as mpl
plt.style.use(hep.style.LHCb1)
config = {"mathtext.fontset":'stix'}
rcParams.update(config)

In [2]:
plt.rcParams.update({
    # Keep the font family settings for LHCb style
    "font.family": "serif",
    "font.serif": ["Times", "Computer Modern Roman", "DejaVu Serif"],
    
    # # Increase only the size-related parameters
    # "figure.figsize": (15, 10),  # Larger figure
    # "figure.dpi": 100,          # Screen display
    # "savefig.dpi": 300,         # Saved figure resolution
    
    # # # Increase font sizes while keeping LHCb style
    "font.size": 12,            # Base font size (increase from default)
    "axes.titlesize": 12,       # Title size
    "axes.labelsize": 10,       # Axis label size
    "xtick.labelsize": 12,      # X tick label size
    "ytick.labelsize": 12,      # Y tick label size
    "legend.fontsize": 12       # Legend font size
})


In [3]:
# data_path = "/share/lazy/Mohamed/Bu2LambdaPPP/RD/restripped.data/reduced"
# decay_modes = ["L0barPKpKm"]
# particles = ["h1", "h2", "p"]

# data_ll = load_data(
#     data_path=data_path,
#     decay_modes=decay_modes,
#     tracks=["LL"],
#     particles=particles
# )

# data_dd = load_data(
#     data_path=data_path,
#     decay_modes=decay_modes,
#     tracks=["DD"],
#     particles=particles
# )

data_path = "/share/lazy/Mohamed/Bu2LambdaPPP/RD/restripped.data/reduced"
particles = ["h1", "h2", "p"]
data_ll = load_data(
    data_path=data_path,
    decay_mode="L0barPKpKm",  # or "L0PbarKpKp"
    tracks=["LL"],
    particles=particles
)


Real Data Files being processed for decay mode L0barPKpKm with tracks ['LL']: ['/share/lazy/Mohamed/Bu2LambdaPPP/RD/restripped.data/reduced/dataBu2L0barPHH_16MD_reduced.root:B2L0barPKpKm_LL/DecayTree', '/share/lazy/Mohamed/Bu2LambdaPPP/RD/restripped.data/reduced/dataBu2L0barPHH_16MU_reduced.root:B2L0barPKpKm_LL/DecayTree', '/share/lazy/Mohamed/Bu2LambdaPPP/RD/restripped.data/reduced/dataBu2L0barPHH_17MD_reduced.root:B2L0barPKpKm_LL/DecayTree', '/share/lazy/Mohamed/Bu2LambdaPPP/RD/restripped.data/reduced/dataBu2L0barPHH_17MU_reduced.root:B2L0barPKpKm_LL/DecayTree', '/share/lazy/Mohamed/Bu2LambdaPPP/RD/restripped.data/reduced/dataBu2L0barPHH_18MD_reduced.root:B2L0barPKpKm_LL/DecayTree', '/share/lazy/Mohamed/Bu2LambdaPPP/RD/restripped.data/reduced/dataBu2L0barPHH_18MU_reduced.root:B2L0barPKpKm_LL/DecayTree']
Branches being read: ['h1_P', 'h1_PT', 'h1_PE', 'h1_PX', 'h1_PY', 'h1_PZ', 'h1_ID', 'h1_TRACK_Type', 'h1_IPCHI2_OWNPV', 'h2_P', 'h2_PT', 'h2_PE', 'h2_PX', 'h2_PY', 'h2_PZ', 'h2_ID', '

In [4]:
data_path = "/share/lazy/Mohamed/Bu2LambdaPPP/RD/restripped.data/reduced"
decay_modes = "L0barPKpKm"
particles = ["h1", "h2", "p"]

data_ll = load_data(
    data_path=data_path,
    decay_mode=decay_modes,
    tracks=["LL"],
    particles=particles
)

data_dd = load_data(
    data_path=data_path,
    decay_mode=decay_modes,
    tracks=["DD"],
    particles=particles
)


Real Data Files being processed for decay mode L0barPKpKm with tracks ['LL']: ['/share/lazy/Mohamed/Bu2LambdaPPP/RD/restripped.data/reduced/dataBu2L0barPHH_16MD_reduced.root:B2L0barPKpKm_LL/DecayTree', '/share/lazy/Mohamed/Bu2LambdaPPP/RD/restripped.data/reduced/dataBu2L0barPHH_16MU_reduced.root:B2L0barPKpKm_LL/DecayTree', '/share/lazy/Mohamed/Bu2LambdaPPP/RD/restripped.data/reduced/dataBu2L0barPHH_17MD_reduced.root:B2L0barPKpKm_LL/DecayTree', '/share/lazy/Mohamed/Bu2LambdaPPP/RD/restripped.data/reduced/dataBu2L0barPHH_17MU_reduced.root:B2L0barPKpKm_LL/DecayTree', '/share/lazy/Mohamed/Bu2LambdaPPP/RD/restripped.data/reduced/dataBu2L0barPHH_18MD_reduced.root:B2L0barPKpKm_LL/DecayTree', '/share/lazy/Mohamed/Bu2LambdaPPP/RD/restripped.data/reduced/dataBu2L0barPHH_18MU_reduced.root:B2L0barPKpKm_LL/DecayTree']
Branches being read: ['h1_P', 'h1_PT', 'h1_PE', 'h1_PX', 'h1_PY', 'h1_PZ', 'h1_ID', 'h1_TRACK_Type', 'h1_IPCHI2_OWNPV', 'h2_P', 'h2_PT', 'h2_PE', 'h2_PX', 'h2_PY', 'h2_PZ', 'h2_ID', '

In [5]:
import numpy as np
import awkward as ak
from collections import OrderedDict

def apply_selection_cuts(events, track_type='LL'):
    """
    Apply selection cuts to B+ → Λ0 h1 h2 samples based on specific criteria
    
    Parameters:
    -----------
    events : awkward.Array or dict-like object
        Events from uproot containing the MC sample
    track_type : str
        Track type, either 'LL' (Long-Long) or 'DD' (Downstream-Downstream)
    
    Returns:
    --------
    numpy.ndarray
        Boolean mask of selected events
    dict
        Summary of the cuts applied
    """
    # Initialize mask with all True
    mask = np.ones(len(events), dtype=bool)
    
    # Track the cuts for debugging and reporting
    cuts_summary = OrderedDict()
    initial_events = len(events)
    
    # ===== p (Proton) Cuts =====
    # MC15TuneV1_ProbNNp > 0.05
    p_prob_cut = events['p_MC15TuneV1_ProbNNp'] > 0.05
    mask = mask & p_prob_cut
    cuts_summary['proton_prob_cut'] = np.sum(p_prob_cut)
    
    # ===== Λ0 Cuts =====
    
    # ΔZ > 20 mm (difference between Lambda decay vertex and primary vertex)
    delta_z = events['L0_ENDVERTEX_Z'] - events['L0_OWNPV_Z']
    delta_z_cut = delta_z > 20
    mask = mask & delta_z_cut
    cuts_summary['delta_z_cut'] = np.sum(delta_z_cut)
    
    # χ²FD > 45 (Lambda flight distance chi2)
    fd_chi2_cut = events['L0_FDCHI2_OWNPV'] > 45
    mask = mask & fd_chi2_cut
    cuts_summary['fd_chi2_cut'] = np.sum(fd_chi2_cut)
    
    # |m(pπ⁻) - 1115.6| < 6 MeV/c²
    lambda_mass_diff = np.abs(events['L0_M'] - 1115.6)
    lambda_mass_cut = lambda_mass_diff < 6
    mask = mask & lambda_mass_cut
    cuts_summary['lambda_mass_cut'] = np.sum(lambda_mass_cut)
    
    # Lp_MC15TuneV1_ProbNNp > 0.2
    lp_prob_cut = events['Lp_MC15TuneV1_ProbNNp'] > 0.2
    mask = mask & lp_prob_cut
    cuts_summary['lp_prob_cut'] = np.sum(lp_prob_cut)
    
    # ===== h1 and h2 (Kaon) Cuts =====
    
    # Calculate KK product = h1_ProbNNk × h2_ProbNNk
    kk_product = events['h1_ProbNNk'] * events['h2_ProbNNk']
    
    # Apply a threshold cut on the KK product if needed
    # Threshold value can be adjusted later as required
    kk_product_threshold = 0.04  # Example threshold (0.2 * 0.2)
    kk_product_cut = kk_product > kk_product_threshold
    mask = mask & kk_product_cut
    cuts_summary['kk_product_cut'] = np.sum(kk_product_cut)
    
    # Add individual kaon cuts for reference
    h1_kaon_cut = events['h1_ProbNNk'] > 0.2
    cuts_summary['h1_kaon_cut'] = np.sum(h1_kaon_cut)
    
    h2_kaon_cut = events['h2_ProbNNk'] > 0.2
    cuts_summary['h2_kaon_cut'] = np.sum(h2_kaon_cut)
    
    # ===== B⁺ Cuts =====
    
    # pT > 3000 MeV/c
    b_pt_cut = events['Bu_PT'] > 3000
    mask = mask & b_pt_cut
    cuts_summary['b_pt_cut'] = np.sum(b_pt_cut)
    
    # χ²DTF < 30 & Converged (DTF = Decay Tree Fitter)
    dtf_chi2 = events['Bu_DTF_chi2']
    dtf_chi2_cut = (dtf_chi2 < 30) 
    mask = mask & dtf_chi2_cut
    cuts_summary['dtf_chi2_cut'] = np.sum(dtf_chi2_cut)
    
    # χ²IP < 10 (Impact Parameter Chi2)
    ip_chi2_cut = events['Bu_IPCHI2_OWNPV'] < 10
    mask = mask & ip_chi2_cut
    cuts_summary['ip_chi2_cut'] = np.sum(ip_chi2_cut)
    
    # χ²FD > 175 (Flight Distance Chi2)
    b_fd_chi2_cut = events['Bu_FDCHI2_OWNPV'] > 175
    mask = mask & b_fd_chi2_cut
    cuts_summary['b_fd_chi2_cut'] = np.sum(b_fd_chi2_cut)
    
    # Print selection summary
    selected_events = np.sum(mask)
    print(f"Selection summary for {track_type} sample:")
    print(f"Initial events: {initial_events}")
    for cut_name, cut_count in cuts_summary.items():
        print(f"  {cut_name}: {cut_count} / {initial_events} ({cut_count/initial_events:.2%})")
    print(f"Final selected events: {selected_events} / {initial_events} ({selected_events/initial_events:.2%})")
    
    return mask, cuts_summary, kk_product  # Return kk_product for further analysis if needed

def apply_cuts_to_samples(mc_ll, mc_dd):
    """
    Apply selection cuts to both LL and DD samples
    
    Parameters:
    -----------
    mc_ll : awkward.Array or dict-like object
        Long-Long track type MC sample
    mc_dd : awkward.Array or dict-like object
        Downstream-Downstream track type MC sample
    
    Returns:
    --------
    tuple
        (mc_ll_mask, mc_dd_mask, ll_cuts_summary, dd_cuts_summary, ll_kk_product, dd_kk_product)
    """
    # Apply cuts to LL sample
    ll_mask, ll_cuts_summary, ll_kk_product = apply_selection_cuts(mc_ll, track_type='LL')
    
    # Apply cuts to DD sample
    dd_mask, dd_cuts_summary, dd_kk_product = apply_selection_cuts(mc_dd, track_type='DD')
    
    # Print comparison between LL and DD
    ll_total = len(mc_ll)
    dd_total = len(mc_dd)
    ll_selected = np.sum(ll_mask)
    dd_selected = np.sum(dd_mask)
    
    print("\nComparison between LL and DD selection efficiency:")
    print(f"LL: {ll_selected}/{ll_total} ({ll_selected/ll_total:.2%})")
    print(f"DD: {dd_selected}/{dd_total} ({dd_selected/dd_total:.2%})")
    
    # Print KK product statistics
    print("\nKK Product Statistics:")
    print(f"LL - Mean: {np.mean(ll_kk_product):.4f}, Median: {np.median(ll_kk_product):.4f}")
    print(f"DD - Mean: {np.mean(dd_kk_product):.4f}, Median: {np.median(dd_kk_product):.4f}")
    
    return ll_mask, dd_mask, ll_cuts_summary, dd_cuts_summary, ll_kk_product, dd_kk_product

def apply_mask_to_data(events, mask):
    """
    Apply a boolean mask to event data
    
    Parameters:
    -----------
    events : awkward.Array or dict-like object
        Event data
    mask : numpy.ndarray
        Boolean mask to apply
    
    Returns:
    --------
    awkward.Array or dict-like object
        Selected events
    """
    if hasattr(events, 'mask'):
        # For awkward arrays
        return events[mask]
    else:
        # For dictionary-like objects
        selected = {}
        for key, array in events.items():
            selected[key] = array[mask]
        return selected

# Usage example:
"""
# Apply the cuts to get the selection masks
ll_mask, dd_mask, ll_summary, dd_summary, ll_kk_product, dd_kk_product = apply_cuts_to_samples(mc_ll, mc_dd)

# Apply the masks to get the selected events
mc_ll_selected = apply_mask_to_data(mc_ll, ll_mask)
mc_dd_selected = apply_mask_to_data(mc_dd, dd_mask)

# Now you can proceed with analysis using the selected samples and KK product information
# Further operations with mc_ll_selected, mc_dd_selected, ll_kk_product, dd_kk_product...
"""

'\n# Apply the cuts to get the selection masks\nll_mask, dd_mask, ll_summary, dd_summary, ll_kk_product, dd_kk_product = apply_cuts_to_samples(mc_ll, mc_dd)\n\n# Apply the masks to get the selected events\nmc_ll_selected = apply_mask_to_data(mc_ll, ll_mask)\nmc_dd_selected = apply_mask_to_data(mc_dd, dd_mask)\n\n# Now you can proceed with analysis using the selected samples and KK product information\n# Further operations with mc_ll_selected, mc_dd_selected, ll_kk_product, dd_kk_product...\n'

In [6]:
selected_ll, selected_dd, ll_summary, dd_summary, ll_kk_product, dd_kk_product = apply_cuts_to_samples(data_ll, data_dd)

Selection summary for LL sample:
Initial events: 713632
  proton_prob_cut: 713632 / 713632 (100.00%)
  delta_z_cut: 629470 / 713632 (88.21%)
  fd_chi2_cut: 643705 / 713632 (90.20%)
  lambda_mass_cut: 378105 / 713632 (52.98%)
  lp_prob_cut: 491539 / 713632 (68.88%)
  kk_product_cut: 713632 / 713632 (100.00%)
  h1_kaon_cut: 596584 / 713632 (83.60%)
  h2_kaon_cut: 596142 / 713632 (83.54%)
  b_pt_cut: 713632 / 713632 (100.00%)
  dtf_chi2_cut: 713632 / 713632 (100.00%)
  ip_chi2_cut: 713632 / 713632 (100.00%)
  b_fd_chi2_cut: 713632 / 713632 (100.00%)
Final selected events: 262305 / 713632 (36.76%)
Selection summary for DD sample:
Initial events: 561928
  proton_prob_cut: 561928 / 561928 (100.00%)
  delta_z_cut: 561873 / 561928 (99.99%)
  fd_chi2_cut: 529917 / 561928 (94.30%)
  lambda_mass_cut: 488844 / 561928 (86.99%)
  lp_prob_cut: 517923 / 561928 (92.17%)
  kk_product_cut: 561928 / 561928 (100.00%)
  h1_kaon_cut: 462603 / 561928 (82.32%)
  h2_kaon_cut: 475274 / 561928 (84.58%)
  b_pt_cut

In [7]:
import numpy as np
import matplotlib.pyplot as plt
import awkward as ak

def plot_selection_variables_separate(mc_ll, mc_dd, output_prefix="selection_variables"):
    """
    Plot selection variables for LL and DD samples in separate files with 6 subplots per page
    
    Parameters:
    -----------
    mc_ll : awkward.Array
        Long-Long track type MC sample
    mc_dd : awkward.Array
        Downstream-Downstream track type MC sample
    output_prefix : str
        Prefix for output files
    """
    # Define variables to plot and their properties
    variables = [
        {
            'name': 'p_MC15TuneV1_ProbNNp',
            'label': 'p_MC15TuneV1_ProbNNp',
            'cut_value': 0.05,
            'cut_type': '>'
        },
        {
            'name': 'L0_FDCHI2_OWNPV',
            'label': 'Lambda χ²FD',
            'cut_value': 45,
            'cut_type': '>'
        },
        {
            'name': 'Lp_MC15TuneV1_ProbNNp',
            'label': 'Lp_MC15TuneV1_ProbNNp',
            'cut_value': 0.2,
            'cut_type': '>'
        },
        {
            'name': 'Bu_PT',
            'label': 'B+ pT [MeV/c]',
            'cut_value': 3000,
            'cut_type': '>'
        },
        {
            'name': 'Bu_IPCHI2_OWNPV',
            'label': 'B+ χ²IP',
            'cut_value': 10,
            'cut_type': '<'
        },
        {
            'name': 'Bu_FDCHI2_OWNPV',
            'label': 'B+ χ²FD',
            'cut_value': 175,
            'cut_type': '>'
        },
        {
            'name': 'h1_ProbNNk',
            'label': 'h1_ProbNNk',
            'cut_value': 0.2,
            'cut_type': '>'
        },
        {
            'name': 'h2_ProbNNk',
            'label': 'h2_ProbNNk',
            'cut_value': 0.2,
            'cut_type': '>'
        },
        # We'll handle Bu_DTF_chi2 separately
    ]
    
    # Add delta_z calculation separately
    delta_z = {
        'name': 'delta_z',
        'label': 'ΔZ [mm]',
        'cut_value': 20,
        'cut_type': '>'
    }
    
    # Add Bu_DTF_chi2 separately with special handling
    bu_dtf_chi2 = {
        'name': 'Bu_DTF_chi2',
        'label': 'B+ χ²DTF',
        'cut_value': 30,
        'cut_type': '<'
    }
    
    # Add KK product separately
    kk_product = {
        'name': 'kk_product',
        'label': 'h1_ProbNNk × h2_ProbNNk',
        'cut_value': 0.2,
        'cut_type': '>'
    }
    
    # Total number of variables
    all_vars = [delta_z] + variables + [bu_dtf_chi2, kk_product]
    n_vars = len(all_vars)
    
    # Calculate how many plots we need with 6 subplots per page
    vars_per_page = 6
    n_pages = (n_vars + vars_per_page - 1) // vars_per_page  # Ceiling division
    
    # Define grid layout for each page (3x2 grid)
    n_rows, n_cols = 2, 3
    
    # Create separate figures for each page of LL and DD
    ll_figs = []
    ll_axes = []
    dd_figs = []
    dd_axes = []
    
    for page in range(n_pages):
        # Create figures for this page
        fig_ll, axes_ll = plt.subplots(n_rows, n_cols, figsize=(15, 10))
        fig_dd, axes_dd = plt.subplots(n_rows, n_cols, figsize=(15, 10))
        
        # Flatten axes for easier indexing
        axes_ll = axes_ll.flatten()
        axes_dd = axes_dd.flatten()
        
        ll_figs.append(fig_ll)
        ll_axes.append(axes_ll)
        dd_figs.append(fig_dd)
        dd_axes.append(axes_dd)
    
    # Process each variable
    for i, var_info in enumerate(all_vars):
        # Determine which page and which subplot position
        page_idx = i // vars_per_page
        subplot_idx = i % vars_per_page
        
        var_name = var_info['name']
        
        # Special handling for delta_z
        if var_name == 'delta_z':
            try:
                ll_delta_z = np.array(ak.to_numpy(mc_ll['L0_ENDVERTEX_Z']) - ak.to_numpy(mc_ll['L0_OWNPV_Z']))
                dd_delta_z = np.array(ak.to_numpy(mc_dd['L0_ENDVERTEX_Z']) - ak.to_numpy(mc_dd['L0_OWNPV_Z']))
                
                # Process delta_z 
                process_variable_single(ll_axes[page_idx][subplot_idx], ll_delta_z, var_info, "LL")
                process_variable_single(dd_axes[page_idx][subplot_idx], dd_delta_z, var_info, "DD")
                
            except Exception as e:
                print(f"Error processing delta_z: {e}")
                ll_axes[page_idx][subplot_idx].text(0.5, 0.5, f"Error processing delta_z", 
                               ha='center', va='center', transform=ll_axes[page_idx][subplot_idx].transAxes)
                dd_axes[page_idx][subplot_idx].text(0.5, 0.5, f"Error processing delta_z", 
                               ha='center', va='center', transform=dd_axes[page_idx][subplot_idx].transAxes)
        
        # Special handling for Bu_DTF_chi2
        elif var_name == 'Bu_DTF_chi2':
            try:
                # Convert to numpy and handle the data type properly
                ll_dtf_chi2_raw = ak.to_numpy(mc_ll["Bu_DTF_chi2"])
                dd_dtf_chi2_raw = ak.to_numpy(mc_dd["Bu_DTF_chi2"])
                
                # Handle the data structure properly
                ll_dtf_chi2 = []
                dd_dtf_chi2 = []
                
                # Check if the arrays are already flat or if they need to be processed
                if isinstance(ll_dtf_chi2_raw, np.ndarray) and ll_dtf_chi2_raw.ndim == 1:
                    # If it's already a flat array, use it directly
                    ll_dtf_chi2 = ll_dtf_chi2_raw
                else:
                    # Process each entry - safely extracting first element if possible
                    for item in ll_dtf_chi2_raw:
                        try:
                            # Try to access first element
                            if isinstance(item, (list, np.ndarray)) and len(item) > 0:
                                ll_dtf_chi2.append(float(item[0]))
                            elif hasattr(item, "__iter__") and len(list(item)) > 0:
                                ll_dtf_chi2.append(float(list(item)[0]))
                            else:
                                # If it's a scalar, use it directly
                                ll_dtf_chi2.append(float(item))
                        except (IndexError, TypeError, ValueError) as e:
                            # Skip problematic entries
                            ll_dtf_chi2.append(np.nan)
                            print(f"Skipping entry in LL Bu_DTF_chi2: {e}")
                
                # Same for DD
                if isinstance(dd_dtf_chi2_raw, np.ndarray) and dd_dtf_chi2_raw.ndim == 1:
                    dd_dtf_chi2 = dd_dtf_chi2_raw
                else:
                    for item in dd_dtf_chi2_raw:
                        try:
                            if isinstance(item, (list, np.ndarray)) and len(item) > 0:
                                dd_dtf_chi2.append(float(item[0]))
                            elif hasattr(item, "__iter__") and len(list(item)) > 0:
                                dd_dtf_chi2.append(float(list(item)[0]))
                            else:
                                dd_dtf_chi2.append(float(item))
                        except (IndexError, TypeError, ValueError) as e:
                            dd_dtf_chi2.append(np.nan)
                            print(f"Skipping entry in DD Bu_DTF_chi2: {e}")
                
                # Convert to numpy arrays
                ll_dtf_chi2 = np.array(ll_dtf_chi2, dtype=float)
                dd_dtf_chi2 = np.array(dd_dtf_chi2, dtype=float)
                
                # Remove NaN values
                ll_dtf_chi2 = ll_dtf_chi2[~np.isnan(ll_dtf_chi2)]
                dd_dtf_chi2 = dd_dtf_chi2[~np.isnan(dd_dtf_chi2)]
                
                # Debug info
                print(f"LL DTF chi2 shape: {ll_dtf_chi2.shape}, type: {type(ll_dtf_chi2)}")
                print(f"DD DTF chi2 shape: {dd_dtf_chi2.shape}, type: {type(dd_dtf_chi2)}")
                
                # Ensure we have valid data before plotting
                if len(ll_dtf_chi2) > 0:
                    process_variable_single(ll_axes[page_idx][subplot_idx], ll_dtf_chi2, var_info, "LL")
                else:
                    ll_axes[page_idx][subplot_idx].text(0.5, 0.5, "No valid Bu_DTF_chi2 data for LL", 
                               ha='center', va='center', transform=ll_axes[page_idx][subplot_idx].transAxes)
                
                if len(dd_dtf_chi2) > 0:
                    process_variable_single(dd_axes[page_idx][subplot_idx], dd_dtf_chi2, var_info, "DD")
                else:
                    dd_axes[page_idx][subplot_idx].text(0.5, 0.5, "No valid Bu_DTF_chi2 data for DD", 
                               ha='center', va='center', transform=dd_axes[page_idx][subplot_idx].transAxes)
                
            except Exception as e:
                print(f"Error processing Bu_DTF_chi2: {e}")
                import traceback
                traceback.print_exc()
                ll_axes[page_idx][subplot_idx].text(0.5, 0.5, f"Error processing Bu_DTF_chi2: {e}", 
                               ha='center', va='center', transform=ll_axes[page_idx][subplot_idx].transAxes)
                dd_axes[page_idx][subplot_idx].text(0.5, 0.5, f"Error processing Bu_DTF_chi2: {e}", 
                               ha='center', va='center', transform=dd_axes[page_idx][subplot_idx].transAxes)
        
        # Special handling for KK product
        elif var_name == 'kk_product':
            try:
                # Get the individual kaon ID probabilities
                ll_h1_probnnk = np.array(ak.to_numpy(mc_ll['h1_ProbNNk']))
                ll_h2_probnnk = np.array(ak.to_numpy(mc_ll['h2_ProbNNk']))
                dd_h1_probnnk = np.array(ak.to_numpy(mc_dd['h1_ProbNNk']))
                dd_h2_probnnk = np.array(ak.to_numpy(mc_dd['h2_ProbNNk']))
                
                # Calculate the product
                ll_kk_product = ll_h1_probnnk * ll_h2_probnnk
                dd_kk_product = dd_h1_probnnk * dd_h2_probnnk
                
                # Process KK product
                process_variable_single(ll_axes[page_idx][subplot_idx], ll_kk_product, var_info, "LL")
                process_variable_single(dd_axes[page_idx][subplot_idx], dd_kk_product, var_info, "DD")
                
            except Exception as e:
                print(f"Error processing KK product: {e}")
                ll_axes[page_idx][subplot_idx].text(0.5, 0.5, f"Error processing KK product: {e}", 
                               ha='center', va='center', transform=ll_axes[page_idx][subplot_idx].transAxes)
                dd_axes[page_idx][subplot_idx].text(0.5, 0.5, f"Error processing KK product: {e}", 
                               ha='center', va='center', transform=dd_axes[page_idx][subplot_idx].transAxes)
        
        # Standard variables
        else:
            try:
                # Convert to numpy arrays to avoid awkward array issues
                try:
                    ll_data = np.array(ak.to_numpy(mc_ll[var_name]))
                    dd_data = np.array(ak.to_numpy(mc_dd[var_name]))
                    
                    # Process the variable for each plot separately
                    process_variable_single(ll_axes[page_idx][subplot_idx], ll_data, var_info, "LL")
                    process_variable_single(dd_axes[page_idx][subplot_idx], dd_data, var_info, "DD")
                    
                except Exception as e:
                    print(f"Error extracting {var_name}: {e}")
                    ll_axes[page_idx][subplot_idx].text(0.5, 0.5, f"Error extracting {var_name}", 
                               ha='center', va='center', transform=ll_axes[page_idx][subplot_idx].transAxes)
                    dd_axes[page_idx][subplot_idx].text(0.5, 0.5, f"Error extracting {var_name}", 
                               ha='center', va='center', transform=dd_axes[page_idx][subplot_idx].transAxes)
                    
            except Exception as e:
                print(f"Error processing variable {var_name}: {e}")
                ll_axes[page_idx][subplot_idx].text(0.5, 0.5, f"Error in processing", 
                           ha='center', va='center', transform=ll_axes[page_idx][subplot_idx].transAxes)
                dd_axes[page_idx][subplot_idx].text(0.5, 0.5, f"Error in processing", 
                           ha='center', va='center', transform=dd_axes[page_idx][subplot_idx].transAxes)
    
    # Handle any unused subplots in the last page
    remaining = vars_per_page - (n_vars % vars_per_page)
    if remaining < vars_per_page:  # Only if it's not a full page
        last_page = n_pages - 1
        for j in range(vars_per_page - remaining, vars_per_page):
            if j < len(ll_axes[last_page]):
                ll_axes[last_page][j].set_visible(False)
                dd_axes[last_page][j].set_visible(False)
    
    # Save all figures with appropriate titles and layout adjustments
    for page in range(n_pages):
        # Add titles to each page
        ll_figs[page].suptitle(f"Selection Variables - LL Sample (Page {page+1}/{n_pages})", fontsize=16)
        dd_figs[page].suptitle(f"Selection Variables - DD Sample (Page {page+1}/{n_pages})", fontsize=16)
        
        # Adjust layout with more space
        plt.figure(ll_figs[page].number)
        plt.tight_layout(rect=[0, 0, 1, 0.95])  # Add padding for the title
        
        plt.figure(dd_figs[page].number)
        plt.tight_layout(rect=[0, 0, 1, 0.95])  # Add padding for the title
        
        # Generate filenames with page numbers
        ll_filename = f"{output_prefix}_LL_data_page{page+1}.pdf"
        dd_filename = f"{output_prefix}_DD_data_page{page+1}.pdf"
        
        plt.figure(ll_figs[page].number)
        plt.savefig(ll_filename, dpi=300, bbox_inches='tight')
        
        plt.figure(dd_figs[page].number)
        plt.savefig(dd_filename, dpi=300, bbox_inches='tight')
        
        print(f"LL plot page {page+1} saved to {ll_filename}")
        print(f"DD plot page {page+1} saved to {dd_filename}")
        
        plt.close(ll_figs[page])
        plt.close(dd_figs[page])
    
    return

def process_variable_single(ax, data, var_info, sample_type):
    """
    Process and plot a single variable for a single sample
    
    Parameters:
    -----------
    ax : matplotlib.axes.Axes
        Axes to plot on
    data : numpy.ndarray
        Data from sample
    var_info : dict
        Variable information (name, label, cut_value, cut_type)
    sample_type : str
        Sample type ("LL" or "DD")
    """
    var_name = var_info['name']
    var_label = var_info['label']
    cut_value = var_info['cut_value']
    cut_type = var_info['cut_type']
    
    # Make sure data is a numpy array
    data = np.array(data, dtype=float)
    
    # Filter out any NaN or inf values
    data = data[~np.isnan(data) & ~np.isinf(data)]
    
    # Skip if no data left
    if len(data) == 0:
        ax.text(0.5, 0.5, "No valid data available", 
                ha='center', va='center', transform=ax.transAxes)
        return
    
    # Calculate pass percentage
    if cut_type == '>':
        pass_percent = (data > cut_value).sum() / len(data) * 100
    else:  # '<'
        pass_percent = (data < cut_value).sum() / len(data) * 100
    
    # Create bins for histogram
    min_val = np.min(data)
    max_val = np.max(data)
    
    # Special handling for some variables with extreme ranges
    if max_val - min_val > 1000 and 'PT' in var_name:
        # For PT variables, focus on the important range
        min_val = max(0, min_val)
        max_val = min(10000, max_val)
    elif max_val - min_val > 1000 and 'CHI2' in var_name:
        # For CHI2 variables, focus on the important range
        min_val = max(0, min_val)
        max_val = min(500, max_val)
    
    # Add padding to range
    range_padding = (max_val - min_val) * 0.1
    hist_range = (min_val - range_padding, max_val + range_padding)
    
    # Set number of bins based on data range
    if max_val - min_val > 100:
        bins = 50
    else:
        bins = 30
    
    # Create histogram with raw counts (not density)
    hist, bins = np.histogram(data, bins=bins, range=hist_range, density=False)
    
    # Calculate bin centers
    centers = (bins[:-1] + bins[1:]) / 2
    
    # Plot histograms as step plots (cleaner than bars)
    color = 'blue' if sample_type == 'LL' else 'green'
    ax.step(centers, hist, where='mid', color=color, linewidth=2)
    
    # Add vertical line at cut value
    ymin, ymax = 0, np.max(hist) * 1.1
    ax.set_ylim(ymin, ymax)
    ax.vlines(cut_value, ymin, ymax, colors='red', linestyles='dashed', linewidth=2)
    
    # Add cut text at the top of the graph
    ax.text(
        0.5, 
        0.95, 
        f"Cut: {cut_type} {cut_value} ({pass_percent:.1f}% pass)", 
        transform=ax.transAxes, 
        verticalalignment='top', 
        horizontalalignment='center',
        bbox=dict(boxstyle='round', facecolor='white', alpha=0.8)
    )
    
    # Set title and labels
    ax.set_title(f"{var_label} - {sample_type}")
    ax.set_xlabel(var_label)
    ax.set_ylabel('Events')
    
    return

In [8]:
plot_selection_variables_separate(data_ll, data_dd, output_prefix="selection_variables")

LL DTF chi2 shape: (713632,), type: <class 'numpy.ndarray'>
DD DTF chi2 shape: (561928,), type: <class 'numpy.ndarray'>
LL plot page 1 saved to selection_variables_LL_data_page1.pdf
DD plot page 1 saved to selection_variables_DD_data_page1.pdf
LL plot page 2 saved to selection_variables_LL_data_page2.pdf
DD plot page 2 saved to selection_variables_DD_data_page2.pdf
