In [None]:
import numpy as np
import mplhep as hep
import matplotlib.pyplot as plt
import uproot, os, sys, json, contextlib
import awkward as ak
# Notebook directory. "__file__" is a quoted string here, not the module
# attribute, so abspath resolves it against the current working directory and
# dirname strips the dummy file name again.
notebook_dir = os.path.dirname(os.path.abspath("__file__"))
# Make the parent of that directory importable (project root, home of `utils`).
sys.path.append(os.path.join(notebook_dir, os.pardir))
from utils.branches import get_branches, truth_branches
from utils.plot import plot_data
from utils.constants import truthJpsiK, trigcut, dataPath, mcCorrPath, mcPath, truthpkk
from utils.data_loader import load_data
from utils.fit import load_mc_fit_params, fit_data_with_mc_constraints
from matplotlib import rcParams
import matplotlib as mpl
import ROOT
# Activate the LHCb1 style sheet shipped with mplhep, then override the
# math-text font set. The override is applied after style.use so that it is
# not replaced in case the style sheet sets the same key.
plt.style.use(hep.style.LHCb1)
config = {"mathtext.fontset":'stix'}
rcParams.update(config)



In [2]:
# Notebook-wide matplotlib overrides, applied on top of whatever rcParams are
# active when this cell runs (the style sheet is selected in the first cell).
plt.rcParams.update({
    # Keep the font family settings for LHCb style
    "font.family": "serif",
    "font.serif": ["Times", "Computer Modern Roman", "DejaVu Serif"],
    
    # Optional figure-size / resolution overrides, currently disabled:
    # "figure.figsize": (15, 10),  # Larger figure
    # "figure.dpi": 100,          # Screen display
    # "savefig.dpi": 300,         # Saved figure resolution
    
    # Optional base/title/label font sizes, currently disabled:
    # "font.size": 12,            # Base font size (increase from default)
    # "axes.titlesize": 12,       # Title size
    # "axes.labelsize": 10,       # Axis label size
    # Active size overrides: tick labels and legend text only
    "xtick.labelsize": 14,      # X tick label size
    "ytick.labelsize": 14,      # Y tick label size
    "legend.fontsize": 16       # Legend font size
})


In [3]:
data_path = "/share/lazy/Mohamed/Bu2LambdaPPP/RD/restripped.data/reduced"
decay_modes = "L0barPKpKm"
particles = ["h1", "h2", "p"]

# One load per track type, LL first and then DD; every other argument is shared.
data_ll, data_dd = (
    load_data(
        data_path=data_path,
        decay_mode=decay_modes,
        tracks=[track_label],
        particles=particles,
    )
    for track_label in ("LL", "DD")
)


Real Data Files being processed for decay mode L0barPKpKm with tracks ['LL']: ['/share/lazy/Mohamed/Bu2LambdaPPP/RD/restripped.data/reduced/dataBu2L0barPHH_16MD_reduced.root:B2L0barPKpKm_LL/DecayTree', '/share/lazy/Mohamed/Bu2LambdaPPP/RD/restripped.data/reduced/dataBu2L0barPHH_16MU_reduced.root:B2L0barPKpKm_LL/DecayTree', '/share/lazy/Mohamed/Bu2LambdaPPP/RD/restripped.data/reduced/dataBu2L0barPHH_17MD_reduced.root:B2L0barPKpKm_LL/DecayTree', '/share/lazy/Mohamed/Bu2LambdaPPP/RD/restripped.data/reduced/dataBu2L0barPHH_17MU_reduced.root:B2L0barPKpKm_LL/DecayTree', '/share/lazy/Mohamed/Bu2LambdaPPP/RD/restripped.data/reduced/dataBu2L0barPHH_18MD_reduced.root:B2L0barPKpKm_LL/DecayTree', '/share/lazy/Mohamed/Bu2LambdaPPP/RD/restripped.data/reduced/dataBu2L0barPHH_18MU_reduced.root:B2L0barPKpKm_LL/DecayTree']
Branches being read: ['h1_P', 'h1_PT', 'h1_PE', 'h1_PX', 'h1_PY', 'h1_PZ', 'h1_ID', 'h1_TRACK_Type', 'h1_IPCHI2_OWNPV', 'h2_P', 'h2_PT', 'h2_PE', 'h2_PX', 'h2_PY', 'h2_PZ', 'h2_ID', '

In [4]:
def select(data, track_type='LL'):
    """
    Apply selection cuts to B+ → Λ0 h1 h2 samples incrementally

    The cuts are applied one after another and the surviving event count is
    printed after each step, relative to the initial number of events.

    Parameters:
    -----------
    data : awkward.Array
        Events data. Any container that supports field access by name
        (``data['Bu_PT']``), boolean-mask indexing and ``len`` works.
    track_type : str
        Track type, either 'LL' (Long-Long) or 'DD' (Downstream-Downstream).
        Currently not used by the selection: the same thresholds are applied
        regardless of its value.

    Returns:
    --------
    awkward.Array
        Selected events after applying all cuts
    """
    # Lookup errors raised for an absent field: KeyError (mappings),
    # IndexError (awkward's FieldNotFoundError derives from it) and ValueError
    # (numpy structured arrays). Anything else is a genuine failure and is
    # allowed to propagate instead of being silently swallowed.
    missing_field_errors = (KeyError, IndexError, ValueError)

    # Keep track of initial count for reporting
    initial_count = len(data)

    def report(label, events):
        # An empty input sample is reported as 0.00% rather than dividing by zero.
        kept = len(events)
        fraction = kept / initial_count if initial_count else 0.0
        print(f"{label}: {kept}/{initial_count} ({fraction:.2%})")

    def optional_cut(events, field, keep, label):
        # Apply keep(events[field]) only when the field exists; otherwise skip the cut.
        try:
            values = events[field]
        except missing_field_errors:
            print(f"Skipping {label} cut (field not found)")
            return events
        events = events[keep(values)]
        report(f"After {label} cut", events)
        return events

    print(f"Initial events: {initial_count}")

    # ===== p (Proton) Cuts =====
    data = data[data['p_MC15TuneV1_ProbNNp'] > 0.05]
    report("After proton prob cut", data)

    # ===== Λ0 Cuts =====
    # Delta Z cut (difference between Lambda decay vertex and primary vertex)
    data = data[(data['L0_ENDVERTEX_Z'] - data['L0_OWNPV_Z']) > 20]
    report("After delta Z cut", data)

    # Lambda flight distance chi2, with a fallback to the alternative field name
    try:
        lambda_fd_chi2 = data['L0_FDCHI2_OWNPV']
    except missing_field_errors:
        lambda_fd_chi2 = data['L0_FDCHI2_ORIVX']
    data = data[lambda_fd_chi2 > 45]
    report("After fd chi2 cut", data)

    # Lambda mass window: within ±6 of 1115.6
    mass_offset = data['L0_M'] - 1115.6
    data = data[(mass_offset < 6) & (mass_offset > -6)]
    report("After lambda mass cut", data)

    # Lambda proton ProbNN
    data = data[data['Lp_MC15TuneV1_ProbNNp'] > 0.2]
    report("After lambda proton prob cut", data)

    # ===== h1 and h2 (Kaon) Cuts =====
    # KK product, with a fallback to the alternative field names
    try:
        kk_product = data['h1_ProbNNk'] * data['h2_ProbNNk']
    except missing_field_errors:
        kk_product = data['h1_MC15TuneV1_ProbNNk'] * data['h2_MC15TuneV1_ProbNNk']
    data = data[kk_product > 0.04]
    report("After KK product cut", data)

    # ===== B⁺ Cuts =====
    # B PT cut
    data = data[data['Bu_PT'] > 3000]
    report("After B PT cut", data)

    # The remaining B cuts are only applied if the corresponding field exists
    data = optional_cut(data, 'Bu_DTF_chi2', lambda values: values < 30, "DTF chi2")
    data = optional_cut(data, 'Bu_IPCHI2_OWNPV', lambda values: values < 10, "IP chi2")
    data = optional_cut(data, 'Bu_FDCHI2_OWNPV', lambda values: values > 175, "B FD chi2")

    report("Final selected events", data)
    return data

In [5]:
# Run the selection once per track type and keep both results under their own names.
ll_selection = select(data_ll, track_type='LL')
dd_selection = select(data_dd, track_type='DD')
# Backward-compatible alias: both calls used to assign to `data_selection`, so
# the name ended up holding the DD selection and the LL result was discarded.
data_selection = dd_selection

Initial events: 713632
After proton prob cut: 713632/713632 (100.00%)
After delta Z cut: 629470/713632 (88.21%)
After fd chi2 cut: 572658/713632 (80.25%)
After lambda mass cut: 328178/713632 (45.99%)
After lambda proton prob cut: 262305/713632 (36.76%)
After KK product cut: 262305/713632 (36.76%)
After B PT cut: 262305/713632 (36.76%)
After DTF chi2 cut: 262305/713632 (36.76%)
After IP chi2 cut: 262305/713632 (36.76%)
After B FD chi2 cut: 262305/713632 (36.76%)
Final selected events: 262305/713632 (36.76%)
Initial events: 561928
After proton prob cut: 561928/561928 (100.00%)
After delta Z cut: 561873/561928 (99.99%)
After fd chi2 cut: 529917/561928 (94.30%)
After lambda mass cut: 465532/561928 (82.85%)
After lambda proton prob cut: 437936/561928 (77.93%)
After KK product cut: 437936/561928 (77.93%)
After B PT cut: 437936/561928 (77.93%)
After DTF chi2 cut: 437936/561928 (77.93%)
After IP chi2 cut: 437936/561928 (77.93%)
After B FD chi2 cut: 437936/561928 (77.93%)
Final selected events:

In [7]:
# Context manager to temporarily patch the title in RooFit frames
@contextlib.contextmanager
def with_title_patch(new_title):
    """
    Context manager to temporarily patch the title in RooFit frames

    While the context is active, ``ROOT.RooPlot.frame`` is replaced by a
    wrapper that substitutes ``new_title`` for a title the caller already
    passes, either as the keyword ``Title`` or as the first positional command
    argument named "Title". Calls that pass no title are forwarded unchanged.
    The original attribute is restored on exit, also when the body raises.

    NOTE(review): only calls routed through ``ROOT.RooPlot.frame`` are
    affected; whether the fit helper creates its frame that way is not visible
    from this cell - confirm.

    Parameters:
    -----------
    new_title : str
        Title to substitute for the one passed by the caller
    """
    original_frame = ROOT.RooPlot.frame

    def patched_frame(self, *args, **kwargs):
        if 'Title' in kwargs:
            kwargs['Title'] = new_title
        else:
            # Find the positional Title command argument. RooCmdArg is looked
            # up on ROOT itself: the class is global, only the factory
            # functions such as Title live in the RooFit namespace.
            for position, arg in enumerate(args):
                if isinstance(arg, ROOT.RooCmdArg) and arg.GetName() == "Title":
                    replacement = ROOT.RooFit.Title(new_title)
                    args = args[:position] + (replacement,) + args[position + 1:]
                    break
        return original_frame(self, *args, **kwargs)

    try:
        ROOT.RooPlot.frame = patched_frame
        yield
    finally:
        ROOT.RooPlot.frame = original_frame

def fit_track_type_data(data_selection, track_type, mc_params_dir=None, output_dir=None):
    """
    Fit data with MC constraints for a specific track type

    Loads the MC shape parameters, runs ``fit_data_with_mc_constraints`` on the
    'Bu_MM' branch inside ``with_title_patch``, renames the output JSON/PDF so
    that their names carry the track type, and prints a short summary.

    Parameters:
    -----------
    data_selection : awkward array or dict
        Data selection for this track type
    track_type : str
        Track type ('DD' or 'LL')
    mc_params_dir : str, optional
        Directory containing MC parameters, defaults to f"output_{track_type}/"
    output_dir : str, optional
        Directory to save output plots, defaults to f"output_{track_type}/"

    Returns:
    --------
    dict of fit results, or None when the MC parameters could not be loaded
    or the fit returned no results
    """
    # Set default directories if not provided
    if mc_params_dir is None:
        mc_params_dir = f"output_{track_type}/"

    if output_dir is None:
        output_dir = f"output_{track_type}/"

    # Create output directory if it doesn't exist
    if not os.path.exists(output_dir):
        os.makedirs(output_dir, exist_ok=True)
        print(f"Created output directory: {output_dir}")

    # Load previously computed MC parameters. os.path.join avoids the doubled
    # separator when the directory already ends with one.
    mc_params_path = os.path.join(mc_params_dir, "b_mass_shape_params_MC_DoubleGauss_Pol2.json")
    mc_params = load_mc_fit_params(mc_params_path)

    if mc_params is None:
        print(f"Error: Could not load MC parameters from {mc_params_path}")
        return None

    # Print MC parameters for verification
    print(f"\nMC parameters for {track_type}:")
    print(f"Model type: {mc_params['model']}")
    print(f"MC mean: {mc_params['mean']['value']:.2f} MeV/c²")
    print(f"MC sigma1: {mc_params['sigma1']['value']:.2f} MeV/c²")
    print(f"MC sigma2: {mc_params['sigma2']['value']:.2f} MeV/c²")

    # Patch the title to include track type
    fit_title = f"B+ Mass Fit - Double Gaussian + Poly2 Model - {track_type}"

    # Now fit the data using MC constraints with the patched title
    with with_title_patch(fit_title):
        fit_results = fit_data_with_mc_constraints(
            data_selection,
            mass_branch='Bu_MM',
            mc_params=mc_params,
            output_dir=output_dir
        )

    # Rename the output files to include track type. os.replace overwrites a
    # file left over from a previous run on every platform.
    renames = [
        (
            "b_mass_fit_params_RealData_DoubleGauss_Pol2.json",
            f"b_mass_fit_params_RealData_{track_type}_DoubleGauss_Pol2.json",
            "Renamed output file to include track type",
        ),
        (
            "b_mass_fit_RealData_DoubleGauss_Pol2.pdf",
            f"b_mass_fit_RealData_{track_type}_DoubleGauss_Pol2.pdf",
            "Renamed output PDF to include track type",
        ),
    ]
    for old_name, new_name, message in renames:
        old_path = os.path.join(output_dir, old_name)
        new_path = os.path.join(output_dir, new_name)
        if os.path.exists(old_path):
            os.replace(old_path, new_path)
            print(f"{message}: {new_path}")

    # Without results there is nothing to summarise; report it instead of
    # failing with a TypeError in the prints below.
    if fit_results is None:
        print(f"Error: Fit for {track_type} returned no results")
        return None

    # Print a summary of the fit results
    print(f"\nFit results summary for {track_type}:")
    print(f"Mean: {fit_results['mean']['value']:.2f} ± {fit_results['mean']['error']:.2f} MeV/c²")
    print(f"Signal yield: {fit_results['signal_yield']['value']:.0f} ± {fit_results['signal_yield']['error']:.0f}")
    print(f"Significance: {fit_results['signal_significance']:.1f}σ")
    print(f"χ²/ndf: {fit_results['chi2_ndf']:.3f}")

    return fit_results

# Function to run fits for both track types and compare results
def compare_track_types(data_ll, data_dd):
    """
    Run fits on both LL and DD track types and compare results

    Prints a side-by-side table of the fit results and the combined signal
    yield (errors added in quadrature), and saves the comparison to
    "output_comparison/track_type_comparison.json". If either fit returns
    None, the comparison is skipped and nothing is written.

    Parameters:
    -----------
    data_ll : awkward array or dict
        Long-Long track data selection
    data_dd : awkward array or dict
        Downstream-Downstream track data selection

    Returns:
    --------
    tuple of (ll_results, dd_results)
    """
    print("\n========== FITTING LONG-LONG (LL) TRACKS ==========")
    ll_results = fit_track_type_data(data_ll, 'LL')

    print("\n========== FITTING DOWNSTREAM-DOWNSTREAM (DD) TRACKS ==========")
    dd_results = fit_track_type_data(data_dd, 'DD')

    # fit_track_type_data returns None when it cannot produce results; there
    # is nothing to compare in that case.
    if ll_results is None or dd_results is None:
        print("Error: missing fit results for at least one track type; skipping comparison")
        return ll_results, dd_results

    def value_with_error(entry, precision, unit=""):
        # Render a {'value', 'error'} entry as "value ± error<unit>".
        return f"{entry['value']:.{precision}f} ± {entry['error']:.{precision}f}{unit}"

    # One (label, renderer) pair per table row; the renderer takes a result dict.
    table_rows = [
        ("Mean", lambda r: value_with_error(r['mean'], 2, " MeV/c²")),
        ("Resolution", lambda r: value_with_error(r['effective_sigma'], 2, " MeV/c²")),
        ("Signal yield", lambda r: value_with_error(r['signal_yield'], 0)),
        ("Significance", lambda r: f"{r['signal_significance']:.1f}σ"),
        ("χ²/ndf", lambda r: f"{r['chi2_ndf']:.3f}"),
    ]

    # Compare results
    print("\n========== TRACK TYPE COMPARISON ==========")
    print(f"{'Parameter':<20} {'LL Value':<25} {'DD Value':<25}")
    print("-" * 70)
    for label, render in table_rows:
        print(f"{label:<20} {render(ll_results):<25} {render(dd_results):<25}")

    # Combined signal: yields add, errors add in quadrature. float() keeps
    # the values JSON-serialisable whatever numeric type the fit returned.
    total_yield = float(ll_results['signal_yield']['value'] + dd_results['signal_yield']['value'])
    total_yield_err = float(
        np.sqrt(ll_results['signal_yield']['error']**2 + dd_results['signal_yield']['error']**2)
    )
    print(f"\nCombined signal yield (LL+DD): {total_yield:.0f} ± {total_yield_err:.0f}")

    def summarise(results):
        # Subset of a fit result that goes into the comparison file.
        return {
            "mean": results['mean'],
            "resolution": results['effective_sigma'],
            "signal_yield": results['signal_yield'],
            "significance": results['signal_significance'],
            "chi2_ndf": results['chi2_ndf']
        }

    # Save comparison to a JSON file
    comparison = {
        "LL": summarise(ll_results),
        "DD": summarise(dd_results),
        "combined": {
            "signal_yield": {
                "value": total_yield,
                "error": total_yield_err
            }
        }
    }

    # The directory has to exist before the file is opened for writing.
    os.makedirs("output_comparison", exist_ok=True)
    with open("output_comparison/track_type_comparison.json", 'w') as f:
        json.dump(comparison, f, indent=4)

    return ll_results, dd_results


#  Analyze both track types and compare
# The selection is recomputed here for each track type (an earlier cell also
# runs it), so this cell does not depend on that cell's result variable.
ll_selection = select(data_ll, track_type='LL')
dd_selection = select(data_dd, track_type='DD')
# Fits LL then DD, prints the comparison table and returns both result dicts.
ll_results, dd_results = compare_track_types(ll_selection, dd_selection)

Initial events: 713632
After proton prob cut: 713632/713632 (100.00%)
After delta Z cut: 629470/713632 (88.21%)
After fd chi2 cut: 572658/713632 (80.25%)
After lambda mass cut: 328178/713632 (45.99%)
After lambda proton prob cut: 262305/713632 (36.76%)
After KK product cut: 262305/713632 (36.76%)
After B PT cut: 262305/713632 (36.76%)
After DTF chi2 cut: 262305/713632 (36.76%)
After IP chi2 cut: 262305/713632 (36.76%)
After B FD chi2 cut: 262305/713632 (36.76%)
Final selected events: 262305/713632 (36.76%)
Initial events: 561928
After proton prob cut: 561928/561928 (100.00%)
After delta Z cut: 561873/561928 (99.99%)
After fd chi2 cut: 529917/561928 (94.30%)
After lambda mass cut: 465532/561928 (82.85%)
After lambda proton prob cut: 437936/561928 (77.93%)
After KK product cut: 437936/561928 (77.93%)
After B PT cut: 437936/561928 (77.93%)
After DTF chi2 cut: 437936/561928 (77.93%)
After IP chi2 cut: 437936/561928 (77.93%)
After B FD chi2 cut: 437936/561928 (77.93%)
Final selected events:

Info in <TCanvas::Print>: pdf file output_LL//b_mass_fit_RealData_DoubleGauss_Pol2.pdf has been created
Info in <TCanvas::Print>: pdf file output_DD//b_mass_fit_RealData_DoubleGauss_Pol2.pdf has been created
