In [5]:

import uproot
import numpy as np
import mplhep as hep
import dask_awkward as dk
import awkward as ak
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import vector
import random
import time 
import hist
from uncertainties import unumpy as unp
import os

In [7]:
import uproot
import os

# Path to the NanoAOD central sample directory
directory_path_nanoaod = "/depot/cms/top/bhanda25/EFT/Nanoaod_central_sample/"

# Keep every .root file whose name carries one of the tags "preVFP" or
# "UL16APV", in sorted order. No cap on the number of files is applied here.
desired_files_nanoaod = sorted(
    f
    for f in os.listdir(directory_path_nanoaod)
    if f.endswith(".root") and any(tag in f for tag in ("preVFP", "UL16APV"))
)

# Absolute path of each selected file
file_paths_nanoaod = [
    os.path.join(directory_path_nanoaod, name) for name in desired_files_nanoaod
]

# Open each file and keep a handle on its "Events" tree, reporting progress.
# NOTE(review): the opened files are never explicitly closed in this cell.
trees_nanoaod = []
for i, fp in enumerate(file_paths_nanoaod):
    progress = f" Loading file {i+1}/{len(file_paths_nanoaod)}: {os.path.basename(fp)}"
    print(progress)
    events_tree = uproot.open(fp)["Events"]
    trees_nanoaod.append(events_tree)

# Merge the events of all opened trees into a single array.
# NOTE(review): no branch filter is passed, so presumably every branch is read
# into memory although only a handful are used later - confirm and consider
# restricting the read if memory becomes a problem.
nanogen = uproot.concatenate(trees_nanoaod)


 Loading file 1/16: TTto2L2Nu-1Jets-smeft_MTT-0to700_2016preVFP_NanoAOD_1.root
 Loading file 2/16: TTto2L2Nu-1Jets-smeft_MTT-0to700_2016preVFP_NanoAOD_2.root
 Loading file 3/16: TTto2L2Nu-1Jets-smeft_MTT-0to700_2016preVFP_NanoAOD_3.root
 Loading file 4/16: TTto2L2Nu-1Jets-smeft_MTT-0to700_2016preVFP_NanoAOD_4.root
 Loading file 5/16: TTto2L2Nu-1Jets-smeft_MTT-0to700_2016preVFP_NanoAOD_5.root
 Loading file 6/16: TTto2L2Nu-1Jets-smeft_MTT-0to700_2016preVFP_NanoAOD_6.root
 Loading file 7/16: TTto2L2Nu-1Jets-smeft_MTT-0to700_2016preVFP_NanoAOD_7.root
 Loading file 8/16: TTto2L2Nu-1Jets-smeft_MTT-0to700_2016preVFP_NanoAOD_8.root
 Loading file 9/16: TTto2L2Nu-1Jets-smeft_MTT-700to900_2016preVFP_NanoAOD_1.root
 Loading file 10/16: TTto2L2Nu-1Jets-smeft_MTT-700to900_2016preVFP_NanoAOD_2.root
 Loading file 11/16: TTto2L2Nu-1Jets-smeft_MTT-900toInf_2016preVFP_NanoAOD_1.root
 Loading file 12/16: TTto2L2Nu-1Jets-smeft_MTT-900toInf_2016preVFP_NanoAOD_2.root
 Loading file 13/16: TTto2L2Nu-1Jets-smef

In [8]:
# Bare expression: show the notebook's display of the concatenated events array
nanogen

In [9]:
# Generator-particle kinematic branches, read from the concatenated events
genpart_pt, genpart_eta, genpart_phi, genpart_mass = (
    nanogen[branch_name]
    for branch_name in ('GenPart_pt', 'GenPart_eta', 'GenPart_phi', 'GenPart_mass')
)

# PDG ID of each generator particle and the index of its mother in the same event
genpart_pid, genpart_mother = nanogen['GenPart_pdgId'], nanogen['GenPart_genPartIdxMother']

# Per-event reweighting weights (written out later as "event_weight")
gen_weight = nanogen['LHEReweightingWeight']

In [5]:

# Wrap the (pt, eta, phi, mass) branches as a vector array so that energy and
# Cartesian momentum components can be read off as attributes.
genvec = vector.Array(
    ak.zip(dict(pt=genpart_pt, eta=genpart_eta, phi=genpart_phi, mass=genpart_mass))
)

# Energy and Cartesian momentum components of every generator particle
E, px, py, pz = genvec.E, genvec.px, genvec.py, genvec.pz

In [6]:
# Per-particle boolean masks selecting generator particles by PDG ID
# (6 = top, 24 = W+, 5 = bottom; negative values are the antiparticles).
is_top = genpart_pid == 6
is_antitop = genpart_pid == -6
is_wplus = genpart_pid == 24
is_wminus = genpart_pid == -24
is_bottom = genpart_pid == 5
is_antibottom = genpart_pid == -5

# Position of each particle inside its own event; these are the values that
# GenPart_genPartIdxMother entries are compared against below.
indices = ak.local_index(genpart_pid)

# In-event indices of all top / antitop entries
top_indices = indices[is_top]
antitop_indices = indices[is_antitop]

# Mother index of every W+ / W- entry
wplus_mothers = genpart_mother[is_wplus]
wminus_mothers = genpart_mother[is_wminus]

# Pair every top index with every W+ mother index (and likewise antitop / W-);
# a top is flagged when at least one W+ names it as mother. The resulting masks
# are aligned with the per-event top / antitop lists, not with all particles.
top_with_wplus_daughter_mask = ak.any(top_indices[:, :, None] == wplus_mothers[:, None, :], axis=-1)
antitop_with_wminus_daughter_mask = ak.any(antitop_indices[:, :, None] == wminus_mothers[:, None, :], axis=-1)

# In-event indices of all W+ / W- entries
Wplus_indices = indices[is_wplus]
Wminus_indices = indices[is_wminus]

# Flag particles whose mother index matches any W+ (W-) index in the event.
# NOTE(review): this relies on awkward broadcasting the inserted length-1 axis
# against the per-event W list; the result is later used to index per-particle
# arrays, so presumably it holds one boolean per generator particle - confirm.
mother_is_Wplus = ak.any(genpart_mother[:, None] == Wplus_indices, axis=1)
mother_is_Wminus = ak.any(genpart_mother[:, None] == Wminus_indices, axis=1)

# Per-event flag: the number of tops with a W+ daughter plus the number of
# antitops with a W- daughter equals 2.
# NOTE(review): this checks the combined count only, not one of each.
has_two_tops = (ak.sum(top_with_wplus_daughter_mask, axis=1) +
                ak.sum(antitop_with_wminus_daughter_mask, axis=1)) == 2


In [7]:
def compute_gen_spin_corrs(E, px, py, pz, is_top, is_antitop, dilepton_event_mask,
                           top_with_wplus_daughter_mask, antitop_with_wminus_daughter_mask,
                           mother_is_Wplus, mother_is_Wminus,
                           Wplus_daughter_mask, Wminus_daughter_mask):
    """Compute generator-level spin-correlation observables for dilepton events.

    Selects the top, antitop, W- daughter ("l") and W+ daughter ("lbar") from
    the per-particle component arrays, keeps events with exactly one of each,
    boosts the leptons into their parent top rest frames via the ttbar rest
    frame, and evaluates the lepton direction cosines with respect to the
    k, r and n axes built from the top direction and the z axis.

    Parameters
    ----------
    E, px, py, pz
        Per-event lists of energy and Cartesian momentum components for every
        generator particle.
    is_top, is_antitop
        Per-particle masks selecting top / antitop entries.
    dilepton_event_mask
        Per-event mask applied after the particle-level selections.
    top_with_wplus_daughter_mask, antitop_with_wminus_daughter_mask
        Masks applied on top of ``is_top`` / ``is_antitop``.
    mother_is_Wplus, mother_is_Wminus
        Per-particle masks selecting daughters of a W+ / W-.
    Wplus_daughter_mask, Wminus_daughter_mask
        Masks applied on top of ``mother_is_Wplus`` / ``mother_is_Wminus``
        (the caller uses them to pick the lepton flavour).

    Returns
    -------
    tuple
        ``(scattering_angle, ttbar_mass, B1, B2, C, ll_cHel, ls, lbars, tops,
        tbars, valid_events)``. ``scattering_angle``, ``ttbar_mass`` and
        ``ll_cHel`` are flattened NumPy arrays. ``B1`` / ``B2`` stack the three
        direction cosines of lbar / l along axis 1, and ``C`` stacks their nine
        pairwise products. ``ls``, ``lbars``, ``tops`` and ``tbars`` are the
        selected four-vectors, and ``valid_events`` is the mask (relative to
        the events passing ``dilepton_event_mask``) of events with exactly one
        candidate of each kind.

        NOTE(review): the direction cosines are not flattened before stacking,
        so if ``.to_numpy()`` yields shape (n, 1) for the one-per-event lists,
        ``B1`` / ``B2`` / ``C`` carry a trailing length-1 axis - confirm.
    """
    components = {'E': E, 'px': px, 'py': py, 'pz': pz}

    def _select(particle_mask, decay_mask):
        # Apply particle-level, decay-level and event-level masks to every component.
        return {
            key: values[particle_mask][decay_mask][dilepton_event_mask]
            for key, values in components.items()
        }

    top_sel = _select(is_top, top_with_wplus_daughter_mask)
    tbar_sel = _select(is_antitop, antitop_with_wminus_daughter_mask)
    l_sel = _select(mother_is_Wminus, Wminus_daughter_mask)
    lbar_sel = _select(mother_is_Wplus, Wplus_daughter_mask)

    # Keep only events in which each of the four selections has exactly one entry
    valid_events = (
        (ak.num(top_sel['E']) == 1)
        & (ak.num(tbar_sel['E']) == 1)
        & (ak.num(l_sel['E']) == 1)
        & (ak.num(lbar_sel['E']) == 1)
    )

    def _to_vectors(selection):
        # Build four-vectors from the components of the valid events (still one-element lists).
        return vector.zip({key: values[valid_events] for key, values in selection.items()})

    tops = _to_vectors(top_sel)
    tbars = _to_vectors(tbar_sel)
    ls = _to_vectors(l_sel)
    lbars = _to_vectors(lbar_sel)

    # Boost tops into the ttbar rest frame, and each lepton into its parent
    # top's rest frame by way of the ttbar rest frame.
    ttbar_frame = tops + tbars
    boosted_tops = tops.boostCM_of(ttbar_frame)
    boosted_tbars = tbars.boostCM_of(ttbar_frame)
    boosted_ls = ls.boostCM_of(ttbar_frame).boostCM_of(boosted_tbars)
    boosted_lbars = lbars.boostCM_of(ttbar_frame).boostCM_of(boosted_tops)

    # Reference axes: p along +z, k along the boosted top direction, and r, n
    # built from p and k.
    p_axis = vector.obj(x=0, y=0, z=1)
    k_axis = boosted_tops.to_xyz().unit()
    scattering_angle = k_axis.theta
    cos_scat_angle = np.cos(scattering_angle)
    sin_scat_angle = np.sin(scattering_angle)
    # Clamp tiny |sin| values to 1e-5 so the division below stays finite
    sin_scat_angle = np.where(np.abs(sin_scat_angle) < 1e-5, 1e-5, sin_scat_angle)
    axis_coeff = np.sign(cos_scat_angle) / np.abs(sin_scat_angle)
    r_axis = axis_coeff * (p_axis - (k_axis * cos_scat_angle))
    n_axis = axis_coeff * p_axis.cross(k_axis)

    def _cos_angle(lepton, axis):
        # Cosine of the opening angle between a lepton and a reference axis.
        return np.cos(lepton.deltaangle(axis).to_numpy())

    reference_axes = (k_axis, r_axis, n_axis)
    # lbar is measured against (k, r, n); l against the reversed axes
    cos_theta1 = [_cos_angle(boosted_lbars, axis) for axis in reference_axes]
    cos_theta2 = [_cos_angle(boosted_ls, -1 * axis) for axis in reference_axes]

    ll_cHel = np.cos(boosted_lbars.deltaangle(boosted_ls).to_numpy()).reshape(-1)
    ttbar_mass = ttbar_frame.M.to_numpy().reshape(-1)
    scattering_angle = scattering_angle.to_numpy().reshape(-1)

    B1 = np.stack(cos_theta1, axis=1)
    B2 = np.stack(cos_theta2, axis=1)

    # C[:, i, j] = cos_theta1[i] * cos_theta2[j], with i, j running over (k, r, n)
    C = np.stack(
        [
            np.stack([cos_1 * cos_2 for cos_2 in cos_theta2], axis=1)
            for cos_1 in cos_theta1
        ],
        axis=1,
    )

    return scattering_angle, ttbar_mass, B1, B2, C, ll_cHel, ls, lbars, tops, tbars, valid_events


    

In [8]:
import os
import uproot
import awkward as ak

# Dilepton channels mapped to the [W+ daughter, W- daughter] PDG-ID pairs that
# define them (11 = electron, 13 = muon; negative values are the antiparticles)
channel_pdg_ids = dict(
    ee=[[-11, 11]],
    emu=[[-11, 13], [-13, 11]],
    mumu=[[-13, 13]],
)
channels = list(channel_pdg_ids)

# Directory receiving one ROOT file per channel; created if it does not exist
output_dir = "/depot/cms/top/bhanda25/Purdue_Analysis_EFT/EFT_nanogen_analysis/Output_root_nanogen"
os.makedirs(output_dir, exist_ok=True)

# Loop over channels: for each one, compute the spin-correlation observables
# for every PDG-ID pair, merge the pairs, and write one ROOT file.
for channel in channels:
    # Output name -> array accumulated over this channel's PDG-ID pairs.
    # A single mapping is the only place where names and arrays are paired, so
    # the set of written branches cannot drift away from what is computed.
    accumulated = None

    # Loop over PDG ID pairs (e.g. [-11, 13] and [-13, 11] for emu)
    for pdg_ids in channel_pdg_ids[channel]:
        # pdg_ids[0] is the required W+ daughter, pdg_ids[1] the W- daughter
        Wplus_with_lbar_daughters_mask = genpart_pid[mother_is_Wplus] == pdg_ids[0]
        Wminus_with_l_daughters_mask = genpart_pid[mother_is_Wminus] == pdg_ids[1]

        # Combined count of matching W+ and W- daughters must be exactly 2
        has_two_leptons = (
            ak.sum(Wplus_with_lbar_daughters_mask, axis=1) +
            ak.sum(Wminus_with_l_daughters_mask, axis=1)
        ) == 2

        dilepton_event_mask = has_two_tops & has_two_leptons

        # Compute observables
        (scat_angle, ttbar_mass, B1, B2, C, ll_cHel,
         l, lbar, top, tbar, valid_events) = compute_gen_spin_corrs(
            E, px, py, pz, is_top, is_antitop, dilepton_event_mask,
            top_with_wplus_daughter_mask, antitop_with_wminus_daughter_mask,
            mother_is_Wplus, mother_is_Wminus,
            Wplus_with_lbar_daughters_mask, Wminus_with_l_daughters_mask)

        # Weights of the events that survive both the dilepton and validity selections
        event_weights = gen_weight[dilepton_event_mask][valid_events]

        current = {
            "gen_scat_angle": scat_angle,
            "gen_ttbar_mass": ttbar_mass,
            "gen_hel_B1": B1,
            "gen_hel_B2": B2,
            "gen_hel_C": C,
            "gen_ll_cHel": ll_cHel,
            "gen_l_pt": l.pt,
            "gen_l_eta": l.eta,
            # Lepton phi and mass were previously accumulated but left out of
            # the output; they are now written like the top / tbar ones.
            "gen_l_phi": l.phi,
            "gen_l_mass": l.M,
            "gen_lbar_pt": lbar.pt,
            "gen_lbar_eta": lbar.eta,
            "gen_lbar_phi": lbar.phi,
            "gen_lbar_mass": lbar.M,
            "gen_top_pt": top.pt,
            "gen_top_eta": top.eta,
            "gen_top_phi": top.phi,
            "gen_top_mass": top.M,
            "gen_tbar_pt": tbar.pt,
            "gen_tbar_eta": tbar.eta,
            "gen_tbar_phi": tbar.phi,
            "gen_tbar_mass": tbar.M,
            "event_weight": event_weights,
        }

        # Nothing accumulated yet (or only an empty selection so far): take the
        # current pair's arrays as they are. Otherwise append them.
        if accumulated is None or len(accumulated["gen_scat_angle"]) == 0:
            accumulated = current
        else:
            accumulated = {
                name: np.concatenate((accumulated[name], values))
                for name, values in current.items()
            }

    if accumulated is None:
        # Only reachable if a channel is configured without any PDG-ID pair
        print(f"No PDG-ID pairs configured for {channel} channel; nothing written")
        continue

    # Write output ROOT file per channel, one entry per output name
    filename = f"{channel}_nanogen_central_sample_dileptonic.root"
    with uproot.recreate(f"{output_dir}/{filename}") as out_file:
        for name, branch in accumulated.items():
            out_file[name] = ak.Array(branch)

    print(f"✅ {channel} channel written to: {output_dir}/{filename}")


✅ ee channel written to: /depot/cms/top/bhanda25/Purdue_Analysis_EFT/EFT_nanogen_analysis/Output_root_nanogen/ee_nanogen_central_sample_dileptonic.root
✅ emu channel written to: /depot/cms/top/bhanda25/Purdue_Analysis_EFT/EFT_nanogen_analysis/Output_root_nanogen/emu_nanogen_central_sample_dileptonic.root
✅ mumu channel written to: /depot/cms/top/bhanda25/Purdue_Analysis_EFT/EFT_nanogen_analysis/Output_root_nanogen/mumu_nanogen_central_sample_dileptonic.root


In [9]:
# Re-open the ee-channel output file and read back the stored B1 entry
ee_output_path = '/depot/cms/top/bhanda25/Purdue_Analysis_EFT/EFT_nanogen_analysis/Output_root_nanogen/ee_nanogen_central_sample_dileptonic.root'
file2 = uproot.open(ee_output_path)
file2['gen_hel_B1;1'].arrays()