In [1]:
from glob import glob
from typing import Union, Optional
from tqdm.auto import tqdm
import uproot as ur
import numpy as np
import json
from phc import module_reload
module_reload('zhh')
from zhh import get_raw_files, get_preselection_meta, get_preselection_summary, default_locations, EVENT_CATEGORY_TRUE

# Tag of the preselection run; it selects the sub-directory used by DATA_ROOT.
version = 'v1'
# Presumably the local checkout of the ZHH repository; it is not referenced by
# any of the cells visible in this part of the notebook.
REPO_ROOT = '/afs/desy.de/user/b/bliewert/public/MarlinWorkdirs/ZHH'
# Directory with the preselection outputs. It is handed to the zhh helpers and
# is where the `{branch}_FinalStates.root` files are read from further below.
DATA_ROOT = f'/nfs/dust/ilc/user/bliewert/zhh/Preselection/{version}'

In [154]:
# Recursively collect every *.slcio file below the 500-TDR_ws production tree.
all_files = glob(
    '/pnfs/desy.de/ilc/prod/ilc/ild/copy/dst-merged/500-TDR_ws/**/*.slcio',
    recursive=True,
)

In [165]:
# Step 1: discard every path that contains the substring 'ILD_s5_o1_v02'.
pass_s5 = [path for path in all_files if 'ILD_s5_o1_v02' not in path]
print(f'Passing non-s5: {len(pass_s5)} (-{len(all_files) - len(pass_s5)})')

# Step 2: of the survivors, keep only paths that contain at least one entry of
# zhh.default_locations as a substring.
pass_s5_a_dl = [
    path
    for path in pass_s5
    if any(loc in path for loc in default_locations)
]
print(f'Passing non-s5 + in default locations: {len(pass_s5_a_dl)} (-{len(pass_s5) - len(pass_s5_a_dl)})')

Passing non-s5: 3322 (-4123)
Passing non-s5 + in default locations: 2691 (-631)


In [147]:
# Ask zhh for its collection of raw files and print how many entries it holds.
# NOTE(review): the effect of debug=True is defined inside zhh.get_raw_files and
# is not visible here; confirm whether it restricts the returned selection.
rf = get_raw_files(debug=True)
print(len(rf))

116


In [2]:
# Load the preselection metadata for DATA_ROOT; its structure is defined by
# zhh.get_preselection_meta and it is only passed on to get_preselection_summary.
meta = get_preselection_meta(DATA_ROOT)

In [4]:
# Build the per-branch summary. Later cells iterate over it row by row and read
# the fields 'branch', 'loc', 'process', 'status', 'pol_e', 'pol_p' and
# 'tDuration', so it is presumably a numpy structured array (TODO confirm).
results = get_preselection_summary(DATA_ROOT, meta)

In [171]:
# Show the entries whose tDuration exceeds 1800, sorted ascending by tDuration.
# NOTE(review): 1800 is presumably seconds (30 minutes) — confirm the unit.
np.sort(results.view()[results['tDuration'] > 1800][['loc', 'process', 'pol_e', 'pol_p', 'tDuration']], order=['tDuration'])

array([('hh', 'e1e1qqh',  1,  1, 1866.8632),
       ('hh', 'e2e2hh', -1,  1, 2025.1527),
       ('hh', 'e2e2hh',  1, -1, 2037.1294),
       ('hh', 'n1n1hh',  1, -1, 2196.8389),
       ('hh', 'e3e3hh', -1,  1, 2196.8726),
       ('hh', 'qqqqh', -1,  1, 2213.761 ),
       ('hh', 'e1e1qqh', -1, -1, 2235.6978),
       ('6f_ttbar', 'yyveev', -1,  1, 2261.2031),
       ('4f_ZZ_hadronic', '4f_zz_h', -1,  1, 2308.8687),
       ('6f_ttbar', 'yyveev',  1, -1, 2389.0977),
       ('hh', 'n1n1hh', -1,  1, 2393.8633),
       ('hh', 'e3e3hh',  1, -1, 2464.3025),
       ('hh', 'qqqqh',  1, -1, 2761.6877),
       ('hh', 'n1n1qqh', -1,  1, 2975.173 ),
       ('hh', 'n1n1qqh',  1, -1, 2997.345 ),
       ('hh', 'n23n23qqh',  1, -1, 3160.3406),
       ('hh', 'e1e1hh', -1, -1, 3358.022 ),
       ('hh', 'n23n23qqh', -1,  1, 3457.6848),
       ('hh', 'e1e1hh',  1, -1, 3565.2056),
       ('hh', 'e1e1hh', -1,  1, 3722.674 ),
       ('hh', 'e1e1hh',  1,  1, 4573.676 ),
       ('hh', 'n23n23hh',  1, -1, 4603.499 

Final State Analysis

In [6]:
def get_fs_errors(branch:int):
    """Read the per-event ``error_code`` values of one branch's final-state file.

    Opens ``{DATA_ROOT}/{branch}_FinalStates.root`` and reads the
    ``error_code`` branch of its ``eventTree``.

    Args:
        branch: Branch number that prefixes the ROOT file name.

    Returns:
        The array uproot yields for ``error_code``, or an empty list when the
        file or the branch cannot be read. The caller in this notebook sums
        the result, so an unreadable file contributes zero.
    """
    try:
        with ur.open(f'{DATA_ROOT}/{branch}_FinalStates.root') as rf:
            return rf['eventTree']['error_code'].array()
    except Exception:
        # Still best-effort for missing/corrupt files, but no longer a bare
        # ``except``: KeyboardInterrupt and SystemExit now propagate, so a long
        # scan over many branches can be interrupted.
        return []

In [7]:
# List every finished branch whose summed error_code values are positive.
print('Found errors in:')
for entry in results:
    # Only branches with status 'finished' are inspected.
    if not entry['status'] == 'finished':
        continue

    branch, location, proc = entry['branch'], entry['loc'], entry['process']
    n_errors = np.sum(get_fs_errors(branch))

    # A sum of zero means no error code was set (or the file was unreadable).
    if not n_errors > 0:
        continue

    print(f'Branch {branch} | Loc {location} | Process {proc} | CumErr {n_errors}')

Found errors in:
Branch 30 | Loc 4f_lowmee_singleZee_leptonic | Process 4f_lowmee_sze_l | CumErr 1176392
Branch 31 | Loc 4f_lowmee_singleZee_leptonic | Process 4f_lowmee_sze_l | CumErr 5983994
Branch 32 | Loc 4f_lowmee_singleZee_leptonic | Process 4f_lowmee_sze_l | CumErr 1176392
Branch 33 | Loc 4f_lowmee_singleZee_leptonic | Process 4f_lowmee_sze_l | CumErr 5983994
Branch 34 | Loc 4f_lowmee_singleZsingleWMix_lept | Process 4f_lowmee_szeorsw_l | CumErr 5989996
Branch 35 | Loc 4f_lowmee_singleZsingleWMix_lept | Process 4f_lowmee_szeorsw_l | CumErr 1836612
Branch 36 | Loc 4f_lowmee_singleZsingleWMix_lept | Process 4f_lowmee_szeorsw_l | CumErr 5269756


In [186]:
def get_fs_output(branch:int)->Optional[np.ndarray]:
    """Load event numbers and event categories of one branch's final-state file.

    Opens ``{DATA_ROOT}/{branch}_FinalStates.root`` and reads the ``event`` and
    ``event_category`` branches of its ``eventTree``.

    Args:
        branch: Branch number that prefixes the ROOT file name.

    Returns:
        A structured array with one row per event and the integer fields
        ``event`` and ``category``, or ``None`` when the file cannot be read
        or the array cannot be assembled.
    """
    try:
        with ur.open(f'{DATA_ROOT}/{branch}_FinalStates.root') as rf:
            tree = rf['eventTree']
            event = tree['event'].array()
            category = tree['event_category'].array()

            res = np.zeros(len(event), dtype=[
                ('event', 'i'),
                ('category', 'i')])

            res['event'] = event
            res['category'] = category

        return res
    except Exception:
        # Still best-effort for missing/corrupt files, but no longer a bare
        # ``except``: KeyboardInterrupt and SystemExit now propagate, so the
        # per-branch loop that calls this helper can be interrupted.
        return None

In [191]:
# Print one status line per branch:
#   INVALID     - get_fs_output returned None (file could not be read)
#   CLASS_ERROR - at least one event has category EVENT_CATEGORY_TRUE.OTHER;
#                 the first such event number is appended
#   OK          - no event has category EVENT_CATEGORY_TRUE.OTHER
print('Found errors in:')
for entry in results:
    branch, location, proc = entry['branch'], entry['loc'], entry['process']
    output = get_fs_output(branch)
    where = -1  # sentinel: nothing to append to the status line

    if output is None:
        state = 'INVALID'
    else:
        is_other = output['category'] == EVENT_CATEGORY_TRUE.OTHER
        if np.any(is_other):
            state = 'CLASS_ERROR'
            where = output['event'][is_other][0]
        else:
            state = 'OK'

    suffix = '' if where == -1 else f' (first at {where})'
    print(f'Branch {branch} | Loc {location} | Process {proc} | {state}' + suffix)

Found errors in:
Branch 0 | Loc 2f_Z_bhabhaNg | Process 2f_z_bhabhang | OK
Branch 1 | Loc 2f_Z_bhabhaNg | Process 2f_z_bhabhang | OK
Branch 2 | Loc 2f_Z_bhabhaNg | Process 2f_z_bhabhang | OK
Branch 3 | Loc 2f_Z_bhabhaNg | Process 2f_z_bhabhang | OK
Branch 4 | Loc 2f_Z_bhabhag | Process 2f_z_bhabhag | OK
Branch 5 | Loc 2f_Z_bhabhag | Process 2f_z_bhabhag | OK
Branch 6 | Loc 2f_Z_bhabhag | Process 2f_z_bhabhag | OK
Branch 7 | Loc 2f_Z_bhabhag | Process 2f_z_bhabhag | OK
Branch 8 | Loc 2f_Z_hadronic | Process 2f_z_h | OK
Branch 9 | Loc 2f_Z_hadronic | Process 2f_z_h | OK
Branch 10 | Loc 2f_Z_leptonic | Process 2f_z_l | OK
Branch 11 | Loc 2f_Z_leptonic | Process 2f_z_l | OK
Branch 12 | Loc 2f_Z_nuNg | Process 2f_z_nung | OK
Branch 13 | Loc 2f_Z_nuNg | Process 2f_z_nung | OK
Branch 14 | Loc 4f_WW_hadronic | Process 4f_ww_h | OK
Branch 15 | Loc 4f_WW_hadronic | Process 4f_ww_h | OK
Branch 16 | Loc 4f_WW_leptonic | Process 4f_ww_l | OK
Branch 17 | Loc 4f_WW_leptonic | Process 4f_ww_l | OK
Bra