In [15]:
import numpy as np
from phc import module_reload
module_reload('zhh')
from zhh import get_runtime_analysis, evaluate_runtime, get_adjusted_time_per_event, get_sample_chunk_splits
from typing import Optional
from math import floor, ceil

# Version tag that is interpolated into DATA_ROOT below.
version = 'v1'
# NOTE(review): REPO_ROOT, ILD_VERSION and PROD_NAME are not referenced by any
# cell visible in this export - confirm they are still needed.
REPO_ROOT = '/afs/desy.de/user/b/bliewert/public/MarlinWorkdirs/ZHH'
# Root directory handed to evaluate_runtime and get_runtime_analysis further down.
DATA_ROOT = f'/nfs/dust/ilc/user/bliewert/zhh/PreselectionRuntime/{version}'
ILD_VERSION = 'ILD_l5_o1_v02'
PROD_NAME = '500-TDR_ws'

# Index files stored under the CreateRawIndex/v1 directory.
# NOTE(review): the 'v1' in these two paths is hard-coded and does not follow
# `version` - confirm that the two version tags are meant to be independent.
PROCESS_INDEX = '/afs/desy.de/user/b/bliewert/nfs/zhh/CreateRawIndex/v1/processes.npy'
SAMPLE_INDEX = '/afs/desy.de/user/b/bliewert/nfs/zhh/CreateRawIndex/v1/samples.npy'

# Both arrays are indexed by field name in later cells ('process' on both,
# 'run_id' and 'proc_pol' on samples).
processes = np.load(PROCESS_INDEX)
samples = np.load(SAMPLE_INDEX)

In [16]:
# Distinct process names in the process index; `u` is displayed in the next cell.
u = np.unique(processes['process'])
# Number of distinct processes.
print(len(u))

99


In [17]:
# Display the unique process names (np.unique returns them sorted).
u

array(['2f_z_bhabhag', '2f_z_bhabhang', '2f_z_h', '2f_z_l', '2f_z_nung',
       '4f_lowmee_sze_l', '4f_lowmee_szeorsw_l', '4f_sw_l', '4f_sw_sl',
       '4f_sze_l', '4f_sze_sl', '4f_szeorsw_l', '4f_sznu_l', '4f_sznu_sl',
       '4f_ww_h', '4f_ww_l', '4f_ww_sl', '4f_zz_h', '4f_zz_l', '4f_zz_sl',
       '4f_zzorww_h', '4f_zzorww_l', 'e1e1hh', 'e1e1qqh', 'e2e2hh',
       'e2e2qqh', 'e3e3hh', 'e3e3qqh', 'eeeeee', 'eeeell', 'eeeexx',
       'eeeeyy', 'eellxx', 'eellyy', 'eeveev', 'eevelv', 'eeveyx',
       'eevlev', 'eevllv', 'eevlyx', 'eexyev', 'eexylv', 'eexyyx',
       'llllee', 'llllll', 'llvelv', 'llveyx', 'llvlev', 'llvllv',
       'llvlyx', 'llxyev', 'llxylv', 'llxyyx', 'n1n1hh', 'n1n1qqh',
       'n23n23hh', 'n23n23qqh', 'qqhh', 'qqqqh', 'vvveev', 'vvvelv',
       'vvveyx', 'vvvlev', 'vvvllv', 'vvvlyx', 'vvvvxx', 'vvvvyy',
       'vvxyev', 'vvxylv', 'vvxyyx', 'xxveev', 'xxvelv', 'xxveyx',
       'xxvlev', 'xxvllv', 'xxvlyx', 'xxxxee', 'xxxxll', 'xxxxvv',
       'xxxxxx', 'xxxyev', 'x

In [18]:
# Spot-check a single record: the second argument (0) reappears as the first
# element of the returned tuple.
# NOTE(review): presumably this is a job/branch index - confirm against evaluate_runtime.
evaluate_runtime(DATA_ROOT, 0)

(0,
 '2f_z_bhabhag',
 49,
 '/pnfs/desy.de/ilc/prod/ilc/ild/copy/dst-merged/500-TDR_ws/2f_Z_bhabhag/ILD_l5_o1_v02/v02-00-01/rv02-00-01.sv02-00-01.mILD_l5_o1_v02.E500-TDR_ws.I250101.P2f_z_bhabhag.eL.pL.n001.d_dstm_10366_0.slcio',
 1721879914,
 1721879916,
 2,
 0)

In [19]:
# Build the runtime-analysis array for DATA_ROOT; later cells read its
# 'tDuration' field and pass it to get_adjusted_time_per_event.
runtime_analysis = get_runtime_analysis(DATA_ROOT=DATA_ROOT)
runtime_analysis

array([(  0, '2f_z_bhabhag', 49, '/pnfs/desy.de/ilc/prod/ilc/ild/copy/dst-merged/500-TDR_ws/2f_Z_bhabhag/ILD_l5_o1_v02/v02-00-01/rv02-00-01.sv02-00-01.mILD_l5_o1_v02.E500-TDR_ws.I250101.P2f_z_bhabhag.eL.pL.n001.d_dstm_10366_0.slcio', 1.7218799e+09, 1.7218799e+09,   2., 0),
       (  1, '2f_z_bhabhag', 49, '/pnfs/desy.de/ilc/prod/ilc/ild/copy/dst-merged/500-TDR_ws/2f_Z_bhabhag/ILD_l5_o1_v02/v02-00-01/rv02-00-01.sv02-00-01.mILD_l5_o1_v02.E500-TDR_ws.I250102.P2f_z_bhabhag.eL.pR.n001.d_dstm_10366_0.slcio', 1.7218799e+09, 1.7218799e+09,   3., 0),
       (  2, '2f_z_bhabhag', 49, '/pnfs/desy.de/ilc/prod/ilc/ild/copy/dst-merged/500-TDR_ws/2f_Z_bhabhag/ILD_l5_o1_v02/v02-00-01/rv02-00-01.sv02-00-01.mILD_l5_o1_v02.E500-TDR_ws.I250104.P2f_z_bhabhag.eR.pL.n001.d_dstm_10366_0.slcio', 1.7218799e+09, 1.7218799e+09,   2., 0),
       (  3, '2f_z_bhabhag', 49, '/pnfs/desy.de/ilc/prod/ilc/ild/copy/dst-merged/500-TDR_ws/2f_Z_bhabhag/ILD_l5_o1_v02/v02-00-01/rv02-00-01.sv02-00-01.mILD_l5_o1_v02.E500-TDR_ws.

In [20]:
# Smallest and largest value of the 'tDuration' field over all runtime records.
tD = runtime_analysis['tDuration']
print(f'{tD.min()} : {tD.max()}')

1.0 : 741.0


In [23]:
# Adjusted time per event with the last two arguments left as None; in the
# output shown the last column equals the second column (up to float rounding).
# NOTE(review): the following cell rebinds adjusted_time_per_event with (5, 3),
# and the execution counts are out of order (In [23] here, In [21] there), so
# which result later cells see depends on the run order - confirm the intent.
adjusted_time_per_event = get_adjusted_time_per_event(runtime_analysis, True, None, None)
adjusted_time_per_event

array([('2f_z_bhabhag',   2.5      , 49,   2.5      ),
       ('2f_z_bhabhang',   2.       , 49,   2.       ),
       ('2f_z_h',  43.       , 49,  43.       ),
       ('2f_z_l',   2.5      , 49,   2.5      ),
       ('2f_z_nung',   1.       , 49,   1.       ),
       ('4f_lowmee_sze_l',   2.25     , 49,   2.25     ),
       ('4f_lowmee_szeorsw_l',   1.25     , 49,   1.25     ),
       ('4f_sw_l',   3.25     , 49,   3.25     ),
       ('4f_sw_sl',  10.       , 49,  10.       ),
       ('4f_sze_l',   2.75     , 49,   2.75     ),
       ('4f_sze_sl',  16.5      , 49,  16.5      ),
       ('4f_szeorsw_l',   2.5      , 49,   2.5      ),
       ('4f_sznu_l',   2.5      , 49,   2.5      ),
       ('4f_sznu_sl',  37.5      , 49,  37.5      ),
       ('4f_ww_h',  12.5      , 49,  12.5      ),
       ('4f_ww_l',   3.       , 49,   3.       ),
       ('4f_ww_sl',  11.5      , 49,  11.5      ),
       ('4f_zz_h',  85.5      , 49,  85.5      ),
       ('4f_zz_l',   3.5      , 49,   3.5000002),
    

In [21]:
# Same call with the last two arguments set to 5 and 3. In the output shown,
# the last column never exceeds 5 and is 1 wherever the second column is below 3.
# NOTE(review): presumably an upper cap and a lower threshold - confirm against
# get_adjusted_time_per_event.
adjusted_time_per_event = get_adjusted_time_per_event(runtime_analysis, True, 5, 3)
adjusted_time_per_event

array([('2f_z_bhabhag',   2.5      , 49, 1.       ),
       ('2f_z_bhabhang',   2.       , 49, 1.       ),
       ('2f_z_h',  43.       , 49, 5.       ),
       ('2f_z_l',   2.5      , 49, 1.       ),
       ('2f_z_nung',   1.       , 49, 1.       ),
       ('4f_lowmee_sze_l',   2.25     , 49, 1.       ),
       ('4f_lowmee_szeorsw_l',   1.25     , 49, 1.       ),
       ('4f_sw_l',   3.25     , 49, 3.25     ),
       ('4f_sw_sl',  10.       , 49, 5.       ),
       ('4f_sze_l',   2.75     , 49, 1.       ),
       ('4f_sze_sl',  16.5      , 49, 5.       ),
       ('4f_szeorsw_l',   2.5      , 49, 1.       ),
       ('4f_sznu_l',   2.5      , 49, 1.       ),
       ('4f_sznu_sl',  37.5      , 49, 5.       ),
       ('4f_ww_h',  12.5      , 49, 5.       ),
       ('4f_ww_l',   3.       , 49, 3.       ),
       ('4f_ww_sl',  11.5      , 49, 5.       ),
       ('4f_zz_h',  85.5      , 49, 5.       ),
       ('4f_zz_l',   3.5      , 49, 3.5000002),
       ('4f_zz_sl',  99.       , 49, 5.   

In [14]:
# Compare the number of samples per process with the number obtained after
# scaling each process by its 'tPE' value from adjusted_time_per_event.
# n_before sums the raw sample counts, n_after the scaled counts rounded up.
n_before = 0
n_after = 0

# A single pass over samples yields the sorted unique process names together
# with how often each occurs, instead of rescanning samples once per process.
process_names, sample_counts = np.unique(samples['process'], return_counts=True)

for process, n_samples in zip(process_names, sample_counts):
    entry = adjusted_time_per_event[adjusted_time_per_event["process"] == process]
    if entry.size != 1:
        # Zero or several matches would otherwise surface as an opaque
        # TypeError from ceil() on a non-scalar array.
        raise ValueError(
            f'Expected exactly one adjusted_time_per_event entry for {process}, found {entry.size}'
        )

    # Work with plain Python scalars: calling ceil() on a 1-element array relies
    # on an array-to-scalar conversion that NumPy deprecated in 1.25, and the
    # printout now reads e.g. 40.0 instead of [40.].
    n_samples = int(n_samples)
    n_samples_adj = n_samples * float(entry['tPE'][0])
    print(f'{process} -> {n_samples} -> {n_samples_adj}')

    n_before += n_samples
    n_after += ceil(n_samples_adj)

print(f'Before {n_before} | After {n_after}')

2f_z_bhabhag -> 40 -> [40.]
2f_z_bhabhang -> 646 -> [646.]
2f_z_h -> 230 -> [1150.]
2f_z_l -> 24 -> [24.]
2f_z_nung -> 477 -> [477.]
4f_lowmee_sze_l -> 4 -> [4.]
4f_lowmee_szeorsw_l -> 4 -> [4.]
4f_sw_l -> 56 -> [56.]
4f_sw_sl -> 38 -> [190.]
4f_sze_l -> 269 -> [269.]
4f_sze_sl -> 36 -> [180.]
4f_szeorsw_l -> 36 -> [36.]
4f_sznu_l -> 5 -> [5.]
4f_sznu_sl -> 11 -> [55.]
4f_ww_h -> 55 -> [247.5]
4f_ww_l -> 7 -> [21.]
4f_ww_sl -> 43 -> [172.]
4f_zz_h -> 11 -> [55.]
4f_zz_l -> 2 -> [2.]
4f_zz_sl -> 41 -> [205.]
4f_zzorww_h -> 48 -> [240.]
4f_zzorww_l -> 15 -> [15.]
e1e1hh -> 121 -> [605.]
e1e1qqh -> 80 -> [400.]
e2e2hh -> 60 -> [300.]
e2e2qqh -> 42 -> [210.]
e3e3hh -> 59 -> [295.]
e3e3qqh -> 42 -> [210.]
eeeeee -> 4 -> [4.]
eeeell -> 4 -> [4.]
eeeexx -> 4 -> [17.]
eeeeyy -> 4 -> [20.]
eellxx -> 4 -> [20.]
eellyy -> 4 -> [20.]
eeveev -> 4 -> [4.]
eevelv -> 4 -> [12.]
eeveyx -> 10 -> [50.]
eevlev -> 4 -> [4.]
eevllv -> 5 -> [5.]
eevlyx -> 14 -> [49.]
eexyev -> 10 -> [30.]
eexylv -> 13 -> [48

In [9]:
# Show every sample record whose 'process' field is 'eevlev'.
samples[samples['process'] == 'eevlev']

array([(108628, 'eevlev', 'eevlev_LR', 14948, -1,  1, '/pnfs/desy.de/ilc/prod/ilc/ild/copy/dst-merged/500-TDR_ws/6f_eeWW/ILD_l5_o1_v02/v02-00-01/rv02-00-01.sv02-00-01.mILD_l5_o1_v02.E500-TDR_ws.I108628.Peevlev.eL.pR.n001.d_dstm_10361_0.slcio'),
       (108629, 'eevlev', 'eevlev_RL', 10000,  1, -1, '/pnfs/desy.de/ilc/prod/ilc/ild/copy/dst-merged/500-TDR_ws/6f_eeWW/ILD_l5_o1_v02/v02-00-01/rv02-00-01.sv02-00-01.mILD_l5_o1_v02.E500-TDR_ws.I108629.Peevlev.eR.pL.n001.d_dstm_10361_0.slcio'),
       (108630, 'eevlev', 'eevlev_RR', 10000,  1,  1, '/pnfs/desy.de/ilc/prod/ilc/ild/copy/dst-merged/500-TDR_ws/6f_eeWW/ILD_l5_o1_v02/v02-00-01/rv02-00-01.sv02-00-01.mILD_l5_o1_v02.E500-TDR_ws.I108630.Peevlev.eR.pR.n001.d_dstm_10361_0.slcio'),
       (108627, 'eevlev', 'eevlev_LL', 10000, -1, -1, '/pnfs/desy.de/ilc/prod/ilc/ild/copy/dst-merged/500-TDR_ws/6f_eeWW/ILD_l5_o1_v02/v02-00-01/rv02-00-01.sv02-00-01.mILD_l5_o1_v02.E500-TDR_ws.I108627.Peevlev.eL.pL.n001.d_dstm_10361_0.slcio')],
      dtype=[('run_

In [42]:
# Load the chunk table stored under CreatePreselectionChunks/v1.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects, which
# is unsafe for files from an untrusted source. The two index files loaded at
# the top of the notebook are read without it - confirm whether chunks.npy
# really needs pickling.
c = np.load('/nfs/dust/ilc/user/bliewert/zhh/CreatePreselectionChunks/v1/chunks.npy', allow_pickle=True)

In [43]:
# Display the loaded chunk table.
c

array([('2f_z_bhabhang', '2f_z_bhabhang_LR', '/pnfs/desy.de/ilc/prod/ilc/ild/copy/dst-merged/500-TDR_ws/2f_Z_bhabhaNg/ILD_l5_o1_v02/v02-00-01/rv02-00-01.sv02-00-01.mILD_l5_o1_v02.E500-TDR_ws.I250127.P2f_z_bhabhang.eL.pR.n005.d_dstm_10609_88.slcio', 18600, 1,    0),
       ('qqhh', 'qqhh_RL', '/pnfs/desy.de/ilc/prod/ilc/mc-2020/ild/dst-merged/500-TDR_ws/hh/ILD_l5_o1_v02/v02-02-03/00015747/000/rv02-02-03.sv02-02-03.mILD_l5_o1_v02.E500-TDR_ws.I403010.Pqqhh.eR.pL.n012.d_dstm_15747_103.slcio',  1197, 5,    0),
       ('qqhh', 'qqhh_RL', '/pnfs/desy.de/ilc/prod/ilc/mc-2020/ild/dst-merged/500-TDR_ws/hh/ILD_l5_o1_v02/v02-02-03/00015747/000/rv02-02-03.sv02-02-03.mILD_l5_o1_v02.E500-TDR_ws.I403010.Pqqhh.eR.pL.n012.d_dstm_15747_103.slcio',  1197, 5, 1197),
       ...,
       ('yyvlyx', 'yyvlyx_LR', '/pnfs/desy.de/ilc/prod/ilc/ild/copy/dst-merged/500-TDR_ws/6f_ttbar/ILD_l5_o1_v02/v02-00-01/rv02-00-01.sv02-00-01.mILD_l5_o1_v02.E500-TDR_ws.I108673.Pyyvlyx.eL.pR.n006.d_dstm_10276_14.slcio',  1388, 5,

In [44]:
# Number of records in the chunk table.
len(c)

12272

In [23]:
# Select the sample record(s) with run_id 108628; the output shows this to be
# the eevlev_LR sample.
s = samples[samples['run_id'] == 108628]
print(s)



[(108628, 'eevlev', 'eevlev_LR', 14948, -1, 1, '/pnfs/desy.de/ilc/prod/ilc/ild/copy/dst-merged/500-TDR_ws/6f_eeWW/ILD_l5_o1_v02/v02-00-01/rv02-00-01.sv02-00-01.mILD_l5_o1_v02.E500-TDR_ws.I108628.Peevlev.eL.pR.n001.d_dstm_10361_0.slcio')]


In [36]:
# Compute chunk splits for the first 10 samples only, using whichever
# adjusted_time_per_event binding is current in the kernel.
# NOTE(review): adjusted_time_per_event is assigned in two earlier cells with
# different arguments - confirm which one is intended here.
sample_chunk_splits = get_sample_chunk_splits(samples[:10], adjusted_time_per_event)

In [37]:
# Number of chunks produced for the 10 input samples.
len(sample_chunk_splits)

33

In [38]:
# Map each chunk's position in sample_chunk_splits to a tuple of its
# 'location', 'chunk_start' and 'chunk_size' fields. enumerate() supplies the
# running index, so no manual counter or loop variable is left in the namespace.
res = {
    position: (chunk['location'], chunk['chunk_start'], chunk['chunk_size'])
    for position, chunk in enumerate(sample_chunk_splits)
}

In [39]:
# Display the position -> (location, chunk_start, chunk_size) mapping.
res

{0: ('/pnfs/desy.de/ilc/prod/ilc/ild/copy/dst-merged/500-TDR_ws/2f_Z_bhabhaNg/ILD_l5_o1_v02/v02-00-01/rv02-00-01.sv02-00-01.mILD_l5_o1_v02.E500-TDR_ws.I250127.P2f_z_bhabhang.eL.pR.n005.d_dstm_10609_88.slcio',
  0,
  18600),
 1: ('/pnfs/desy.de/ilc/prod/ilc/mc-2020/ild/dst-merged/500-TDR_ws/hh/ILD_l5_o1_v02/v02-02-03/00015747/000/rv02-02-03.sv02-02-03.mILD_l5_o1_v02.E500-TDR_ws.I403010.Pqqhh.eR.pL.n012.d_dstm_15747_103.slcio',
  0,
  1197),
 2: ('/pnfs/desy.de/ilc/prod/ilc/mc-2020/ild/dst-merged/500-TDR_ws/hh/ILD_l5_o1_v02/v02-02-03/00015747/000/rv02-02-03.sv02-02-03.mILD_l5_o1_v02.E500-TDR_ws.I403010.Pqqhh.eR.pL.n012.d_dstm_15747_103.slcio',
  1197,
  1197),
 3: ('/pnfs/desy.de/ilc/prod/ilc/mc-2020/ild/dst-merged/500-TDR_ws/hh/ILD_l5_o1_v02/v02-02-03/00015747/000/rv02-02-03.sv02-02-03.mILD_l5_o1_v02.E500-TDR_ws.I403010.Pqqhh.eR.pL.n012.d_dstm_15747_103.slcio',
  2394,
  1197),
 4: ('/pnfs/desy.de/ilc/prod/ilc/mc-2020/ild/dst-merged/500-TDR_ws/hh/ILD_l5_o1_v02/v02-02-03/00015747/000/rv0

In [25]:
# Print the (chunk_size, proc_pol) pair of every chunk, one per line.
# NOTE(review): the saved output belongs to execution In [25], which predates
# the In [36] call that rebinds sample_chunk_splits, so it may be stale
# (e.g. 8858 here vs. 18600 in the `res` output) - re-run to confirm.
for comb in sample_chunk_splits[['chunk_size', 'proc_pol']]:
    print(comb)

(8858, '2f_z_bhabhang_LR')
(8858, '2f_z_bhabhang_LR')
(1197, 'qqhh_RL')
(1197, 'qqhh_RL')
(1197, 'qqhh_RL')
(1197, 'qqhh_RL')
(1197, 'qqhh_RL')
(1600, 'e3e3hh_LR')
(1600, 'e3e3hh_LR')
(1600, 'e3e3hh_LR')
(1600, 'e3e3hh_LR')
(1280, 'n23n23qqh_LR')
(1280, 'n23n23qqh_LR')
(1280, 'n23n23qqh_LR')
(1280, 'n23n23qqh_LR')
(18462, '2f_z_nung_LR')
(18462, '2f_z_nung_LR')
(1644, 'n23n23qqh_RL')
(1644, 'n23n23qqh_RL')
(1644, 'n23n23qqh_RL')
(1644, 'n23n23qqh_RL')
(1644, 'n23n23qqh_RL')
(2000, 'yyvllv_LR')
(2000, 'yyvllv_LR')
(2000, 'yyvllv_LR')
(2000, 'yyvllv_LR')
(1033, '4f_lowmee_sze_l_LR')
(1033, '4f_lowmee_sze_l_LR')
(1680, 'n23n23qqh_RL')
(1680, 'n23n23qqh_RL')
(1680, 'n23n23qqh_RL')
(1680, 'n23n23qqh_RL')
(1400, 'yyveyx_LR')
(1400, 'yyveyx_LR')
(1400, 'yyveyx_LR')
(1400, 'yyveyx_LR')
(2621, '2f_z_h_LR')
(2621, '2f_z_h_LR')
(2621, '2f_z_h_LR')
(1400, 'yyveyx_LR')
(1400, 'yyveyx_LR')
(1400, 'yyveyx_LR')
(1400, 'yyveyx_LR')
(1954, 'xxxyyx_LR')
(1954, 'xxxyyx_LR')
(1015, 'yycyyc_RL')
(1015, 'yyc

In [20]:
# Display the 'proc_pol' field of all samples.
samples['proc_pol']

array(['2f_z_bhabhang_LR', 'qqhh_RL', 'e3e3hh_LR', ..., '4f_sze_l_LR',
       'qqhh_LR', 'yyvlyx_LR'], dtype='<U64')

In [24]:
# Display the 'chunk_size' field of all chunk splits.
# NOTE(review): the saved output belongs to execution In [24], which predates
# the In [36] call that rebinds sample_chunk_splits - it may be stale.
sample_chunk_splits['chunk_size']

array([8858, 8858, 1197, ..., 1388, 1388, 1388], dtype=int32)