In [None]:
# imports

import os
import sys
import json
import copy
import importlib
import numpy as np

# make the project importable: the top-level directory is taken to be
# three levels above the current working directory and is appended to sys.path.
# note: this must happen before the project-local imports below.
thisdir = os.getcwd()
topdir = os.path.abspath(os.path.join(thisdir, '../../../'))
sys.path.append(topdir)

# project-local imports (presumably resolved via the directory appended above)
from studies.clusters_2024.preprocessing.preprocessor import get_metype
from studies.clusters_2024.nmf.nmf_training import find_files
import studies.clusters_2024.nmf.nmf_testing_pattern as evaltools
# reload the evaluation module, so that a re-run of this cell
# picks up the current version of the module source
importlib.reload(evaltools)

In [None]:
# era and layer settings

# eras are written as '<era>-<version>'; layers are named 'BPix<number>'
layers = [f'BPix{number}' for number in range(1, 5)]
eras = [
    'B-v1', 'C-v1', 'D-v1', 'E-v1', 'E-v2',
    'F-v1', 'G-v1', 'H-v1', 'I-v1', 'I-v2',
]

# initialize config
# (filled further by the cells below)
config = {'eras': eras, 'layers': layers}

In [None]:
# set path to input files

# settings
datadir = '/eos/user/l/llambrec/dialstools-output'
year = '2024'
dataset = 'ZeroBias'
reco = 'PromptReco'
mebase = 'PixelPhase1-Phase1_MechanicalView-PXBarrel-clusters_per_SignedModuleCoord_per_SignedLadderCoord_PXLayer_{}'

# find files corresponding to settings
# (one file per era and layer, stored as a single-element list)
input_files = {}
for era in eras:
    # era names are of the form '<main era>-<version>'
    mainera, version = era.split('-')
    files_this_era = {}
    for layer in layers:
        # the last character of the layer name is filled into the name template
        me = mebase.format(layer[-1])
        fname = f'{dataset}-Run{year}{mainera}-{reco}-{version}-DQMIO-{me}.parquet'
        files_this_era[layer] = [os.path.join(datadir, fname)]
    input_files[era] = files_this_era

# existence check
# (sort every configured file into either 'present' or 'missing')
missing, present = [], []
for files_per_layer in input_files.values():
    for files in files_per_layer.values():
        for f in files:
            (present if os.path.exists(f) else missing).append(f)
if missing:
    raise Exception(f'The following files do not exist: {missing}')
print(f'Found {len(present)} files.')

# add to config
config['input_files'] = input_files

In [None]:
# set path to nmf model files

# NOTE(review): this is a relative path, so the entries stored in the config
#               depend on the working directory of whatever reads them - confirm this is intended.
modeldir = 'output_20250603/models'

# set path
# (one model file per era and layer; the file name uses the upper-case layer name)
nmf_files = {
    era: {
        layer: os.path.join(modeldir, f'nmf_model_{layer.upper()}_{era}.pkl')
        for layer in layers
    }
    for era in eras
}

# existence check
missing = [
    f
    for era in eras
    for f in nmf_files[era].values()
    if not os.path.exists(f)
]
if missing:
    raise Exception(f'The following files do not exist: {missing}')

# add to config
config['nmf_files'] = nmf_files

In [None]:
# settings for filtering

# min entries filter
# (the threshold for BPix<n> is the BPix1 threshold divided by n)
min_entries_bpix1 = 0.5e6
min_entries_filter = {f'BPix{n}': min_entries_bpix1 / n for n in range(1, 5)}

# OMS attribute filters
# (one json file per era, all of which must exist)
oms_filter_files = {
    era: f'/eos/user/l/llambrec/pixelae/studies/clusters_2024/omsdata/omsdata_Run2024{era}.json'
    for era in eras
}
for oms_filter_file in oms_filter_files.values():
    if os.path.exists(oms_filter_file): continue
    raise Exception(f'File {oms_filter_file} does not exist.')
oms_filter_keys = [
    'run_number',
    'lumisection_number',
    'beams_stable',
    'cms_active',
    'bpix_ready',
    'fpix_ready',
    'tibtid_ready',
    'tob_ready',
    'tecp_ready',
    'tecm_ready',
]

# add to config
config.update({
    'min_entries_filter': min_entries_filter,
    'oms_filter_files': oms_filter_files,
    'oms_filter_keys': oms_filter_keys,
})

In [None]:
# set path to automask data

do_automasking = False

# the automask data file is only resolved (and required to exist) if automasking is enabled;
# otherwise it is stored in the config as None.
automask_data_file = None
if do_automasking:
    automask_data_dir = '/eos/user/l/llambrec/pixelae/automasking/data/automask_data'
    automask_data_file = os.path.join(automask_data_dir, 'automask_2024.json')
    if not os.path.exists(automask_data_file):
        raise Exception(f'The automask data file {automask_data_file} does not exist.')

# add to config
# (single place where both automask entries are set, regardless of do_automasking)
config['do_automasking'] = do_automasking
config['automask_data_file'] = automask_data_file

In [None]:
# set path to loss mask data

do_loss_masking = True

# the loss mask files are only collected if loss masking is enabled;
# otherwise None is stored in the config.
loss_mask_files = None
if do_loss_masking:
    loss_mask_files = {}
    for era in eras:
        # strip a potential '-part...' suffix from the era name
        # (an era name without such a suffix is used unmodified)
        loss_mask_era = era.split('-part')[0]
        files_this_era = {}
        for layer in layers:
            zerofrac_file = f'/eos/user/l/llambrec/pixelae/studies/clusters_2024/preprocessing/normdata/zerofrac_Run2024{loss_mask_era}_{get_metype(layer)}.npy'
            if not os.path.exists(zerofrac_file):
                raise Exception(f'The file {zerofrac_file} does not exist.')
            files_this_era[layer] = zerofrac_file
        loss_mask_files[era] = files_this_era

# add to config
config.update({
    'do_loss_masking': do_loss_masking,
    'loss_mask_files': loss_mask_files,
})

In [None]:
# set evaluation parameters

# general
batch_size = 3000
threshold = 0.1
flag_patterns = [np.ones(shape) for shape in [(1, 8), (2, 4)]]

# cleaning
do_per_layer_cleaning = True
cleaning_patterns = [np.ones(shape) for shape in [(2, 8)]]
cleaning_threshold = 1.5

# add to config
# (patterns are numpy arrays and are converted to nested lists before being stored)
config.update({
    'batch_size': batch_size,
    'threshold': threshold,
    'flag_patterns': [pattern.tolist() for pattern in flag_patterns],
    'do_per_layer_cleaning': do_per_layer_cleaning,
    'cleaning_patterns': [pattern.tolist() for pattern in cleaning_patterns],
    'cleaning_threshold': cleaning_threshold,
})

In [None]:
# set output file

# note: the output directory is hard-coded instead of derived from getcwd(),
#       as the latter seems to give some kind of virtual directory that is ok in the notebook
#       but gives errors when used in the lxplus terminal or condor job.
outputdir = '/eos/user/l/llambrec/pixelae/studies/clusters_2024/nmf'
outputfile = os.path.join(outputdir, 'flagged_lumisections.json')

config['outputfile'] = outputfile

In [None]:
# make the configuration

# if split_per_era is True, one json config file per era is written
# (named '<basename>_<era>.json'); else a single file '<basename>.json' with all eras.
# note: the config files are written relative to the current working directory.
split_per_era = True
basename = 'temp_config'

if split_per_era:
    # split the output file name only at its extension, so that the era tag
    # is inserted exactly once (also if '.json' occurs elsewhere in the path),
    # and each era gets a distinct output file even if the extension differs.
    outputroot, outputext = os.path.splitext(outputfile)
    for era in eras:
        # start from a full copy and restrict all per-era entries to this era
        this_config = copy.deepcopy(config)
        this_config['eras'] = [era]
        this_config['input_files'] = {era: input_files[era]}
        this_config['nmf_files'] = {era: nmf_files[era]}
        this_config['oms_filter_files'] = {era: oms_filter_files[era]}
        this_config['outputfile'] = f'{outputroot}_{era}{outputext}'
        # loss mask files are None if loss masking is disabled
        if this_config['loss_mask_files'] is not None:
            this_config['loss_mask_files'] = {era: loss_mask_files[era]}
        configfile = f'{basename}_{era}.json'
        with open(configfile, 'w') as f:
            json.dump(this_config, f)

else:
    configfile = f'{basename}.json'
    with open(configfile, 'w') as f:
        json.dump(config, f)

In [None]:
# optional: run the configuration

# set to True to run the evaluation directly in this notebook.
# by default the evaluation is not run, and this cell does nothing
# (so that the notebook still runs top to bottom without errors).
do_run_evaluation = False

if do_run_evaluation:
    # assumes evaluate returns the flagged run numbers and lumisection numbers
    # as two sequences of equal length - TODO confirm against evaltools.
    flagged_run_numbers, flagged_ls_numbers = evaltools.evaluate(config)

    # print flagged lumisections
    flagged_run_numbers_unique = np.unique(flagged_run_numbers)
    print(f'  Found {len(flagged_run_numbers)} flagged lumisections in {len(flagged_run_numbers_unique)} runs.')
    for flagged_run_number, flagged_ls_number in zip(flagged_run_numbers, flagged_ls_numbers):
        print(f'- Run {flagged_run_number}, LS {flagged_ls_number}')