In [None]:
# imports

import os
import sys
import json
import time
import joblib
import importlib
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt

# add the directory three levels above the current working directory to the module search path
# (presumably the project root that holds the tools/plotting/automasking/studies packages imported below)
# NOTE(review): this depends on the working directory of the kernel, not on the notebook file location — confirm
thisdir = os.getcwd()
topdir = os.path.abspath(os.path.join(thisdir, '../../../'))
sys.path.append(topdir)

import tools.iotools as iotools
import tools.dftools as dftools
import tools.patternfiltering as patternfiltering
import tools.rebinning as rebinning
import plotting.plottools as plottools
from automasking.tools.automaskreader import AutomaskReader
from studies.clusters_2024.preprocessing.preprocessor import PreProcessor
import studies.clusters_2024.nmf.nmf_testing_pattern as evaltools

In [None]:
# locate the input parquet files (one per layer) for the chosen era

era = 'B-v1'
layers = [1, 2, 3, 4]

# settings used to build the file names
datadir = '/eos/user/l/llambrec/dialstools-output'
year = '2024'
dataset = 'ZeroBias'
reco = 'PromptReco'
mebase = (
    'PixelPhase1-Phase1_MechanicalView-PXBarrel-'
    'clusters_per_SignedModuleCoord_per_SignedLadderCoord_PXLayer_'
)

# build the expected file path for every layer
# (the era name is split once on '-' into the main era and the version)
mainera, version = era.split('-', 1)
input_files = {
    era: {
        layer: os.path.join(
            datadir,
            f'{dataset}-Run{year}{mainera}-{reco}-{version}-DQMIO-{mebase}{layer}.parquet'
        )
        for layer in layers
    }
}

# existence check: sort every expected file into present or missing
missing, present = [], []
for per_layer_files in input_files.values():
    for path in per_layer_files.values():
        (present if os.path.exists(path) else missing).append(path)
if missing:
    raise Exception(f'The following files do not exist: {missing}')
print(f'Found {len(present)} files.')

In [None]:
# load the trained nmf models (one per layer) for the chosen era

modeldir = 'output_test/models'

# expected model file per layer (new naming convention);
# old convention was: os.path.join(modeldir, f'PXLayer_{layer}', f'nmf_model_era{era}.pkl')
nmf_files = {
    era: {
        layer: os.path.join(modeldir, f'nmf_model_BPIX{layer}_{era}.pkl')
        for layer in layers
    }
}

# existence check
missing = [path for path in nmf_files[era].values() if not os.path.exists(path)]
if missing:
    raise Exception(f'The following files do not exist: {missing}')

# load models
# (joblib.load unpickles the file, so only load model files from a trusted source)
nmfs = {
    era: {layer: joblib.load(nmf_files[era][layer]) for layer in layers}
}

In [None]:
# make preprocessors

# an era name carrying a '-part' suffix is reduced to the part before that suffix
preprocessor_era = era.split('-part')[0] if '-part' in era else era
preprocessors = evaltools.make_preprocessors([preprocessor_era], layers)

In [None]:
# settings for automasking

do_automasking = False
# path to the automask data file; must be set when do_automasking is True
automask_data_file = None

if do_automasking:
    # fail early with a clear message instead of passing None to the reader
    if automask_data_file is None:
        raise ValueError('do_automasking is True but automask_data_file is not set.')
    # use the setting defined above (no `config` object is defined in this notebook)
    automask_reader = AutomaskReader(automask_data_file)
    # one preprocessor per layer, used later to bring the automask maps
    # into the same format as the loss maps
    automask_map_preprocessors = {}
    for layer in layers:
        automask_map_preprocessors[layer] = PreProcessor(f'PXLayer_{layer}')

In [None]:
# settings for loss masking

do_loss_masking = True

if do_loss_masking:
    # an era name carrying a '-part' suffix is reduced to the part before that suffix
    loss_mask_era = era.split('-part')[0] if '-part' in era else era
    loss_masks = {era: {}}
    loss_mask_preprocessors = {}
    for layer in layers:
        zerofrac = np.load(f'../preprocessing/normdata/zerofrac_Run2024{loss_mask_era}_PXLayer_{layer}.npy')
        # the mask is True where the stored zero-fraction is below 0.9
        # (presumably the fraction of lumisections in which a bin is empty — TODO confirm)
        loss_masks[era][layer] = (zerofrac < 0.9)
        loss_mask_preprocessors[layer] = PreProcessor(f'PXLayer_{layer}')

In [None]:
# other evaluation settings

# loss values above this threshold are set to 1 in the binary loss map
threshold = 0.1
# all-ones patterns of the given shapes
flag_patterns = [np.ones(shape) for shape in [(1, 8), (2, 4)]]

# cleaning of the binary loss maps (per layer)
do_per_layer_cleaning = True
cleaning_patterns = [np.ones(shape) for shape in [(2, 8)]]
cleaning_threshold = 1.5

In [None]:
# load available run and lumisection numbers

# read only the columns needed for the selection, from the file of the first layer
dftemp = iotools.read_parquet(
    input_files[era][layers[0]],
    columns=['run_number', 'ls_number', 'entries']
)

# keep only the lumisections with more than 0.5e6 entries
has_enough_entries = dftemp['entries'] > 0.5e6
dftemp = dftemp[has_enough_entries]

available_run_numbers = dftemp['run_number'].values
available_ls_numbers = dftemp['ls_number'].values
unique_runs = np.unique(available_run_numbers)

print('Available runs:', unique_runs, sep='\n')

In [None]:
# load flagged run and lumisection numbers

# alternative location used earlier:
#flagged_ls_file = f'output_20250528/flagged_lumisections_withlossmask_loosecleaning_dcsfilters/flagged_lumisections_{era}.json'
flagged_ls_file = f'flagged_lumisections_{era}.json'

# the json file holds the flagged run/lumisection numbers and the filter results
with open(flagged_ls_file, 'r') as infile:
    info = json.load(infile)
flagged_run_numbers = np.array(info['flagged_run_numbers'])
flagged_ls_numbers = np.array(info['flagged_ls_numbers'])
filter_results = info['filter_results']

# printouts
n_flagged = len(flagged_run_numbers)
print(f'Found {n_flagged} flagged lumisections:')
for flagged_run, flagged_ls in zip(flagged_run_numbers, flagged_ls_numbers):
    print(f'  - Run {flagged_run}, LS {flagged_ls}')

In [None]:
# find out why a given lumisection did not pass the selections

test_runlumis = [(385168, 415)]
for runlumi in test_runlumis:
    # the (run, lumisection) tuple is compared as a list against the entries of each filter
    # (presumably because the filter results were loaded from json, which has no tuples)
    target = list(runlumi)
    failkeys = [name for name, failed in filter_results.items() if target in failed]
    if failkeys:
        print(f'Lumisection {runlumi} failed the following selections:')
        print(failkeys)
    else:
        print(f'Lumisection {runlumi} not found in filter info, i.e. it did not seem to have failed any of the selections.')

In [None]:
# plot some random (or not random) examples
#
# For the selected lumisections and each layer, this cell:
#   1. loads the data and extracts the monitoring elements,
#   2. preprocesses them and computes the model prediction,
#   3. computes the loss as the squared difference between preprocessed input and prediction,
#      and a binary loss map by comparing the loss to `threshold`,
#   4. optionally multiplies the loss maps with the automask maps and/or the loss mask,
#   5. optionally applies the pattern-based cleaning to the binary loss maps,
#   6. rebins the per-layer binary loss maps to the shape of the first layer and sums them;
#      the combined map is 1 where that sum is at least 2,
#   7. makes one figure per lumisection and layer, plus one combined figure per lumisection.

# general settings
do_extended_loss_plots = True
do_combined_loss_plot = True

# lumisection selection
# - True: pick `nplot` random lumisections from the available ones
# - False: use the specific lumisections listed in `selected_runlumis`
use_random_lumisections = False
nplot = 1
selected_runlumis = [(378999, 2)]

if use_random_lumisections:
    random_ids = np.random.choice(len(available_run_numbers), size=min(nplot, len(available_run_numbers)), replace=False)
    selected_run_numbers = available_run_numbers[random_ids]
    selected_ls_numbers = available_ls_numbers[random_ids]
    # alternative: pick from the flagged lumisections instead
    #random_ids = np.random.choice(len(flagged_run_numbers), size=min(nplot, len(flagged_run_numbers)), replace=False)
    #selected_run_numbers = flagged_run_numbers[random_ids]
    #selected_ls_numbers = flagged_ls_numbers[random_ids]
else:
    selected_run_numbers = [el[0] for el in selected_runlumis]
    selected_ls_numbers = [el[1] for el in selected_runlumis]

# keyword arguments shared by all 2D histogram plots below
hist_kwargs = dict(titlesize=15,
                   xaxtitle=None, xaxtitlesize=None, yaxtitle=None, yaxtitlesize=None,
                   ticklabelsize=12, colorticklabelsize=12, extent=None, aspect=None,
                   docolorbar=True, caxtitlesize=15, origin='lower')
# settings for the normalized input and its reconstruction
normalized_kwargs = dict(hist_kwargs, caxtitle='Number of clusters\n(normalized)',
                         caxrange=(1e-6,2), caxtitleoffset=30)
# settings for all binary loss maps
binary_kwargs = dict(hist_kwargs, caxtitle='Loss', caxrange=(0, 1), caxtitleoffset=15)

if len(selected_run_numbers) > 0:

    # load data
    print('Loading data...')
    dfs = {}
    mes = {}
    for layer in layers:
        dfs[layer] = iotools.read_lumisections(input_files[era][layer], selected_run_numbers, selected_ls_numbers)
        mes[layer], runs, lumis = dftools.get_mes(dfs[layer], xbinscolumn='x_bin', ybinscolumn='y_bin', runcolumn='run_number', lumicolumn='ls_number')

    # preprocess and predict
    print('Processing...')
    mes_preprocessed = {}
    mes_pred = {}
    losses = {}
    losses_binary = {}
    for layer in layers:
        mes_preprocessed[layer] = preprocessors[preprocessor_era][layer].preprocess(dfs[layer])
        mes_pred[layer] = nmfs[era][layer].predict(mes_preprocessed[layer])
        losses[layer] = np.square(mes_preprocessed[layer] - mes_pred[layer])
        losses_binary[layer] = (losses[layer] > threshold).astype(int)

    # automasking
    if do_automasking:
        print('Applying automasks...')
        for layer in layers:
            subsystem = f'BPix{layer}'
            automask_maps = automask_reader.get_automask_maps_for_ls(selected_run_numbers, selected_ls_numbers, subsystem, invert=True)
            automask_maps = automask_map_preprocessors[layer].preprocess_mes(automask_maps, None, None)
            losses[layer] = np.multiply(losses[layer], automask_maps)
            losses_binary[layer] = np.multiply(losses_binary[layer], automask_maps)

    # manual masking
    if do_loss_masking:
        print('Applying loss mask...')
        for layer in layers:
            mask = loss_masks[era][layer]
            # add a leading axis so that the mask has the same number of dimensions as the loss maps
            mask = np.expand_dims(mask, 0)
            mask = loss_mask_preprocessors[layer].preprocess_mes(mask, None, None)
            losses[layer] = np.multiply(losses[layer], mask)
            losses_binary[layer] = np.multiply(losses_binary[layer], mask)

    # cleaning
    # (the dict is always defined, so that the code below also works without cleaning)
    losses_binary_cleaned = {}
    if do_per_layer_cleaning:
        print('Cleaning loss maps')
        for layer in layers:
            losses_binary_cleaned[layer] = patternfiltering.filter_any_pattern(losses_binary[layer], cleaning_patterns, threshold=cleaning_threshold)

    # make rebinned and overlayed binary loss map
    # (all layers are rebinned to the shape of the first layer)
    target_shape = losses[layers[0]].shape[1:3]
    losses_binary_rebinned = {}
    losses_binary_combined = np.zeros(losses[layers[0]].shape)
    for layer in layers:
        source = losses_binary_cleaned[layer] if do_per_layer_cleaning else losses_binary[layer]
        losses_binary_rebinned[layer] = rebinning.rebin_keep_clip(source, target_shape, 1, mode='cv2')
        losses_binary_combined += losses_binary_rebinned[layer]
    losses_binary_combined = (losses_binary_combined >= 2).astype(int)

    # make the plots
    # (loop over the lumisections that were actually loaded; `runs` and `lumis` come from
    #  the last layer — assumes all layers hold the same lumisections in the same order)
    print('Plotting...')
    for idx in range(len(runs)):
        run = runs[idx]
        lumi = lumis[idx]
        for layer in layers:
            me_orig = mes[layer][idx, :, :]
            me_preprocessed = mes_preprocessed[layer][idx, :, :]
            me_pred = mes_pred[layer][idx, :, :]
            loss = losses[layer][idx, :, :]
            loss_binary = losses_binary[layer][idx, :, :]
            loss_binary_cleaned = None
            if do_per_layer_cleaning: loss_binary_cleaned = losses_binary_cleaned[layer][idx, :, :]
            loss_binary_rebinned = losses_binary_rebinned[layer][idx, :, :]

            # initialize figure
            nrows = 1
            figheight = 6
            if do_extended_loss_plots:
                nrows = 2
                figheight = 12
            fig, axs = plt.subplots(ncols=4, nrows=nrows, figsize=(24, figheight), squeeze=False)

            # plot raw data
            fig, axs[0, 0] = plottools.plot_hist_2d(me_orig, fig=fig, ax=axs[0, 0],
                   title='Raw', caxtitle='Number of clusters', caxtitleoffset=15,
                   **hist_kwargs)

            # overlay automask: draw a red rectangle around every nonzero bin of the automask map
            if do_automasking:
                subsystem = f'BPix{layer}'
                automask_map = automask_reader.get_automask_map_for_ls(run, lumi, subsystem)
                ids = np.nonzero(automask_map.astype(int))
                for yidx, xidx in zip(ids[0], ids[1]):
                    linewidth = 1 if layer>=3 else 2
                    patch = mpl.patches.Rectangle((xidx-0.5, yidx-0.5), 1, 1,
                                      edgecolor='red', linewidth=linewidth,
                                      facecolor='none')
                    axs[0, 0].add_patch(patch)

            # plot preprocessed, reconstructed and loss
            fig, axs[0, 1] = plottools.plot_hist_2d(me_preprocessed, fig=fig, ax=axs[0, 1],
                   title='Input', **normalized_kwargs)
            fig, axs[0, 2] = plottools.plot_hist_2d(me_pred, fig=fig, ax=axs[0, 2],
                   title='Reconstructed', **normalized_kwargs)
            fig, axs[0, 3] = plottools.plot_hist_2d(loss, fig=fig, ax=axs[0, 3],
                   title='Loss', caxtitle='Loss', caxrange=(0, 0.1), caxtitleoffset=30,
                   **hist_kwargs)

            # optional: plot more post-processing steps with the loss map
            if do_extended_loss_plots:
                fig, axs[1, 0] = plottools.plot_hist_2d(loss_binary, fig=fig, ax=axs[1, 0],
                   title='Binary loss', **binary_kwargs)
                if loss_binary_cleaned is not None:
                    fig, axs[1, 1] = plottools.plot_hist_2d(loss_binary_cleaned, fig=fig, ax=axs[1, 1],
                       title='Cleaned loss', **binary_kwargs)
                else:
                    # no cleaning was done, so there is nothing to show in this panel
                    fig.delaxes(axs[1, 1])
                fig, axs[1, 2] = plottools.plot_hist_2d(loss_binary_rebinned, fig=fig, ax=axs[1, 2],
                   title='Rebinned loss', **binary_kwargs)
                # the last panel of the second row is unused
                fig.delaxes(axs[1, 3])

            # plot aesthetics
            # (the vertical spacing between the rows is tuned by hand for layers 1 and 2)
            fig.subplots_adjust(wspace=0.5)
            if str(layer)=='1': fig.subplots_adjust(hspace=-0.75)
            if str(layer)=='2': fig.subplots_adjust(hspace=-0.4)
            title = f'Run {run}, LS {lumi}, layer {layer}'
            axs[0, 0].text(0.01, 1.3, title, fontsize=15, transform=axs[0, 0].transAxes)
            plt.show()
            plt.close(fig)

        # plot the combined loss map
        if do_combined_loss_plot:
            loss_binary_combined = losses_binary_combined[idx, :, :]
            fig, ax = plt.subplots()
            fig, ax = plottools.plot_hist_2d(loss_binary_combined, fig=fig, ax=ax,
                   title='Combined binary loss', **binary_kwargs)
            title = f'Run {run}, LS {lumi}'
            ax.text(0.01, 1.3, title, fontsize=15, transform=ax.transAxes)
            plt.show()
            plt.close(fig)