In [None]:
# imports

import os
import sys
import json
import time
import joblib
import importlib
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt

thisdir = os.getcwd()
topdir = os.path.abspath(os.path.join(thisdir, '../../../'))
sys.path.append(topdir)

import tools.iotools as iotools
import tools.dftools as dftools
import tools.patternfiltering as patternfiltering
import tools.rebinning as rebinning
import tools.clustering as clustering
import plotting.plottools as plottools
from automasking.tools.automaskreader import AutomaskReader
from studies.pixel_clusters_2024.preprocessing.preprocessor import PreProcessor
from studies.pixel_clusters_2024.preprocessing.preprocessor import get_metype
from studies.pixel_clusters_2024.plotting.plot_cluster_occupancy import plot_cluster_occupancy
from studies.pixel_clusters_2024.nmf.nmf_testing_pattern import make_preprocessors

# for using dial for data retrieval on the fly
from cmsdials import Dials
from cmsdials.filters import LumisectionHistogram2DFilters, RunFilters
from studies.pixel_clusters_2024.nmf.nmf_testing_pattern import load_nmfs
from studies.pixel_clusters_2024.nmf.nmf_testing_pattern import get_dials_creds, get_data_from_dials

In [None]:
# choose the lumisection to plot:
# the era tag of the config to use, plus the run and lumisection number

# example 1 (inactive)
#config_era = '2025C-v1'
#run = 392250
#lumi = 633

# example 2 (active)
config_era, run, lumi = '2024G-v1', 385127, 793

In [None]:
# read config file
# (sets up the eras, layers, preprocessors, NMF models and evaluation settings
#  from the json config written by an earlier evaluation job)

#config_file = None
config_file = f'output_20250714_consolidation/output_baseline/temp_config_{config_era.replace("2024", "")}.json'
#config_file = f'output_20250911_2025data/output_baseline/temp_config_{config_era}.json'
#config_file = f'output_20250911_2025data/output_tightbpix1/temp_config_{config_era}_part0.json'

if config_file is not None:

    # read config file
    with open(config_file, 'r') as f:
        config = json.load(f)

    # read eras and layers
    eras = config['eras']
    layers = config['layers']
    print(f'Found following eras: {eras}')
    print(f'Found following layers: {layers}')

    # read input files
    input_files = config['input_files']

    # make preprocessors and models
    print('Making preprocessors...')
    global_normalization = config.get('preprocessing_global_normalization', None)
    local_normalization = config.get('preprocessing_local_normalization', None)
    # older convention: the config holds one single string; turn it into a
    # per-era dict in which 'era_' is replaced by 'era_2024'.
    # The key is optional (default None above), so only convert an actual string
    # instead of crashing on None; any other value is passed on unchanged.
    if isinstance(local_normalization, str):
        converted_normalization = local_normalization.replace('era_', 'era_2024')
        local_normalization = {this_era: converted_normalization for this_era in eras}
    preprocessors = make_preprocessors(eras, layers,
                      global_normalization = global_normalization,
                      local_normalization = local_normalization)
    print('Loading NMFs...')
    nmfs = load_nmfs(config['nmf_files'])

    # get evaluation settings
    batch_size = config['batch_size']
    flagging_patterns = [np.array(el) for el in config['flagging_patterns']]
    flagging_threshold = config['flagging_threshold']
    #pattern_thresholds = config['pattern_thresholds']

    # for reading files in older convention:
    # hard-coded thresholds, identical for each of the four layers
    pattern_thresholds = {
        layer_name: [{"loss_threshold": 0.04, "pattern": np.ones((2, 16)).tolist(), "filter_threshold": 1.5}]
        for layer_name in ('BPix1', 'BPix2', 'BPix3', 'BPix4')
    }

    do_automasking = config['do_automasking']
    do_loss_masking = config['do_loss_masking']

    # make automask reader if needed
    # (both stay None when automasking is switched off in the config)
    automask_reader = None
    automask_map_preprocessors = None
    if do_automasking:
        automask_reader = AutomaskReader(config['automask_data_file'])
        automask_map_preprocessors = {}
        for layer in layers:
            automask_map_preprocessors[layer] = PreProcessor(f'PXLayer_{layer}')

    # make loss mask if needed
    # (both stay None when loss masking is switched off in the config)
    loss_masks = None
    loss_mask_preprocessors = None
    if do_loss_masking:
        loss_masks = {}
        loss_mask_preprocessors = {}
        for this_era in eras:
            loss_masks[this_era] = {}
            for layer in layers:
                loss_mask_file = config['loss_masking_zero_frac_files'][this_era][layer]
                loss_mask_file = loss_mask_file.replace('clusters_2024', 'pixel_clusters_2024') # older convention
                # the stored array is compared against the configured threshold;
                # the mask is True where the stored value is below it
                loss_mask = np.load(loss_mask_file)
                loss_mask = (loss_mask < config['loss_masking_zero_frac_threshold'])
                loss_masks[this_era][layer] = loss_mask
        for layer in layers:
            loss_mask_preprocessors[layer] = PreProcessor(f'PXLayer_{layer}')

In [None]:
# pick the first era found in the config as the era to work with
era = eras[0]

In [None]:
# initialize dials filters

# fixed settings
dataset, reco = 'ZeroBias', 'PromptReco'
mebase = 'PixelPhase1-Phase1_MechanicalView-PXBarrel-clusters_per_SignedModuleCoord_per_SignedLadderCoord_PXLayer_{}'

# make dials filters corresponding to settings
# (this replaces the input files from the config by one filter per layer)
input_files = {era: {}}

# split the era tag in a main part and a version;
# a main part that does not start with '202' gets '2024' prepended
mainera, version = era.split('-', 1)
mainera = mainera if mainera.startswith('202') else '2024'+mainera

# the dataset name is the same for all layers, only the monitoring element differs
dataset_name = f'/{dataset}/Run{mainera}-{reco}-{version}/DQMIO'
for layer in layers:
    # the last character of the layer name fills the placeholder
    me_name = mebase.format(layer[-1]).replace('-', '/')
    input_files[era][layer] = [{'dataset': dataset_name, 'me': me_name}]

In [None]:
# load data for plotting some random (or not random) examples
# (retrieves the selected lumisections from dials, one query per layer and lumisection)

selected_runlumis = [
    (run, lumi)
]
selected_run_numbers = [el[0] for el in selected_runlumis]
selected_ls_numbers = [el[1] for el in selected_runlumis]

# load data
print('Loading data...')
dfs = {}
mes = {}

# the credentials and the client do not depend on the layer,
# so they are created once instead of once per layer
creds = get_dials_creds()
dials = Dials(creds, workspace='tracker')

for layer in layers:
    input_file = input_files[era][layer][0]
    this_dfs = []
    # (dedicated loop variable names, so the notebook-level
    #  `run` and `lumi` selection is not overwritten)
    for this_run, this_lumi in selected_runlumis:
        dialsfilters = LumisectionHistogram2DFilters(
                  dataset = input_file["dataset"],
                  me = input_file["me"],
                  run_number = this_run,
                  ls_number = this_lumi
        )
        data = get_data_from_dials(dials, dialsfilters)
        this_dfs.append( data.to_pandas() )
    dfs[layer] = pd.concat(this_dfs, ignore_index=True)
    if len(dfs[layer])==0:
        msg = 'WARNING: empty dataframe returned...'
        print(msg)
    # note: runs and lumis are overwritten for every layer,
    # so after the loop they correspond to the last layer
    mes[layer], runs, lumis = dftools.get_mes(dfs[layer], xbinscolumn='x_bin', ybinscolumn='y_bin', runcolumn='run_number', lumicolumn='ls_number')

In [None]:
# plot example workflow
# (simplified and prettified version for DP note)
# One figure per selected lumisection, for the first configured layer only,
# with panels: raw, input, reconstructed, loss and cleaned loss.

if len(selected_run_numbers) > 0:

    # preprocess and predict
    print('Processing...')
    mes_preprocessed = {}
    mes_pred = {}
    losses = {}
    for layer in layers:
        mes_preprocessed[layer] = preprocessors[era][layer].preprocess(dfs[layer])
        # make a copy for some additional processing before inference
        # that does not need to go in the plot:
        # clip values above the threshold and replace exact zeros by one
        this_mes_preprocessed = np.copy(mes_preprocessed[layer])
        threshold = 5
        this_mes_preprocessed[this_mes_preprocessed > threshold] = threshold
        this_mes_preprocessed[this_mes_preprocessed == 0] = 1
        mes_pred[layer] = nmfs[era][layer].predict(this_mes_preprocessed)
        # the loss uses the unmodified preprocessed data, not the clipped copy
        losses[layer] = np.square(mes_preprocessed[layer] - mes_pred[layer])

    # thresholding
    print('Thresholding...')
    losses_clustered = {}
    for layer in layers:
        # newer convention: thresholds stored per layer;
        # older convention: one object that cannot be looked up per layer.
        # (only lookup failures trigger the fallback, other errors are not hidden)
        try: this_pattern_thresholds = pattern_thresholds[layer]
        except (KeyError, TypeError): this_pattern_thresholds = pattern_thresholds # older convention
        losses_clustered[layer] = clustering.cluster_loss_multithreshold(losses[layer], this_pattern_thresholds)

    # settings shared by all panels
    common_kwargs = dict(
        titlesize=15,
        xaxtitlesize=15, yaxtitlesize=15,
        ticklabelsize=12, colorticklabelsize=12,
        docolorbar=True, caxtitlesize=15
    )

    # vertical positions (in axes coordinates) of the two header lines
    # and their font size, per layer
    header_layout = {
        'BPix1': (1.4, 1.25, 15),
        'BPix2': (1.15, 1.1, 12),
        'BPix3': (1.15, 1.1, 12),
        'BPix4': (1.14, 1.09, 12)
    }

    # make the plots
    print('Plotting...')
    for idx in range(len(selected_run_numbers)):
        run = runs[idx]
        lumi = lumis[idx]
        for layer in layers[:1]: # plot only the first configured layer
            # copy the slices that are modified in place below,
            # so the shared arrays are left untouched
            # (otherwise re-running this or a later cell sees altered data)
            me_orig = np.copy(mes[layer][idx, :, :])
            me_preprocessed = np.copy(mes_preprocessed[layer][idx, :, :])
            me_pred = np.copy(mes_pred[layer][idx, :, :])
            loss = losses[layer][idx, :, :]
            loss_clustered = losses_clustered[layer][idx, :, :]

            # set zeros to small values
            # (in order to plot them as the bottom of the color scale
            # rather than white)
            me_orig[me_orig < 1e-6] = 1e-6
            me_preprocessed[me_preprocessed < 1e-6] = 1e-6
            me_pred[me_pred < 1e-6] = 1e-6

            # re-insert empty modules in the middle cross
            # (2 rows and 8 columns at the center; np.insert returns new arrays)
            middle_1 = int(me_preprocessed.shape[0]/2)
            middle_2 = int(me_preprocessed.shape[1]/2)
            me_preprocessed = np.insert(me_preprocessed, [middle_1]*2, 0, axis=0)
            me_preprocessed = np.insert(me_preprocessed, [middle_2]*8, 0, axis=1)
            me_pred = np.insert(me_pred, [middle_1]*2, 0, axis=0)
            me_pred = np.insert(me_pred, [middle_2]*8, 0, axis=1)
            loss = np.insert(loss, [middle_1]*2, -1e6, axis=0)
            loss = np.insert(loss, [middle_2]*8, -1e6, axis=1)
            loss_clustered = loss_clustered.astype(float)
            loss_clustered = np.insert(loss_clustered, [middle_1]*2, -1e6, axis=0)
            loss_clustered = np.insert(loss_clustered, [middle_2]*8, -1e6, axis=1)

            # reset middle cross to empty in original histogram
            me_orig[middle_1:middle_1+2, :] = 0
            me_orig[:, middle_2:middle_2+8] = 0

            # initialize figure
            nrows = 2
            figheight = 12
            fig, axs = plt.subplots(ncols=3, nrows=nrows, figsize=(18, figheight), squeeze=False)

            # plot raw data
            fig, axs[0, 0] = plot_cluster_occupancy(me_orig, fig=fig, ax=axs[0, 0],
                   title='Raw', caxsoftmax=True,
                   caxtitle='Number of clusters', caxtitleoffset=15,
                   **common_kwargs)
            # plot preprocessed data
            fig, axs[0, 1] = plot_cluster_occupancy(me_preprocessed, fig=fig, ax=axs[0, 1],
                   title='Input',
                   caxtitle='Number of clusters\n(normalized)',
                   caxrange=(1e-6,2), caxtitleoffset=30,
                   **common_kwargs)
            # plot reconstruction
            fig, axs[1, 0] = plot_cluster_occupancy(me_pred, fig=fig, ax=axs[1, 0],
                   title='Reconstructed',
                   caxtitle='Number of clusters\n(normalized)',
                   caxrange=(1e-6,2), caxtitleoffset=30,
                   **common_kwargs)
            # plot loss
            fig, axs[1, 1] = plot_cluster_occupancy(loss, fig=fig, ax=axs[1, 1],
                   title='Loss',
                   caxtitle='Loss',
                   caxrange=(0, 0.1), caxtitleoffset=30,
                   **common_kwargs)
            # plot loss after thresholding
            fig, axs[1, 2] = plot_cluster_occupancy(loss_clustered, fig=fig, ax=axs[1, 2],
                   title='Cleaned loss',
                   caxtitle='Loss',
                   caxrange=(0, 1), caxtitleoffset=15,
                   **common_kwargs)
            # the top right panel is not used
            fig.delaxes(axs[0, 2])

            # plot aesthetics
            layer_name = str(layer)
            if layer_name=='BPix1':
                plt.subplots_adjust(hspace=-0.7)
                plt.subplots_adjust(wspace=0.4)
            elif layer_name in ('BPix2', 'BPix3', 'BPix4'):
                plt.subplots_adjust(hspace=0.3)
            title = r'$\bf{CMS}$ ' + r'$\it{Preliminary}$'
            year = era[:4]
            if not year.startswith('202'): year = '2024' # older convention
            conditions = f'{layer}, {year} (13.6 TeV)'
            if layer_name in header_layout:
                title_ypos, conditions_ypos, header_fontsize = header_layout[layer_name]
                axs[0, 0].text(0.01, title_ypos, title, fontsize=header_fontsize, transform=axs[0, 0].transAxes)
                axs[0, 0].text(0.01, conditions_ypos, conditions, fontsize=header_fontsize, transform=axs[0, 0].transAxes)
            plt.show()
            plt.close()

In [None]:
# plot examples of raw + loss
# (simplified and prettified version for DP note)
# One figure per selected lumisection and per layer, with the raw data on top
# and the loss below.

# (re-import the plotting function to pick up edits without restarting the kernel)
import studies.pixel_clusters_2024.plotting.plot_cluster_occupancy
importlib.reload(studies.pixel_clusters_2024.plotting.plot_cluster_occupancy)
from studies.pixel_clusters_2024.plotting.plot_cluster_occupancy import plot_cluster_occupancy

if len(selected_run_numbers) > 0:

    # preprocess and predict
    print('Processing...')
    mes_preprocessed = {}
    mes_pred = {}
    losses = {}
    for layer in layers:
        mes_preprocessed[layer] = preprocessors[era][layer].preprocess(dfs[layer])
        # make a copy for some additional processing before inference
        # that does not need to go in the plot:
        # clip values above the threshold and replace exact zeros by one
        this_mes_preprocessed = np.copy(mes_preprocessed[layer])
        threshold = 5
        this_mes_preprocessed[this_mes_preprocessed > threshold] = threshold
        this_mes_preprocessed[this_mes_preprocessed == 0] = 1
        mes_pred[layer] = nmfs[era][layer].predict(this_mes_preprocessed)
        # the loss uses the unmodified preprocessed data, not the clipped copy
        losses[layer] = np.square(mes_preprocessed[layer] - mes_pred[layer])

    # settings shared by both panels
    common_kwargs = dict(
        titlesize=15,
        xaxtitlesize=15, yaxtitlesize=15,
        ticklabelsize=12, colorticklabelsize=12,
        docolorbar=True, caxtitlesize=15
    )

    # vertical positions (in axes coordinates) of the two header lines, per layer
    header_layout = {
        'BPix1': (1.35, 1.25),
        'BPix2': (1.15, 1.1),
        'BPix3': (1.15, 1.1),
        'BPix4': (1.14, 1.09)
    }

    # make the plots
    print('Plotting...')
    for idx in range(len(selected_run_numbers)):
        run = runs[idx]
        lumi = lumis[idx]
        for layer in layers:
            # copy the raw slice since it is modified in place below,
            # so the shared array is left untouched
            # (otherwise re-running this or another cell sees altered data)
            me_orig = np.copy(mes[layer][idx, :, :])
            me_preprocessed = mes_preprocessed[layer][idx, :, :]
            loss = losses[layer][idx, :, :]

            # set zeros to small values
            # (in order to plot them as the bottom of the color scale
            # rather than white)
            me_orig[me_orig < 1e-6] = 1e-6

            # re-insert empty modules in the middle cross
            # (2 rows and 8 columns at the center; np.insert returns a new array)
            middle_1 = me_preprocessed.shape[0] // 2
            middle_2 = me_preprocessed.shape[1] // 2
            loss = np.insert(loss, [middle_1]*2, -1e6, axis=0)
            loss = np.insert(loss, [middle_2]*8, -1e6, axis=1)

            # reset middle cross to empty in original histogram
            me_orig[middle_1:middle_1+2, :] = 0
            me_orig[:, middle_2:middle_2+8] = 0

            # initialize figure
            fig, axs = plt.subplots(ncols=1, nrows=2, figsize=(6, 10), squeeze=False)

            # plot raw data
            fig, axs[0, 0] = plot_cluster_occupancy(me_orig, fig=fig, ax=axs[0, 0],
                   title='Raw', caxsoftmax=True,
                   caxtitle='Number of clusters', caxtitleoffset=15,
                   **common_kwargs)

            # plot loss
            fig, axs[1, 0] = plot_cluster_occupancy(loss, fig=fig, ax=axs[1, 0],
                   title='Loss',
                   caxtitle='Loss',
                   caxrange=(0, 0.1), caxtitleoffset=30,
                   **common_kwargs)

            # plot aesthetics
            layer_name = str(layer)
            if layer_name=='BPix1':
                plt.subplots_adjust(hspace=-0.45)
            elif layer_name in ('BPix2', 'BPix3', 'BPix4'):
                plt.subplots_adjust(hspace=0.3)
            title = r'$\bf{CMS}$ ' + r'$\it{Preliminary}$'
            year = era[:4]
            if not year.startswith('202'): year = '2024' # older convention
            conditions = f'{layer}, {year} (13.6 TeV)'
            if layer_name in header_layout:
                title_ypos, conditions_ypos = header_layout[layer_name]
                axs[0, 0].text(0.01, title_ypos, title, fontsize=12, transform=axs[0, 0].transAxes)
                axs[0, 0].text(0.01, conditions_ypos, conditions, fontsize=12, transform=axs[0, 0].transAxes)
            plt.show()
            plt.close()

In [None]:
# same as above but with different aspect ratio
# One figure per selected lumisection and per layer, with the raw data on top
# and the loss below; the aspect setting passed to the plotting function
# is 'equal' for BPix1 and 'auto' for all other layers.

# (re-import the plotting function to pick up edits without restarting the kernel)
import studies.pixel_clusters_2024.plotting.plot_cluster_occupancy
importlib.reload(studies.pixel_clusters_2024.plotting.plot_cluster_occupancy)
from studies.pixel_clusters_2024.plotting.plot_cluster_occupancy import plot_cluster_occupancy

if len(selected_run_numbers) > 0:

    # preprocess and predict
    print('Processing...')
    mes_preprocessed = {}
    mes_pred = {}
    losses = {}
    for layer in layers:
        mes_preprocessed[layer] = preprocessors[era][layer].preprocess(dfs[layer])
        # make a copy for some additional processing before inference
        # that does not need to go in the plot:
        # clip values above the threshold and replace exact zeros by one
        this_mes_preprocessed = np.copy(mes_preprocessed[layer])
        threshold = 5
        this_mes_preprocessed[this_mes_preprocessed > threshold] = threshold
        this_mes_preprocessed[this_mes_preprocessed == 0] = 1
        mes_pred[layer] = nmfs[era][layer].predict(this_mes_preprocessed)
        # the loss uses the unmodified preprocessed data, not the clipped copy
        losses[layer] = np.square(mes_preprocessed[layer] - mes_pred[layer])

    # settings shared by both panels
    common_kwargs = dict(
        titlesize=15,
        xaxtitlesize=15, yaxtitlesize=15,
        ticklabelsize=12, colorticklabelsize=12,
        docolorbar=True, caxtitlesize=15
    )

    # make the plots
    print('Plotting...')
    for idx in range(len(selected_run_numbers)):
        run = runs[idx]
        lumi = lumis[idx]
        for layer in layers:
            # copy the raw slice since it is modified in place below,
            # so the shared array is left untouched
            # (otherwise re-running this or another cell sees altered data)
            me_orig = np.copy(mes[layer][idx, :, :])
            me_preprocessed = mes_preprocessed[layer][idx, :, :]
            loss = losses[layer][idx, :, :]

            # set zeros to small values
            # (in order to plot them as the bottom of the color scale
            # rather than white)
            me_orig[me_orig < 1e-6] = 1e-6

            # re-insert empty modules in the middle cross
            # (2 rows and 8 columns at the center; np.insert returns a new array)
            middle_1 = me_preprocessed.shape[0] // 2
            middle_2 = me_preprocessed.shape[1] // 2
            loss = np.insert(loss, [middle_1]*2, -1e6, axis=0)
            loss = np.insert(loss, [middle_2]*8, -1e6, axis=1)

            # reset middle cross to empty in original histogram
            me_orig[middle_1:middle_1+2, :] = 0
            me_orig[:, middle_2:middle_2+8] = 0

            aspect = 'equal' if layer=='BPix1' else 'auto'

            # initialize figure
            fig, axs = plt.subplots(ncols=1, nrows=2, figsize=(6, 10), squeeze=False)

            # plot raw data
            fig, axs[0, 0] = plot_cluster_occupancy(me_orig, fig=fig, ax=axs[0, 0],
                   title='Raw', caxsoftmax=True,
                   caxtitle='Number of clusters', caxtitleoffset=15,
                   aspect=aspect,
                   **common_kwargs)

            # plot loss
            fig, axs[1, 0] = plot_cluster_occupancy(loss, fig=fig, ax=axs[1, 0],
                   title='Loss',
                   caxtitle='Loss',
                   caxrange=(0, 0.1), caxtitleoffset=30,
                   aspect=aspect,
                   **common_kwargs)

            # plot aesthetics
            # (BPix1 gets its own panel spacing and header positions)
            is_first_barrel_layer = (str(layer)=='BPix1')
            plt.subplots_adjust(hspace=(-0.45 if is_first_barrel_layer else 0.3))
            title = r'$\bf{CMS}$ ' + r'$\it{Preliminary}$'
            year = era[:4]
            if not year.startswith('202'): year = '2024' # older convention
            conditions = f'{layer}, {year} (13.6 TeV)'
            title_ypos, conditions_ypos = (1.4, 1.25) if is_first_barrel_layer else (1.22, 1.15)
            axs[0, 0].text(0.01, title_ypos, title, fontsize=15, transform=axs[0, 0].transAxes)
            axs[0, 0].text(0.01, conditions_ypos, conditions, fontsize=15, transform=axs[0, 0].transAxes)
            plt.show()
            plt.close()