In [None]:
# imports

import os
import sys
import json
import importlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

thisdir = os.getcwd()
topdir = os.path.abspath(os.path.join(thisdir, '../../../'))
sys.path.append(topdir)

import tools.iotools as iotools
import tools.dftools as dftools
import plotting.plottools as plottools
from studies.ecal_occupancy_2024.plotting.plot_cluster_occupancy import plot_cluster_occupancy

In [None]:
# configuration: monitoring elements (MEs), eras and input file naming

# MEs to process, as a mapping from a short label to the full ME name;
# the full name is part of the input file name.
# (un)comment entries to select which MEs are processed.
mes = {
    #'EB': 'EcalBarrel-EBOccupancyTask-EBOT digi occupancy',
    'EE+': 'EcalEndcap-EEOccupancyTask-EEOT digi occupancy EE +',
    #'EE-': 'EcalEndcap-EEOccupancyTask-EEOT digi occupancy EE -'
}

# eras to process, in the format '<main era>-<version>';
# (un)comment entries to select which eras are processed.
eras = [
    #'Run2024A-v1', # only commissioning, no lumisections with physics flag set to True
    #'Run2024B-v1',
    'Run2024C-v1',
    #'Run2024D-v1',
    #'Run2024E-v1',
    'Run2024E-v2',
    #'Run2024F-v1',
    #'Run2024G-v1',
    #'Run2024H-v1',
    #'Run2024I-v1',
    #'Run2024I-v2',
    #'Run2024J-v1'  # pp reference run for heavy ion run; lower pileup and occupancy
]

# directory holding the input parquet files
datadir = '/eos/user/l/llambrec/dialstools-output-test'

# components of the input file names
# NOTE(review): 'year' does not appear to be used in the visible cells
year = '2024'
dataset = 'ZeroBias'
reco = 'PromptReco'

# build the full path of the input parquet file for every (era, ME) combination,
# stored as files[era][melabel]
files = {}
for era in eras:
    # an era string has the form '<main era>-<version>'
    mainera, version = era.split('-')
    files[era] = {
        melabel: os.path.join(
            datadir,
            f'{dataset}-{mainera}-{reco}-{version}-DQMIO-{mename}.parquet'
        )
        for melabel, mename in mes.items()
    }

# existence check: collect all expected input files and verify that each of them
# is present before starting the (potentially long) processing below.
all_files = [files[era][melabel] for era in eras for melabel in mes]
missing = [path for path in all_files if not os.path.exists(path)]
if missing:
    raise Exception(f'The following files do not exist: {missing}')
# note: count the actual files (eras x MEs), not the number of eras
# (len(files) would only give the number of eras).
print(f'Found {len(all_files)} files.')

**Part 1: calculate the fraction of zeros per bin (input for the mask)**

In [None]:
# calculate where an ME is zero for a significant fraction of an era
#
# For every ME and era, the input file is read in batches; per batch, the number
# of instances with zero content is counted per bin. The counts are summed over all
# batches and divided by the total number of instances passing the filter.
# The resulting per-bin zero fraction is plotted and saved to a .npy file.

outputdir = 'normdata'
os.makedirs(outputdir, exist_ok=True)

# number of rows read from the input file per batch
batch_size = 10000

for melabel in mes:

    for era in eras:
        print(f'Now running on era {era}, ME {melabel}...')
        f = files[era][melabel]

        # read number of instances in this era
        # (only a single column is requested, just to count the rows)
        nlumis = len(iotools.read_parquet(f, columns=['run_number']))
        print(f'Found {nlumis} lumisections in this era.')

        # split in batches
        # (integer ceiling division; gives zero batches for an empty file)
        num_batches = (nlumis + batch_size - 1) // batch_size
        zero_counts = None
        nentries = 0
        for batchidx in range(num_batches):
            print(f'  - Extracting batch {batchidx+1} / {num_batches}...')

            # get batch
            df = iotools.read_parquet(f, batch_size=batch_size, first_batch=batchidx, last_batch=batchidx)

            # do filtering
            df = df[df['entries'] > 0]
            print(f'    Found {len(df)} entries passing filters.')
            if len(df)==0: continue

            me_array, _, _ = dftools.get_mes(df, xbinscolumn='x_bin', ybinscolumn='y_bin', runcolumn='run_number', lumicolumn='ls_number')
            print(f'    Found array of shape {me_array.shape}.')

            # get zero counts (per bin, summed over the instances in this batch)
            # and add them to the running total, so that only one array
            # needs to be kept in memory instead of one per batch
            counts = np.count_nonzero(me_array==0, axis=0)
            zero_counts = counts if zero_counts is None else zero_counts + counts
            nentries += len(df)

            # explicitly delete some variables for memory saving
            del df
            del me_array

        # skip this era / ME if nothing passed the filter,
        # to avoid a division by zero and saving a meaningless array
        if nentries == 0:
            print(f'WARNING: no entries passing filters for era {era}, ME {melabel}; skipping.')
            continue

        # make total zero fraction
        zerofrac = zero_counts / nentries

        # plot result
        # (the color axis shows the zero fraction, so it is labeled accordingly)
        title = melabel + f', era {era} fraction of zeros'
        fig, ax = plottools.plot_hist_2d(zerofrac, figsize=(12,6), title=title, titlesize=15,
                xaxtitle=None, xaxtitlesize=None, yaxtitle=None, yaxtitlesize=None,
                ticklabelsize=12, colorticklabelsize=12, extent=None, aspect=None,
                docolorbar=True, caxtitle='Fraction of zeros', caxrange=(1e-6, 1), caxtitlesize=15, caxtitleoffset=15,
                origin='lower')
        plt.show()
        plt.close()

        # save array
        outputfile = f'zerofrac_{era}_{melabel}.npy'
        outputfile = os.path.join(outputdir, outputfile)
        np.save(outputfile, zerofrac)

**Part 2: plot the fraction of zeros and the resulting mask**

In [None]:
# plot the stored zero fractions and the binary mask derived from them

outputdir = 'normdata'

# bins with a zero fraction above this threshold are flagged (value 1) in the mask
threshold = 0.9

for melabel in mes:
    for era in eras:

        # load array (as saved under this name in the output directory)
        f = f'zerofrac_{era}_{melabel}.npy'
        f = os.path.join(outputdir, f)
        zerofrac = np.load(f)

        # binary result: 1 where the zero fraction exceeds the threshold, 0 elsewhere
        mask = (zerofrac > threshold).astype(int)

        # the two plots share all plot settings and differ only in
        # the plotted array, the title and the color axis label
        plots = [
            (zerofrac, melabel + f', era {era} fraction of zeros', 'Fraction of zeros'),
            (mask, melabel + f', era {era} mask', f'Mask (zero fraction > {threshold})'),
        ]
        for array, title, caxtitle in plots:
            fig, ax = plot_cluster_occupancy(array, figsize=(12,6), title=title, titlesize=15,
                    xaxtitlesize=15, yaxtitlesize=15,
                    ticklabelsize=12, colorticklabelsize=12,
                    docolorbar=True, caxtitle=caxtitle,
                    caxtitlesize=15, caxtitleoffset=15)
            plt.show()
            plt.close()