In [None]:
# imports

import os
import sys
import json
import importlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

thisdir = os.getcwd()
topdir = os.path.abspath(os.path.join(thisdir, '../../../'))
sys.path.append(topdir)

import tools.iotools as iotools
import tools.dftools as dftools
import plotting.plottools as plottools

In [None]:
# configuration of the input files
# (nothing is read from disk in this part; only the expected file paths are built)

# monitoring elements (MEs) to process: short label -> full ME name
mes = {
    'PXLayer_1': 'PixelPhase1-Phase1_MechanicalView-PXBarrel-clusters_per_SignedModuleCoord_per_SignedLadderCoord_PXLayer_1',
    'PXLayer_2': 'PixelPhase1-Phase1_MechanicalView-PXBarrel-clusters_per_SignedModuleCoord_per_SignedLadderCoord_PXLayer_2',
    'PXLayer_3': 'PixelPhase1-Phase1_MechanicalView-PXBarrel-clusters_per_SignedModuleCoord_per_SignedLadderCoord_PXLayer_3',
    'PXLayer_4': 'PixelPhase1-Phase1_MechanicalView-PXBarrel-clusters_per_SignedModuleCoord_per_SignedLadderCoord_PXLayer_4',
}

# eras to process (format: <main era>-<version>)
eras = [
  #'Run2024A-v1', # only commissioning, no lumisections with physics flag set to True
  'Run2024B-v1',
  'Run2024C-v1',
  'Run2024D-v1',
  'Run2024E-v1',
  'Run2024E-v2',
  'Run2024F-v1',
  'Run2024G-v1',
  'Run2024H-v1',
  'Run2024I-v1',
  'Run2024I-v2',
  #'Run2024J-v1'  # pp reference run for heavy ion run; lower pileup and occupancy
]

# directory holding the input parquet files, and the parts of their file names
datadir = '/eos/user/l/llambrec/dialstools-output'
year = '2024'
dataset = 'ZeroBias'
reco = 'PromptReco'

# build the nested lookup: era -> ME label -> full path to the parquet file
files = {}
for era in eras:
    # the era name only needs to be split once per era
    mainera, version = era.split('-')
    files[era] = {}
    for melabel, mename in mes.items():
        f = os.path.join(datadir, f'{dataset}-{mainera}-{reco}-{version}-DQMIO-{mename}.parquet')
        files[era][melabel] = f

# existence check: fail early if any of the expected input files is absent
missing = []
nfiles = 0
for era in eras:
    for melabel in mes.keys():
        f = files[era][melabel]
        nfiles += 1
        if not os.path.exists(f):
            missing.append(f)
if len(missing) > 0:
    raise Exception(f'The following files do not exist: {missing}')
else:
    # report the number of checked files
    # (note: len(files) would only count the eras, not the files per era)
    print(f'Found {nfiles} files.')

**Part 1: calculate norm (average per era and per ME, normalized to mean value 1)**

In [None]:
# calculate average per era and per ME
# (for each input file: sum the MEs in batches, divide by the number of rows,
#  normalize to mean value 1, zero out small values, plot, and store as .npy file)

outputdir = 'normdata'
# exist_ok makes this safe to re-run and avoids the check-then-create race
os.makedirs(outputdir, exist_ok=True)

# number of rows per batch when reading the parquet files
batch_size = 10000

for era in eras:
    for melabel, mename in mes.items():
        print(f'Now running on era {era}, ME {melabel}...')
        f = files[era][melabel]

        # count the rows by reading a single column only,
        # and release that helper dataframe right away for memory saving
        dummy = iotools.read_parquet(f, columns=['run_number'])
        nrows = len(dummy)
        del dummy
        if nrows == 0:
            # an empty file would lead to a division by zero below
            raise Exception(f'File {f} contains no rows, cannot calculate an average.')

        # split in batches (integer ceiling division, exact for any number of rows)
        num_batches = (nrows-1)//batch_size + 1
        batch_sums = []
        for batchidx in range(num_batches):
            print(f'  - Extracting batch {batchidx+1} / {num_batches}...')

            # get batch
            df = iotools.read_parquet(f, batch_size=batch_size, first_batch=batchidx, last_batch=batchidx)
            me_array, _, _ = dftools.get_mes(df, xbinscolumn='x_bin', ybinscolumn='y_bin', runcolumn='run_number', lumicolumn='ls_number')
            print(f'    Found array of shape {me_array.shape}')

            # get sum over the first axis
            # (presumably one entry per lumisection; to be confirmed against dftools.get_mes)
            mesum = np.sum(me_array, axis=0)
            batch_sums.append(mesum)

            # explicitly delete some variables for memory saving
            del df
            del me_array

        # make total average over batches
        avgme = np.sum(np.array(batch_sums), axis=0) / nrows
        # normalize to mean value 1
        # (guard against a zero or NaN mean, which would silently produce a NaN norm)
        norm = np.mean(avgme)
        if not norm > 0:
            raise Exception(f'Average for era {era}, ME {melabel} has mean value {norm}, cannot normalize.')
        avgme = avgme / norm
        # set small values to zero (so they will be automatically masked)
        avgme[avgme < 0.1] = 0

        # plot result
        title = mename.split('-')[-1]+ '\n' + f'Era {era} mean'
        fig, ax = plottools.plot_hist_2d(avgme, figsize=(12,6), title=title, titlesize=15,
                xaxtitle=None, xaxtitlesize=None, yaxtitle=None, yaxtitlesize=None,
                ticklabelsize=12, colorticklabelsize=12, extent=None, aspect=None,
                docolorbar=True, caxtitle='Number of clusters', caxrange=(1e-6, 2), caxtitlesize=15, caxtitleoffset=15,
                origin='lower')
        plt.show()
        plt.close()

        # save array
        outputfile = f'avgme_{era}_{melabel}.npy'
        outputfile = os.path.join(outputdir, outputfile)
        np.save(outputfile, avgme)

**Part 2: plot result (reload the stored norm arrays and plot them)**

In [None]:
# reload the stored averages from the output directory and plot them per era and per ME

outputdir = 'normdata'

# plot settings shared by all figures
plot_kwargs = dict(
    figsize=(12,6), titlesize=15,
    xaxtitle=None, xaxtitlesize=None, yaxtitle=None, yaxtitlesize=None,
    ticklabelsize=12, colorticklabelsize=12, extent=None, aspect=None,
    docolorbar=True, caxtitle='Number of clusters', caxrange=(1e-6, 2),
    caxtitlesize=15, caxtitleoffset=15,
    origin='lower',
)

for era in eras:
    for melabel, mename in mes.items():

        # load the array stored for this era and ME
        f = os.path.join(outputdir, f'avgme_{era}_{melabel}.npy')
        avgme = np.load(f)

        # plot it, using the last part of the ME name and the era as title
        title = '\n'.join([mename.split('-')[-1], f'Era {era} mean'])
        fig, ax = plottools.plot_hist_2d(avgme, title=title, **plot_kwargs)
        plt.show()
        plt.close()