In [None]:
# imports

import os
import sys
import json
import importlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Make the top-level directory (three levels above the current working
# directory) importable; the project imports further down in this cell
# (tools, plotting, studies) presumably live there.
# NOTE: this relies on the kernel's working directory being the notebook's own directory.
thisdir = os.getcwd()
topdir = os.path.abspath(os.path.join(thisdir, '../../../'))
# only add the path once, so that re-running this cell does not pile up
# duplicate entries in sys.path
if topdir not in sys.path:
    sys.path.append(topdir)

import tools.iotools as iotools
import tools.dftools as dftools
import plotting.plottools as plottools
from studies.ecal_occupancy_2024.plotting.plot_cluster_occupancy import plot_cluster_occupancy

import preprocessor
importlib.reload(preprocessor)
from preprocessor import PreProcessor

In [None]:
# locate the input parquet files

# --- settings ---
# directory holding the parquet files
datadir = '/eos/user/l/llambrec/dialstools-output-test'
year = '2024'
# era letter -> list of processing versions to pick up for that era
eras = {
    'A': ['v1'], 'B': ['v1'], 'C': ['v1'], 'D': ['v1'],
    'E': ['v1', 'v2'],
    'F': ['v1'], 'G': ['v1'], 'H': ['v1'],
    'I': ['v1', 'v2'],
    'J': ['v1'],
}
dataset = 'ZeroBias'
reco = 'PromptReco'
# name of the monitoring element and its short type label
me = 'EcalBarrel-EBOccupancyTask-EBOT digi occupancy'
metype = 'EB'

# --- build the expected file path for every (era, version) pair ---
# keys of `files` have the form '<era>-<version>', e.g. 'B-v1'
files = {}
for era, versions in eras.items():
    for version in versions:
        filename = f'{dataset}-Run{year}{era}-{reco}-{version}-DQMIO-{me}.parquet'
        files[f'{era}-{version}'] = os.path.join(datadir, filename)

# --- make sure every expected file is actually there ---
missing = [path for path in files.values() if not os.path.exists(path)]
if missing:
    raise Exception(f'The following files do not exist: {missing}')
print(f'Found {len(files)} files.')

In [None]:
# choose one era/version and load its file

# must be one of the keys of `files`, i.e. of the form '<era>-<version>'
era = 'B-v1'
parquet_file = files[era]
df = iotools.read_parquet(parquet_file, verbose=True)

# unpack the dataframe into the ME array and the matching run and
# lumisection numbers, telling get_mes which columns to read
mes, runs, lumis = dftools.get_mes(
    df,
    xbinscolumn='x_bin',
    ybinscolumn='y_bin',
    runcolumn='run_number',
    lumicolumn='ls_number',
)
print(f'Shape of ME array: {mes.shape}')

In [None]:
# get the average occupancy map

# The file name is built from the `year` setting (instead of a hard-coded
# '2024') so that it stays in sync with the settings cell; for the current
# setting the resulting path is unchanged.
avgmefile = f'normdata/avgme_Run{year}{era}_{metype}.npy'
avgme = np.load(avgmefile)

# make a plot
# (the title uses the last dash-separated part of the ME name)
title = me.split('-')[-1] + '\n' + f'Era {era} mean'
fig, ax = plot_cluster_occupancy(avgme, figsize=(12,6), title=title, titlesize=15,
                xaxtitlesize=15, yaxtitlesize=15,
                ticklabelsize=12, colorticklabelsize=12,
                docolorbar=True, caxtitle='Digi occupancy', caxtitlesize=15, caxtitleoffset=15)

In [None]:
# get a mask

# The file name is built from the `year` setting (instead of a hard-coded
# '2024') so that it stays in sync with the settings cell; for the current
# setting the resulting path is unchanged.
zerofracfile = f'normdata/zerofrac_Run{year}{era}_{metype}.npy'
zerofrac = np.load(zerofracfile)

# Threshold on zerofrac: elements strictly below it are True in the mask,
# elements at or above it are False.
# NOTE(review): zerofrac is presumably the per-element fraction of empty
# entries over the era - confirm against the script that produced the file.
max_zerofrac = 0.9
mask = (zerofrac < max_zerofrac)

# make a plot
title = me.split('-')[-1]+ '\n' + f'Era {era} mask'
# the plotted quantity is the 0/1 mask, so the colorbar is labelled
# accordingly (it was previously labelled as an occupancy)
fig, ax = plot_cluster_occupancy(mask.astype(int), figsize=(12,6), title=title, titlesize=15,
                xaxtitlesize=15, yaxtitlesize=15,
                ticklabelsize=12, colorticklabelsize=12,
                docolorbar=True, caxtitle='Mask', caxtitlesize=15, caxtitleoffset=15)

In [None]:
# set up the preprocessor

# NOTE: this rebinds the name `preprocessor` from the module imported in the
# first cell to a PreProcessor instance; re-running the first cell rebinds it
# to the module again.
preprocessor = PreProcessor(
    metype,
    global_norm=None,
    local_norm=avgme,   # average occupancy map loaded above
    mask=mask,          # boolean mask derived from zerofrac above
    avgunity=True,
)

In [None]:
# do preprocessing
# Runs the dataframe loaded above through the PreProcessor instance created
# in the previous cell.
# NOTE(review): the result is indexed below with the same indices as `mes`,
# so it presumably holds one entry per row of `df` - confirm in PreProcessor.preprocess.

mes_preprocessed = preprocessor.preprocess(df)

In [None]:
# plot some examples

# number of randomly chosen examples to show
n_examples = 5

# candidate indices: those whose 'entries' value in the dataframe is non-zero
all_ids = np.arange(len(runs))
selected_ids = all_ids[df['entries'].values > 0]

# np.random.choice(..., replace=False) raises a ValueError when asked for more
# samples than there are candidates, so cap the sample size
n_draw = min(n_examples, len(selected_ids))
if n_draw < n_examples:
    print(f'Only {n_draw} non-empty entries available, plotting all of them.')
random_ids = np.random.choice(selected_ids, size=n_draw, replace=False)

# styling shared by the raw and the preprocessed panel
common_style = dict(titlesize=15,
                    xaxtitlesize=15, yaxtitlesize=15,
                    ticklabelsize=12, colorticklabelsize=12,
                    docolorbar=True, caxtitlesize=15)

for random_idx in random_ids:

    run = runs[random_idx]
    lumi = lumis[random_idx]
    me_orig = mes[random_idx]
    me_prep = mes_preprocessed[random_idx]

    # left panel: raw ME, right panel: preprocessed ME
    fig, axs = plt.subplots(ncols=2, figsize=(12, 6))
    fig, axs[0] = plot_cluster_occupancy(me_orig, fig=fig, ax=axs[0],
                   title='Raw',
                   caxtitle='Digi occupancy', caxtitleoffset=15,
                   **common_style)
    # the preprocessed panel uses a fixed color axis range
    fig, axs[1] = plot_cluster_occupancy(me_prep, fig=fig, ax=axs[1],
                   title='Preprocessed',
                   caxtitle='Digi occupancy\n(normalized)', caxrange=(1e-6,2), caxtitleoffset=30,
                   **common_style)
    plt.subplots_adjust(wspace=0.5)

    # overall title, placed above the left panel in its axes coordinates
    title = me.split('-')[-1] + f', Run {run}, LS {lumi}'
    axs[0].text(0.01, 1.3, title, fontsize=15, transform=axs[0].transAxes)