In [None]:
# imports

import os
import sys
import json
import importlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Make the directory three levels above the current working directory
# importable (presumably the project root that holds the `tools` and
# `plotting` packages imported below).
thisdir = os.getcwd()
topdir = os.path.abspath(os.path.join(thisdir, '../../../'))
# only add the entry once, so that re-running this cell does not keep
# appending duplicates to sys.path
if topdir not in sys.path:
    sys.path.append(topdir)

import tools.iotools as iotools
import tools.dftools as dftools
import plotting.plottools as plottools

from preprocessor import PreProcessor

In [None]:
# set path to files

# settings
# NOTE: datadir is a site-specific absolute path; adapt it when running elsewhere.
datadir = '/eos/user/l/llambrec/dialstools-output'
year = '2024'
# versions to consider for each era (one input file per era/version pair)
eras = {
    'A': ['v1'],
    'B': ['v1'],
    'C': ['v1'],
    'D': ['v1'],
    'E': ['v1', 'v2'],
    'F': ['v1'],
    'G': ['v1'],
    'H': ['v1'],
    'I': ['v1', 'v2'],
    'J': ['v1']
}
dataset = 'ZeroBias'
reco = 'PromptReco'
mebase = 'PixelPhase1-Phase1_MechanicalView-PXBarrel-clusters_per_SignedModuleCoord_per_SignedLadderCoord_PXLayer_{}'
layer = 1
me = mebase.format(layer)

# find files corresponding to settings
# (maps '<era>-<version>' to the path of the corresponding parquet file).
# A comprehension is used so that no loop variables are left behind in the
# notebook namespace: a stale `era` holding only the last era letter could
# otherwise be silently picked up by later cells that expect `era` to be an
# '<era>-<version>' key.
files = {
    f'{era}-{version}': os.path.join(
        datadir,
        f'{dataset}-Run{year}{era}-{reco}-{version}-DQMIO-{me}.parquet'
    )
    for era, versions in eras.items()
    for version in versions
}

# existence check: gather every expected file that is not present on disk;
# abort with the full list if there is any, otherwise report the file count
missing = [path for path in files.values() if not os.path.exists(path)]
if missing:
    raise Exception(f'The following files do not exist: {missing}')
print(f'Found {len(files)} files.')

In [None]:
# pick an era and read the corresponding file
# (`era` must be one of the '<era>-<version>' keys of `files`)

era = 'B-v1'
df = iotools.read_parquet(files[era], verbose=True)

# extract the monitoring elements with their run and lumisection numbers,
# telling the helper which dataframe columns hold the bin counts and ids
mes, runs, lumis = dftools.get_mes(
    df,
    xbinscolumn='x_bin',
    ybinscolumn='y_bin',
    runcolumn='run_number',
    lumicolumn='ls_number',
)
print(f'Shape of ME array: {mes.shape}')

In [None]:
# load norm json

metype = f'PXLayer_{layer}'
# build the file name from the configured `year` instead of a hard-coded
# value, so that the norm data always matches the year of the selected files
normfile = f'normdata/normdata_Run{year}{era}_{metype}.json'
with open(normfile, 'r') as f:
    norm_info = json.load(f)

# divide the norm by the number of bins in order to normalize mean instead of sum
# (the number of bins is the product of the two trailing dimensions of `mes`)
nbins = mes.shape[1] * mes.shape[2]
norm_info['norm'] = [val / nbins for val in norm_info['norm']]

In [None]:
# get the average occupancy map

metype = f'PXLayer_{layer}'
# build the file name from the configured `year` instead of a hard-coded
# value, so that the average map always matches the year of the selected files
avgmefile = f'normdata/avgme_Run{year}{era}_{metype}.npy'
avgme = np.load(avgmefile)

# make a plot
# (title: last '-'-separated part of the monitoring element name, plus the era)
title = me.split('-')[-1] + '\n' + f'Era {era} mean'
fig, ax = plottools.plot_hist_2d(avgme, figsize=(12,6), title=title, titlesize=15,
                xaxtitle=None, xaxtitlesize=None, yaxtitle=None, yaxtitlesize=None,
                ticklabelsize=12, colorticklabelsize=12, extent=None, aspect=None, caxrange=None,
                docolorbar=True, caxtitle='Number of clusters', caxtitlesize=15, caxtitleoffset=15,
                origin='lower')

In [None]:
# make a preprocessor
# The preprocessor is configured for the selected layer's monitoring element
# type, with the norm info loaded from json passed as the global norm and the
# average occupancy map passed as the local norm.
# NOTE(review): how PreProcessor applies these two norms is not visible here;
# check the preprocessor module.

metype = f'PXLayer_{layer}'
preprocessor = PreProcessor(metype, global_norm=norm_info, local_norm=avgme)

In [None]:
# do preprocessing
# Run the preprocessor on the full dataframe read for the selected era.
# The result is indexed below with the same indices as `mes`, `runs` and
# `lumis` (presumably one preprocessed entry per row, in the same order;
# TODO confirm against PreProcessor.preprocess).

mes_preprocessed = preprocessor.preprocess(df)

In [None]:
# plot some examples

# Draw random entries without replacement. The sample size is capped at the
# number of available entries, since np.random.choice with replace=False
# raises a ValueError when asked for more samples than the population holds.
n_examples = min(15, len(runs))
random_ids = np.random.choice(np.arange(len(runs)), size=n_examples, replace=False)
for random_idx in random_ids:

    # pick the raw and preprocessed version of the same entry
    run = runs[random_idx]
    lumi = lumis[random_idx]
    me_orig = mes[random_idx]
    me_prep = mes_preprocessed[random_idx]

    # raw map on the left, preprocessed map on the right
    fig, axs = plt.subplots(ncols=2, figsize=(12, 6))
    fig, axs[0] = plottools.plot_hist_2d(me_orig, fig=fig, ax=axs[0],
                   title='Raw', titlesize=15,
                   xaxtitle=None, xaxtitlesize=None, yaxtitle=None, yaxtitlesize=None,
                   ticklabelsize=12, colorticklabelsize=12, extent=None, aspect=None, caxrange=None,
                   docolorbar=True, caxtitle='Number of clusters', caxtitlesize=15, caxtitleoffset=15,
                   origin='lower')
    # the preprocessed map uses a fixed color axis range
    fig, axs[1] = plottools.plot_hist_2d(me_prep, fig=fig, ax=axs[1],
                   title='Preprocessed', titlesize=15,
                   xaxtitle=None, xaxtitlesize=None, yaxtitle=None, yaxtitlesize=None,
                   ticklabelsize=12, colorticklabelsize=12, extent=None, aspect=None,
                   docolorbar=True, caxtitle='Number of clusters\n(normalized)', caxrange=(1e-6,2), caxtitlesize=15, caxtitleoffset=30,
                   origin='lower')
    plt.subplots_adjust(wspace=0.5)
    # common header above the left panel, in axes coordinates
    title = me.split('-')[-1] + f', Run {run}, LS {lumi}'
    axs[0].text(0.05, 1.2, title, fontsize=15, transform=axs[0].transAxes)