Investigate the time structure of the numerous holes and similar anomalies observed in era C and subsequent eras.

In [None]:
# imports

import os
import sys
import json
import joblib
import importlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# make the directory two levels above the current working directory importable,
# so that the project-local imports below (tools, plotting, studies) can be resolved
thisdir = os.getcwd()
topdir = os.path.abspath(os.path.join(thisdir, '../../'))
# only append once: notebook cells are frequently re-run,
# and an unconditional append would pile up duplicate entries in sys.path
if topdir not in sys.path:
    sys.path.append(topdir)

import tools.iotools as iotools
import tools.dftools as dftools
import plotting.plottools as plottools

from studies.clusters_2024.preprocessing.preprocessor import make_default_preprocessor

In [None]:
# set path to files

# settings
datadir = '/eos/user/l/llambrec/dialstools-output'
year = '2024'
# eras to consider, each mapped to the list of versions available for it
eras = {
    'A': ['v1'],
    'B': ['v1'],
    'C': ['v1'],
    'D': ['v1'],
    'E': ['v1', 'v2'],
    'F': ['v1'],
    'G': ['v1'],
    'H': ['v1'],
    'I': ['v1', 'v2'],
    'J': ['v1']
}
dataset = 'ZeroBias'
reco = 'PromptReco'
# name template of the monitoring element; the layer number is filled in below
mebase = 'PixelPhase1-Phase1_MechanicalView-PXBarrel-clusters_per_SignedModuleCoord_per_SignedLadderCoord_PXLayer_{}'
layer = 1
me = mebase.format(layer)

# find files corresponding to settings
# (one parquet file per era/version combination, stored under the key '<era>-<version>')
files = {}
for era, versions in eras.items():
    for version in versions:
        filename = f'{dataset}-Run{year}{era}-{reco}-{version}-DQMIO-{me}.parquet'
        files[f'{era}-{version}'] = os.path.join(datadir, filename)

# existence check
# (collect all missing files first, so that a single error reports every one of them)
missing = [path for path in files.values() if not os.path.exists(path)]
if missing:
    # FileNotFoundError is a subclass of Exception, so generic handlers keep working
    raise FileNotFoundError(f'The following files do not exist: {missing}')
print(f'Found {len(files)} files.')

In [None]:
# pick a batch and read the corresponding data

# era/version key into the `files` dict, and the range of batches to read
era = 'C-v1'
batch_size = 50000
first_batch = 0
last_batch = 0

# load dataframe
df = iotools.read_parquet(
    files[era],
    verbose=True,
    batch_size=batch_size,
    first_batch=first_batch,
    last_batch=last_batch,
)
# remember the size before filtering, for the printout below
ndf = len(df)

# do filtering
# (keep only the instances with more than 0.5e6 entries)
passes_entries = df['entries'] > 0.5e6
df = df[passes_entries]
print(f'Found {len(df)} / {ndf} instances passing filters.')

In [None]:
# do preprocessing

# build the default preprocessor for the selected era and layer, and apply it to the filtered dataframe
# NOTE(review): `era` as set in the batch-selection cell is an '<era>-<version>' key (e.g. 'C-v1');
# confirm that make_default_preprocessor expects this format rather than the bare era letter.
preprocessor = make_default_preprocessor(era, layer)
mes = preprocessor.preprocess(df)
# run and lumisection numbers, read from the dataframe after the preprocessing call
# NOTE(review): assumes preprocess() keeps the number and order of instances in `df`,
# so that `runs` and `lumis` line up with the first axis of `mes` - TODO confirm.
runs = df['run_number'].values
lumis = df['ls_number'].values

# alternative option: without preprocessing
# (reads the monitoring elements together with run and lumisection numbers directly from the dataframe)
#mes, runs, lumis = dftools.get_mes(df, xbinscolumn='x_bin', ybinscolumn='y_bin', runcolumn='run_number', lumicolumn='ls_number')

# printouts for checking
print(f'Shape of ME array: {mes.shape}')

In [None]:
# count zeros per bin

# flag the entries that are exactly zero, then count them along the first axis
is_zero = (mes == 0)
nzeros = np.count_nonzero(is_zero, axis=0)
# divide by the length of the first axis to turn counts into fractions
zerofrac = nzeros / len(mes)

In [None]:
# plot

# display options for the 2D map of the per-bin zero fraction
hist_options = dict(
    title='Fraction of lumisections empty',
    titlesize=15,
    xaxtitle=None,
    xaxtitlesize=None,
    yaxtitle=None,
    yaxtitlesize=None,
    ticklabelsize=12,
    colorticklabelsize=12,
    extent=None,
    aspect=None,
    docolorbar=True,
    caxtitle='Fraction of lumisections',
    caxrange=(0, 1),
    caxtitlesize=15,
    caxtitleoffset=30,
    origin='lower',
)
fig, ax = plottools.plot_hist_2d(zerofrac, **hist_options)

# annotate the plot with the last dash-separated part of the monitoring element name
title = me.rsplit('-', 1)[-1]
ax.text(0.01, 1.3, title, fontsize=15, transform=ax.transAxes)

In [None]:
# plot time evolution of occupancy for particular bins

# indices of the bin to inspect (second and third axis of `mes`, respectively)
yidx = 3
xidx = 19

fig, ax = plt.subplots(figsize=(18, 6))
# values of the chosen bin for every instance along the first axis
data = mes[:, yidx, xidx]
# the x-axis is the sequential position of each instance in `mes`,
# not the actual lumisection number
xax = np.arange(len(mes))
ax.plot(xax, data, color='b')
ax.set_ylim(bottom=-0.1, top=2)
# dashed reference line at zero
ax.axhline(0, color='grey', linestyle='dashed')
ax.set_xlabel('Lumisection', fontsize=15)
ax.set_ylabel('Normalized occupancy', fontsize=15)

# zoom in
#ax.set_xlim(19400, 19420)