In [None]:
# imports

import os
import sys
import importlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# make the directory two levels above the working directory importable;
# the project-local imports that follow this block rely on it
thisdir = os.getcwd()
topdir = os.path.abspath(os.path.join(thisdir, '../../'))
# only add the entry once, so re-running this cell in the same kernel
# does not keep growing sys.path with duplicates
if topdir not in sys.path:
    sys.path.append(topdir)

import tools.iotools as iotools
import tools.dftools as dftools
import plotting.plottools as plottools
from studies.pixel_clusters_2024.plotting.plot_cluster_occupancy import plot_cluster_occupancy

In [None]:
# set path to files

# settings
# directory holding the input parquet files;
# the DIALSTOOLS_DATADIR environment variable (if set) takes precedence,
# so the notebook can be pointed at another location without editing this path
datadir = os.environ.get('DIALSTOOLS_DATADIR', '/eos/user/l/llambrec/dialstools-output')
# eras to include, each mapped to the list of versions to read for that era
eras = {
    '2024A': ['v1'],
    '2024B': ['v1'],
    '2024C': ['v1'],
    '2024D': ['v1'],
    '2024E': ['v1', 'v2'],
    '2024F': ['v1'],
    '2024G': ['v1'],
    '2024H': ['v1'],
    '2024I': ['v1', 'v2'],
    '2024J': ['v1'],
    '2025B': ['v1'],
    '2025C': ['v1', 'v2'],
    '2025D': ['v1'],
    '2025E': ['v1'],
    #'2025F': ['v1'],
}
# dataset and reconstruction tags that enter the file names
dataset = 'ZeroBias'
reco = 'PromptReco'
# monitoring element name template; the placeholder is filled with the layer number below
mebase = 'PixelPhase1-Phase1_MechanicalView-PXBarrel-clusters_per_SignedModuleCoord_per_SignedLadderCoord_PXLayer_{}'
#mebase = 'PixelPhase1-Phase1_MechanicalView-PXForward-clusters_per_SignedDiskCoord_per_SignedBladePanelCoord_PXRing_{}'
layer = 1
me = mebase.format(layer)

# build the expected file path for every (era, version) combination,
# keyed by '<era>-<version>'
files = {
    f'{era}-{version}': os.path.join(
        datadir,
        f'{dataset}-Run{era}-{reco}-{version}-DQMIO-{me}.parquet'
    )
    for era, versions in eras.items()
    for version in versions
}

# stop right away if any of the expected input files is not there
missing = [path for path in files.values() if not os.path.exists(path)]
if missing:
    raise Exception(f'The following files do not exist: {missing}')
print(f'Found {len(files)} files.')

In [None]:
# load one example file, print its number of rows and show a few instances

f = files['2025C-v1']
df = iotools.read_parquet(f)
print(len(df))
# show the first rows, as the title of this cell announces
# (assumes read_parquet returns a pandas DataFrame, as the column access
#  in the other cells of this notebook suggests - TODO confirm)
df.head()

In [None]:
# report the number of runs and lumisections per file, followed by the totals

print('Finding number of runs and lumisections:')
tot_nruns = 0
tot_nlumis = 0
for label, path in files.items():
    # only the run and lumisection number columns are requested here
    df = iotools.read_parquet(path, columns=['run_number', 'ls_number'])
    run_numbers = dftools.get_runs(df, runcolumn='run_number')
    firstrun, lastrun = run_numbers[0], run_numbers[-1]
    nruns = len(run_numbers)
    lumi_numbers = dftools.get_ls(df, lumicolumn='ls_number')
    nlumis = len(lumi_numbers)
    print(f'  - {label}: {nruns} runs, {nlumis} lumisections (run {firstrun} - {lastrun})')
    # accumulate the totals over all files
    tot_nruns += nruns
    tot_nlumis += nlumis
print(f'Total: {tot_nruns} runs, {tot_nlumis} lumisections')

In [None]:
# make a plot of the number of entries and find fraction of empty lumisections

# read the entries, run numbers and lumisection numbers of every file
# and remember which index range of the concatenated arrays belongs to which file
entries_chunks = []
runs_chunks = []
lumis_chunks = []
era_divisions = {}
offset = 0
for era, f in files.items():
    df = iotools.read_parquet(f, columns=['entries', 'run_number', 'ls_number'])
    entries_chunks.append(df['entries'].values)
    runs_chunks.append(df['run_number'].values)
    lumis_chunks.append(df['ls_number'].values)
    # half-open index range [start, stop) of this file in the concatenated arrays
    nrows = len(df)
    era_divisions[era] = (offset, offset + nrows)
    offset += nrows
all_entries = np.concatenate(entries_chunks)
all_runs = np.concatenate(runs_chunks)
all_lumis = np.concatenate(lumis_chunks)

# count the monitoring elements with zero entries and report their share
nempty = (all_entries == 0).sum()
frac = float(nempty) / len(all_entries)
print(f'Empty monitoring elements: {nempty}/{len(all_entries)} ({frac*100:.2f}%)')

# plot the number of entries against the running index, with the file boundaries marked
fig, ax = plt.subplots(figsize=(18,6))
xax = np.arange(len(all_entries))
ax.step(xax, all_entries, where='mid', color='b')
# start the y-axis at zero and leave extra room at the top (used for the labels)
auto_ymax = ax.get_ylim()[1]
ax.set_ylim((0, auto_ymax*1.2))
xlow, xhigh = ax.get_xlim()
ylow, yhigh = ax.get_ylim()
xscale = xhigh - xlow
yscale = yhigh - ylow
boundary_style = dict(color='grey', linestyle='--')
for idx, (eraname, (start, stop)) in enumerate(era_divisions.items()):
    # the left boundary is drawn for the first range only;
    # for the others it coincides with the right boundary of the previous range
    if idx == 0:
        ax.axvline(x=start, **boundary_style)
    ax.axvline(x=stop, **boundary_style)
    # label slightly right of the left boundary, cycling through three heights
    # (presumably to limit overlap between neighbouring labels)
    txtleft = start + 0.005*xscale
    txtbottom = yscale*(0.8 - 0.05*(idx%3))
    ax.text(txtleft, txtbottom, eraname, fontsize=12)
ax.set_xlabel('Lumisection', fontsize=15)
ax.set_ylabel('Number of entries', fontsize=15)
_ = ax.set_title(me.split('-')[-1], fontsize=15)

# make an alternative plot showing the distribution of the number of entries,
# with one normalized histogram per file
fig, ax = plt.subplots(figsize=(8,6))
# note: values beyond the last bin edge (1e7) lie outside the binning
# and do not show up in the histograms
bins = np.linspace(0, 1e7, num=51)
cids = np.linspace(0, 1, num=len(era_divisions))
cmap = plt.get_cmap('cool')
for idx, (era, (start, stop)) in enumerate(era_divisions.items()):
    # reuse the values read earlier in this cell instead of reading every file again:
    # era_divisions holds the [start, stop) range of each file in all_entries
    entries = all_entries[start:stop]
    ax.hist(entries, bins=bins, density=True,
            histtype='step', linewidth=2,
            label=era, color=cmap(cids[idx]))
ax.set_yscale('log')
ax.grid(which='both')
ax.set_xlabel('Number of entries', fontsize=15)
ax.set_ylabel('Number of lumisections (normalized)', fontsize=15)
ax.legend()
_ = ax.set_title(me.split('-')[-1], fontsize=15)

In [None]:
# investigate major spike at the end of 2024D-v1

# number of entries above which a lumisection is flagged as a spike
spike_threshold = 1e7
# only the first part of the concatenated arrays is searched
# (presumably chosen to stop after 2024D-v1 - TODO confirm against era_divisions)
search_stop = 200000
# run to zoom in on (hard-coded; presumably taken from the printout below - TODO confirm)
spike_run = 380945

# the searched slice starts at index 0, so the indices found
# are valid for the full-length arrays as well
ids = np.nonzero(all_entries[:search_stop] > spike_threshold)
print(ids)
print(all_runs[ids])
print(all_lumis[ids])
subset_ids = np.nonzero(all_runs==spike_run)

# make a plot of the number of entries for all lumisections of the selected run
fig, ax = plt.subplots(figsize=(12,6))
subset_entries = all_entries[subset_ids]
xax = np.arange(len(subset_entries))
ax.step(xax, subset_entries, where='mid', color='b')
ax.set_xlabel('Lumisection', fontsize=15)
ax.set_ylabel('Number of entries', fontsize=15)
# state which run is shown, so the figure can be read on its own
_ = ax.set_title(f'Run {spike_run}', fontsize=15)

In [None]:
# investigate major spikes in era 2025D-v1

# number of entries above which a lumisection is flagged as a spike
spike_threshold = 4e7
# run to zoom in on (hard-coded; presumably taken from the printout below - TODO confirm)
spike_run = 394637

ids = np.nonzero(all_entries > spike_threshold)
print(ids)
print(all_runs[ids])
print(all_lumis[ids])
subset_ids = np.nonzero(all_runs==spike_run)

# make a plot of the number of entries for all lumisections of the selected run
fig, ax = plt.subplots(figsize=(12,6))
subset_entries = all_entries[subset_ids]
xax = np.arange(len(subset_entries))
ax.step(xax, subset_entries, where='mid', color='b')
ax.set_xlabel('Lumisection', fontsize=15)
ax.set_ylabel('Number of entries', fontsize=15)
# state which run is shown, so the figure can be read on its own
_ = ax.set_title(f'Run {spike_run}', fontsize=15)

In [None]:
# make an example plot

# import the plotting module itself, reload it and bind the function again,
# so that edits to the module source are picked up without restarting the kernel
import studies.pixel_clusters_2024.plotting.plot_cluster_occupancy
importlib.reload(studies.pixel_clusters_2024.plotting.plot_cluster_occupancy)
from studies.pixel_clusters_2024.plotting.plot_cluster_occupancy import plot_cluster_occupancy

# choose the file to look at and list the runs it contains
era = '2025D-v1'
dftemp = iotools.read_parquet(files[era], columns=['run_number', 'ls_number'])
runs = dftools.get_runs(dftemp, runcolumn='run_number')
print(f'Available runs: {runs}')

# choose a run, list the lumisections it contains, and choose one of them
run = 394637
dftemp_run = dftools.select_runs(dftemp, [run], runcolumn='run_number')
lumis = dftools.get_ls(dftemp_run, lumicolumn='ls_number')
print(f'Available lumisections: {lumis}')
lumi = 40

# load the monitoring element for the selected run and lumisection
df = iotools.read_lumisections(files[era], [run], [lumi])
# fail early with a clear message if the selection matched nothing
if len(df) == 0:
    raise ValueError(f'No monitoring element found for run {run}, LS {lumi} in {files[era]}')
# take the first row by position: plain [0] is a label lookup and fails
# when the index of the returned frame does not start at 0
# (assumes df is a pandas DataFrame - TODO confirm)
xbins = int(df['x_bin'].iloc[0])
ybins = int(df['y_bin'].iloc[0])
print(f'Number of x-bins: {xbins}')
print(f'Number of y-bins: {ybins}')
mes, _, _ = dftools.get_mes(df, xbinscolumn='x_bin', ybinscolumn='y_bin', runcolumn='run_number', lumicolumn='ls_number',
                            runs=[run], lumis=[lumi])
print(f'Shape of mes array: {mes.shape}')

#title = me.split('-')[-1]+ '\n' + f'Run {run}, LS {lumi}'
# the last underscore-separated field of the me name is the number
# that was filled into the mebase template
me_index = me.split("_")[-1]
if 'PXLayer' in mebase:
    title = f'Run {run}, LS {lumi}, PXLayer {me_index}'
elif 'PXRing' in mebase:
    title = f'Run {run}, LS {lumi}, PXRing {me_index}'
else:
    # fallback for any other mebase, so that title is always defined
    # and never silently carried over from an earlier run of this cell
    title = f'Run {run}, LS {lumi}'

# advanced plot
# keyword options for the occupancy plot, gathered in one place
occupancy_options = dict(
    figsize=(12,6), title=title, titlesize=15,
    xaxtitlesize=15, yaxtitlesize=15,
    ticklabelsize=12, colorticklabelsize=12,
    #caxrange=(0, 4000),
    docolorbar=True, caxtitle='Number of clusters', caxtitlesize=15, caxtitleoffset=15,
)
# draw the first (and only requested) monitoring element
fig, ax = plot_cluster_occupancy(mes[0], **occupancy_options)

# more basic plot as backup
#fig, ax = plottools.plot_hist_2d(mes[0], figsize=(12,6), title=title, titlesize=15,
#                xaxtitle=None, xaxtitlesize=None, yaxtitle=None, yaxtitlesize=None,
#                ticklabelsize=12, colorticklabelsize=12, extent=None, aspect=None, caxrange=None,
#                docolorbar=True, caxtitle='Number of clusters', caxtitlesize=15, caxtitleoffset=15,
#                origin='lower')