In [None]:
import sys
import os
import numpy as np
import pandas as pd
from pyarrow.parquet import ParquetFile
from pyarrow.parquet import ParquetDataset
import pyarrow as pa
import importlib
import plottools
importlib.reload(plottools)
from plottools import plot_histogram
from plottools import plot_histograms_gif

In [None]:
# define file
# The path is assembled from adjacent string literals (joined at compile time)
# purely to keep the line length manageable.

# alternative input (barrel, layer 1):
#fname = '../data/data/ZeroBias-Run2024B-PromptReco-v1-DQMIO-PixelPhase1-Phase1_MechanicalView-PXBarrel-clusters_per_SignedModuleCoord_per_SignedLadderCoord_PXLayer_1.parquet'
fname = (
    '../data/data/'
    'ZeroBias-Run2024B-PromptReco-v1-DQMIO-'
    'PixelPhase1-Phase1_MechanicalView-PXForward-'
    'clusters_per_SignedDiskCoord_per_SignedBladePanelCoord_PXRing_1.parquet'
)

In [None]:
# print available runs
# Only the 'fromrun' column is requested from the file; its distinct values
# are listed in ascending order.

df = pd.read_parquet(fname, columns=['fromrun'])
unique_runs = set(df['fromrun'].tolist())
runs = sorted(unique_runs)
print('Available run numbers in file {}:'.format(fname))
print(runs)

In [None]:
# define run and lumisection filters
# Each filter is a (column, operator, value) tuple; the list is handed to
# ParquetDataset when the data is read.
# (see https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html)

filters = [
    ('fromrun', '=', 379008),
    # optional lumisection selections, uncomment to enable:
    #('fromlumi', '>', 10),
    #('fromlumi', '<', 4),
]

In [None]:
# read the data
# Load the rows of `fname` that pass `filters` and unpack them into arrays:
#   hists : one 2D histogram per entry, reshaped to (xbins, ybins)
#   runs  : value of the 'fromrun' column for each entry
#   lumis : value of the 'fromlumi' column for each entry

df = ParquetDataset(fname, filters=filters).read().to_pandas()
print('Found {} entries'.format(len(df)))
if len(df) == 0:
    # Fail early with a readable message instead of an opaque KeyError
    # on the first element lookup below.
    raise ValueError('No entries in file {} pass the filters {}'.format(fname, filters))

# Positional access (.iloc / iteration) instead of label lookup ([0], [i]):
# the index labels of a filtered read are not guaranteed to be 0..n-1.
# NOTE(review): the binning of the first entry is applied to all entries,
# as before - confirm that all selected entries share the same binning.
xbins = df['Xbins'].iloc[0]
ybins = df['Ybins'].iloc[0]
hists = np.array([histo.reshape(xbins, ybins) for histo in df['histo']])
runs = np.array(df['fromrun'])
lumis = np.array(df['fromlumi'])

In [None]:
# plot the data
# One figure per histogram, annotated with its run and lumisection number.

# keyword arguments shared by all figures
plot_kwargs = dict(
    figsize=(8,8),
    xaxtitle='x (bin number)', xaxtitlesize=12,
    yaxtitle='y (bin number)', yaxtitlesize=12,
    caxrange=(0.01,5000),
    caxtitle='Occupancy', caxtitlesize=15, caxtitleoffset=20,
)

for i,hist in enumerate(hists):
    fig, ax = plot_histogram(hist, **plot_kwargs)
    label = 'Run: {}, lumi: {}'.format(runs[i], lumis[i])
    # coordinates are in axes fractions: just above the top-left corner of the plot
    ax.text(0.02, 1.02, label, transform=ax.transAxes, fontsize=12)
    fig.tight_layout()
    #fig.savefig('fig_{}.png'.format(i), dpi=300)

In [None]:
# make a gif
# All histograms are passed to plot_histograms_gif, with one title per
# histogram and the output written to `figname`.

# output file and color axis range
figname = 'temp_gif.gif'
caxrange = (0.01, 5000)

# one title per histogram
n_frames = len(hists)
titles = ['Run {}, LS {}'.format(runs[idx], lumis[idx]) for idx in range(n_frames)]

gif_options = dict(
    titles=titles,
    figname=figname,
    figsize=(8,8),
    caxrange=caxrange,
    duration=300,
    mode='imageio',
)
plot_histograms_gif(hists, **gif_options)

In [None]:
# display the gif
# Load the file written to `figname` and show it inline; the Image object
# must be the last expression of the cell for the notebook to render it.

import IPython
from IPython.display import Image

gif_preview = Image(filename=figname)
gif_preview