In [1]:
import awkward as ak
from coffea import processor
from coffea.nanoevents.methods import candidate
import hist
import pandas as pd
import numpy as np
import pathlib
import shutil
import os
import matplotlib.pyplot as plt
import mplhep as hep
import warnings
warnings.filterwarnings('ignore')

In [2]:
class MyProcessor(processor.ProcessorABC):
    """Fill CA4 CSC-cluster histograms after a sequence of cumulative event cuts.

    Every histogram returned by ``process`` has two axes: a string-category
    axis holding the cut name ('raw', 'ncsc', 'nstation') and a regular axis
    for one cluster quantity.
    """

    def __init__(self):
        pass

    def process(self, events):
        """Histogram cluster quantities for one chunk of events.

        Parameters
        ----------
        events
            Chunk of events exposing the ``isHLT_PPZeroBias``,
            ``nca4CSCcluster`` and ``ca4CSCcluster*`` branches read below.

        Returns
        -------
        dict
            Maps the output name (e.g. ``'h_ca4x'``) to a ``hist.Hist`` filled
            once per cut with the flattened per-cluster values of the events
            passing that cut and all cuts listed before it.
        """
        ca4 = ak.zip(
            {
                'ncsc': events.nca4CSCcluster,
                'eta': events.ca4CSCclusterEta,
                'phi': events.ca4CSCclusterPhi,
                'x': events.ca4CSCclusterX,
                'y': events.ca4CSCclusterY,
                'z': events.ca4CSCclusterZ,
                'size': events.ca4CSCclusterSize,  # need for HLT numerator
                'time': events.ca4CSCclusterTime,
                'timeSpread': events.ca4CSCclusterTimeSpread,
                'nME11_12': events.ca4CSCclusterME11_12,
                'nStation10': events.ca4CSCclusterNstation10,
                'avgStation10': events.ca4CSCclusterAvgStation10,
            }
        )

        # preselection: keep only events flagged by the HLT bit
        # NOTE(review): assumes isHLT_PPZeroBias is a boolean branch so that it
        # acts as a mask rather than as integer indices -- confirm.
        ca4 = ca4[events.isHLT_PPZeroBias]

        # Per-event masks, applied cumulatively in insertion order.
        cuts = {
            'raw': ak.num(ca4, axis=1) > 0,
            'ncsc': ak.any(ca4.ncsc == 1, axis=1),
            'nstation': ak.any(ca4.nStation10 == 1, axis=1),
        }

        # The category labels come from the cut table itself, so a label can
        # never be paired with the wrong mask.
        cut_axis = hist.axis.StrCategory(list(cuts), growth=True)

        # (output key, cluster field, value axis, histogram label)
        specs = [
            ('h_ca4x', 'x', hist.axis.Regular(100, -1000, 1000, name='x'), "ca4x"),
            ('h_ca4y', 'y', hist.axis.Regular(100, -1000, 1000, name='y'), "ca4y"),
            ('h_ca4z', 'z', hist.axis.Regular(120, -1200, 1200, name='z'), "ca4z"),
            ('h_ca4phi', 'phi', hist.axis.Regular(40, -np.pi, np.pi, name='phi'), "ca4phi"),
            ('h_ca4eta', 'eta', hist.axis.Regular(40, -5, 5, name='eta'), "ca4eta"),
            ('h_ca4size', 'size', hist.axis.Regular(100, 0, 1000, name="clusterSize"), "ca4size"),
            ('h_ca4time', 'time', hist.axis.Regular(90, -90, 90, name='time'), "ca4time"),
            ('h_ca4timespread', 'timeSpread', hist.axis.Regular(80, 0, 100, name='timespread'), "ca4time_spread"),
            ('h_ca4nME11_12', 'nME11_12', hist.axis.Regular(40, 0, 40, name='nME11_12'), "ca4_nME11_12"),
            ('h_ca4nstation', 'nStation10', hist.axis.Regular(8, 0, 8, name='nstation'), "ca4_nStation10"),
            ('h_ca4avgstation', 'avgStation10', hist.axis.Regular(40, 0, 8, name='avgstation'), "ca4_avgStation"),
        ]
        # TODO(review): no cluster-multiplicity ('nca4') histogram is filled or
        # returned; add one to `specs` handling if it is needed.
        hists = {
            key: hist.Hist(cut_axis, value_axis, label=label)
            for key, _field, value_axis, label in specs
        }

        sel = None
        for cut_name, mask in cuts.items():
            # Cumulative selection: each cut is AND-ed with all previous ones.
            sel = mask if sel is None else sel & mask
            passing = ca4[sel]  # slice once per cut, reuse for every histogram
            for key, field, _value_axis, _label in specs:
                hists[key].fill(cut_name, ak.flatten(passing[field]))

        return hists

    def postprocess(self, accumulator):
        """Return the accumulated output unchanged."""
        return accumulator

In [3]:
import time
from coffea.nanoevents import BaseSchema


def read_file_list(path):
    """Return the input files listed in ``path``, one per line.

    Surrounding whitespace is stripped and blank lines are skipped, so a
    trailing newline in the list does not produce an empty file name. The
    file handle is closed as soon as the list has been read.
    """
    with open(path) as list_file:
        return [line.strip() for line in list_file if line.strip()]


tstart = time.time()

futures_run = processor.Runner(
    executor=processor.FuturesExecutor(compression=None, workers=12),
    schema=BaseSchema,
    maxchunks=50000,
)

fileset = {
    'run2022c': read_file_list("ppZeroBias_Run2022C_hltinclude.txt"),
    'run2022e': read_file_list("ppZeroBias_Run2022E_hltinclude.txt"),
}

# Small local fileset for quick tests; swap in for the dict above.
# fileset = {
#     'test': [
#         './plots_1963.root',
#         #'./plots_1964.root'
#     ],
# }

output = futures_run(
    fileset,
    treename="simpleCSCshowerFilter/hmt",
    processor_instance=MyProcessor(),
)

elapsed = time.time() - tstart

print(output)
print(f"Processing took {elapsed:.1f} s")

Output()

Output()

{'h_ca4x': Hist(
  StrCategory(['raw', 'ncsc', 'nstation'], growth=True, label='Axis 0'),
  Regular(100, -1000, 1000, name='x'),
  storage=Double()) # Sum: 4016254.0, 'h_ca4y': Hist(
  StrCategory(['raw', 'ncsc', 'nstation'], growth=True, label='Axis 0'),
  Regular(100, -1000, 1000, name='y'),
  storage=Double()) # Sum: 4016254.0, 'h_ca4z': Hist(
  StrCategory(['raw', 'ncsc', 'nstation'], growth=True, label='Axis 0'),
  Regular(120, -1200, 1200, name='z'),
  storage=Double()) # Sum: 4016254.0, 'h_ca4phi': Hist(
  StrCategory(['raw', 'ncsc', 'nstation'], growth=True, label='Axis 0'),
  Regular(40, -3.14159, 3.14159, name='phi'),
  storage=Double()) # Sum: 4016254.0, 'h_ca4eta': Hist(
  StrCategory(['raw', 'ncsc', 'nstation'], growth=True, label='Axis 0'),
  Regular(40, -5, 5, name='eta'),
  storage=Double()) # Sum: 4016254.0, 'h_ca4size': Hist(
  StrCategory(['raw', 'ncsc', 'nstation'], growth=True, label='Axis 0'),
  Regular(100, 0, 1000, name='clusterSize'),
  storage=Double()) # Sum:

In [4]:
import pickle

# Persist the accumulated histograms so plotting can be redone without
# re-running the processor.
filename = 'histograms_ca4_kinematics_v2.pickle'
# The context manager closes the file even if pickling raises.
with open(filename, 'wb') as outfile:
    pickle.dump(output, outfile)

In [5]:
# Show the cluster-time histogram for the 'nstation' cut category.
ca4time_hist = output['h_ca4time']
ca4time_hist["nstation", :]