In [None]:
# imports

# external modules
import sys
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import importlib
# framework modules
# (the parent directory is added to the module search path first,
#  so that the plotting package can be imported from there)
sys.path.append('../')
import plotting.plottools
# reload the module so that edits to its source are picked up
# without having to restart the kernel
importlib.reload(plotting.plottools)
from plotting.plottools import plot_histogram
# local modules
# (each one is reloaded before names are imported from it, for the same reason)
import prepare_training_set
importlib.reload(prepare_training_set)
from prepare_training_set import prepare_training_data_from_files
import patternfiltering
importlib.reload(patternfiltering)
from patternfiltering import contains_pattern, contains_any_pattern

In [None]:
# load a set of example histograms from a single input file

fname = '../data/data/ZeroBias-Run2023C-PromptReco-v1-DQMIO-PixelPhase1-Tracks-PXForward-clusterposition_xy_ontrack_PXDisk_+1_preprocessed.parquet'
# options forwarded as keyword arguments to the loading function
kwargs = dict(
    verbose=True,
    entries_threshold=10000,
    skip_first_lumisections=5,
)
hists, runs, lumis = prepare_training_data_from_files([fname], **kwargs)
# keep only index 0 along the fourth axis
hists = hists[:, :, :, 0]
print(hists.shape)

In [None]:
# build a mask that is True at the positions
# that are zero in more than half of the histograms

n_zero_per_position = np.sum(hists == 0, axis=0)
shape_mask = n_zero_per_position > len(hists) / 2.

# draw the mask with the same plotting helper as used for the histograms
fig, ax = plt.subplots()
plot_histogram(shape_mask, fig=fig, ax=ax, caxrange=(-0.01, 1))
ax.text(0.02, 1.02, 'Shape mask', transform=ax.transAxes, fontsize=12)

In [None]:
# look for a single pattern in a single histogram

hist = hists[1000]
pattern = np.zeros((3, 1))
# the search is given the inverse of the shape mask
inverted_shape_mask = ~shape_mask
contains = contains_pattern(hist, pattern, mask=inverted_shape_mask)
print(contains)
fig, ax = plot_histogram(hist)

In [None]:
# look for the logical OR of several patterns in a single histogram

hist = hists[1000]
# one all-zero pattern per requested shape
patterns = [np.zeros(shape) for shape in ((3, 3), (3, 2))]
inverted_shape_mask = ~shape_mask
contains = contains_any_pattern(hist, patterns, mask=inverted_shape_mask)
print(contains)
fig, ax = plot_histogram(hist)

In [None]:
# do filtering

# flag every histogram that contains at least one of the patterns
# (the inverse of the shape mask is passed as the mask for the search)
patterns = [np.zeros((2,2)), np.zeros((3,1)), np.zeros((1,3))]
contains = contains_any_pattern(hists, patterns, mask=~shape_mask)
print('{} out of {} histograms contain one of these patterns'.format(sum(contains), len(contains)))
print('Examples:')

# pick random examples among the flagged histograms.
# - sample without replacement, so the same histogram is never plotted twice
#   (np.random.choice samples with replacement by default);
# - cap the number of plots at the number of flagged histograms;
# - skip the sampling altogether if nothing was flagged,
#   since np.random.choice cannot draw from an empty array.
flagged_ids = np.arange(len(hists))[contains]
nplots = min(5, len(flagged_ids))
if nplots > 0:
    plotids = np.random.choice(flagged_ids, size=nplots, replace=False)
else:
    plotids = []

for i in plotids:
    fig, ax = plot_histogram(hists[i])
    ax.text(0.02, 1.02, 'Run: {}, lumi: {}'.format(runs[i], lumis[i]), transform=ax.transAxes, fontsize=12)

In [None]:
# load the data again, this time passing a list of veto patterns to the loader

fname = '../data/data/ZeroBias-Run2023C-PromptReco-v1-DQMIO-PixelPhase1-Tracks-PXForward-clusterposition_xy_ontrack_PXDisk_+1_preprocessed.parquet'
# one all-zero pattern per shape
veto_shapes = ((2, 2), (3, 1), (1, 3))
kwargs = dict(
    verbose=True,
    entries_threshold=10000,
    skip_first_lumisections=5,
    veto_patterns=[np.zeros(shape) for shape in veto_shapes],
)
hists, runs, lumis = prepare_training_data_from_files([fname], **kwargs)
# keep only index 0 along the fourth axis
hists = hists[:, :, :, 0]
print(hists.shape)

In [None]:
# plot examples of histograms that make it in the training set

# pick random examples among all loaded histograms.
# - sample without replacement, so the same histogram is never plotted twice
#   (np.random.choice samples with replacement by default);
# - cap the number of plots at the number of available histograms;
# - skip the sampling altogether if there are no histograms,
#   since np.random.choice cannot draw from an empty array.
nplots = min(5, len(hists))
if nplots > 0:
    plotids = np.random.choice(np.arange(len(hists)), size=nplots, replace=False)
else:
    plotids = []

for i in plotids:
    fig, ax = plot_histogram(hists[i])
    ax.text(0.02, 1.02, 'Run: {}, lumi: {}'.format(runs[i], lumis[i]), transform=ax.transAxes, fontsize=12)