In [None]:
# imports

import os
import sys
import json
import time
import joblib
import importlib
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from functools import partial

# Make the directory three levels above the working directory importable,
# so that project-local packages (presumably `tools`, imported right below)
# can be resolved.
thisdir = os.getcwd()
topdir = os.path.abspath(os.path.join(thisdir, '../../../'))
# Only extend sys.path once: re-running this cell would otherwise append
# the same entry again on every execution.
if topdir not in sys.path:
    sys.path.append(topdir)

import tools.iotools as iotools

In [None]:
# set path to input files

# Eras to process, written as '<main era>-<version>'.
# The commented-out entries are alternative per-part names for eras F and G.
eras = [
    'B-v1',
    'C-v1',
    'D-v1',
    'E-v1',
    'E-v2',
    'F-v1',
    #'F-v1-part1',
    #'F-v1-part2',
    #'F-v1-part3',
    #'F-v1-part4',
    'G-v1',
    #'G-v1-part1',
    #'G-v1-part2',
    #'G-v1-part3',
    #'G-v1-part4',
    'H-v1',
    'I-v1',
    'I-v2',
]
# Detector layer; only its last character ends up in the file name.
layer = 'BPix1'

# Ingredients of the input file names.
datadir = '/eos/user/l/llambrec/dialstools-output'
year = '2024'
dataset = 'ZeroBias'
reco = 'PromptReco'
mebase = (
    'PixelPhase1-Phase1_MechanicalView-PXBarrel-'
    'clusters_per_SignedModuleCoord_per_SignedLadderCoord_PXLayer_'
)

# Build the nested mapping era -> layer -> full path of the parquet file.
input_files = {}
for era in eras:
    # split only on the first dash, everything after it is the version tag
    mainera, version = era.split('-', 1)
    filename = '-'.join((
        dataset,
        f'Run{year}{mainera}',
        reco,
        version,
        'DQMIO',
        f'{mebase}{layer[-1]}.parquet',
    ))
    input_files[era] = {layer: os.path.join(datadir, filename)}
    
# existence check
# Sort every configured input file into `present` or `missing`.
# The loop variables are deliberately NOT called `layer` or `_`:
# `layer` is a module-level setting that later cells read, and `_` is
# IPython's last-output variable; rebinding either at cell level is fragile.
missing = []
present = []
for files_per_layer in input_files.values():
    for path in files_per_layer.values():
        if os.path.exists(path):
            present.append(path)
        else:
            missing.append(path)

# Abort early with the full list of missing files rather than failing
# later, in the middle of loading.
if len(missing) > 0:
    raise Exception(f'The following files do not exist: {missing}')
else:
    print(f'Found {len(present)} files.')

In [None]:
# load total number of lumisections per era

# Number of rows in each era's input file (presumably one row per
# lumisection - verify against the file layout). Only the 'run_number'
# column is requested since just the length is needed.
nlumis = {
    era: len(iotools.read_parquet(input_files[era][layer], columns=['run_number']))
    for era in eras
}
# The sum over all eras is stored under its own key next to the per-era counts.
nlumis['total'] = sum(nlumis.values())

In [None]:
# load filtering results per era

# Per era, read the JSON summary and keep two things:
#   filter_results[era] : the 'filter_results' entry as stored in the file
#   nflags[era]         : the number of entries in 'flagged_run_numbers'
#                         (presumably one entry per flagged lumisection - TODO confirm)
filter_results = {}
nflags = {}
for era in eras:
    json_path = f'output_baseline/flagged_lumisections_{era}.json'
    with open(json_path, 'r') as json_file:
        era_info = json.load(json_file)
    filter_results[era] = era_info['filter_results']
    flagged_runs = era_info['flagged_run_numbers']
    nflags[era] = len(flagged_runs)

In [None]:
# make a plot of the filter results

# Switch: when True, a separate bar chart of failing lumisections is drawn for every era.
do_per_era_plot = False
# Switch: when True, one bar chart summed over all eras is drawn.
do_total_plot = True

# helper functions for plotting
def abs_to_frac(x, tot=1):
    """Convert an absolute count into a fraction of `tot`.

    Parameters:
        x: absolute value (anything that supports division by `tot`).
        tot: the total that corresponds to a fraction of one (default 1).

    Returns:
        `x` divided by `tot`.
    """
    fraction = x / tot
    return fraction

def frac_to_abs(x, tot=1):
    """Convert a fraction of `tot` back into an absolute count.

    Parameters:
        x: fractional value (anything that supports multiplication by `tot`).
        tot: the total that corresponds to a fraction of one (default 1).

    Returns:
        `x` multiplied by `tot`.
    """
    absolute = x * tot
    return absolute

def _count_failing_ls(era_filter_results):
    """Count failing entries per filter, plus the number of unique failing entries.

    Parameters:
        era_filter_results: dict mapping a filter name to a sequence of
            entries; only the first two elements of each entry are used
            (presumably run number and lumisection number - TODO confirm).

    Returns:
        dict with one count per filter (length of its entry list) and a
        'total' key holding the number of distinct entries over all filters.
    """
    counts = {}
    failed = set()
    for name, entries in era_filter_results.items():
        counts[name] = len(entries)
        # Use the pair itself as identifier: unlike packing both numbers into
        # one integer, this cannot collide for large second elements, and it
        # also works when there are no filters at all.
        failed.update((el[0], el[1]) for el in entries)
    counts['total'] = len(failed)
    return counts


def _plot_failing_ls(counts, title, ntotal=None):
    """Draw a bar chart of failing-lumisection counts.

    Parameters:
        counts: dict mapping a label to a count; one bar per item.
        title: text placed above the top-left corner of the axes.
        ntotal: if given and non-zero, a secondary y-axis is added showing
            the counts as a fraction of this number.

    Returns:
        The (fig, ax) pair of the created figure.
    """
    fig, ax = plt.subplots(figsize=(8, 4))
    ax.bar(list(counts.keys()), list(counts.values()))
    # fix the tick positions before restyling the tick labels
    ax.set_xticks(ax.get_xticks())
    ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right', fontsize=15)
    ax.set_ylabel('Number of failing LS', fontsize=15)
    ax.grid(which='both', axis='y', color='gray', linestyle='dashed')
    ax.text(0, 1.03, title, transform=ax.transAxes, fontsize=15)
    if ntotal:
        # skipped for a zero total, which would mean dividing by zero
        conversions = (partial(abs_to_frac, tot=ntotal), partial(frac_to_abs, tot=ntotal))
        secyax = ax.secondary_yaxis('right', functions=conversions)
        secyax.set_ylabel('Fraction of failed LS', fontsize=15)
    plt.show()
    return fig, ax


# table of failing-lumisection counts per era (and optionally one plot per era)
nfiltered = {}
for era in eras:
    nfiltered[era] = _count_failing_ls(filter_results[era])
    if do_per_era_plot:
        fig, ax = _plot_failing_ls(nfiltered[era], f'Lumisection preselection for era {era}')

# combined: sum every filter over all eras.
# The filter names are collected from all eras (not only the first one), so a
# filter missing in some era counts as 0 there instead of raising or being dropped.
# 'total' is kept as the last entry.
filter_names = [
    key for key in dict.fromkeys(key for era in eras for key in nfiltered[era])
    if key != 'total'
]
nfiltered['total'] = {
    key: sum(nfiltered[era].get(key, 0) for era in eras)
    for key in filter_names + ['total']
}

if do_total_plot:
    fig, ax = _plot_failing_ls(nfiltered['total'], 'Lumisection preselection', ntotal=nlumis['total'])

In [None]:
# print fraction of flagged LS per era

def _percentage(numerator, denominator):
    """Return numerator / denominator in percent, or NaN when the denominator is zero."""
    if denominator == 0:
        # e.g. an era without lumisections, or one where nothing passes the filters
        return float('nan')
    return numerator / denominator * 100


def _print_flag_summary(n_flags, n_lumis, n_pass):
    """Print the flagged count relative to all lumisections and to those passing the filters."""
    print(f'Flagged {n_flags} out of {n_lumis} lumisections in total' + ' ({:.2f} %)'.format(_percentage(n_flags, n_lumis)))
    print(f'Flagged {n_flags} out of {n_pass} lumisections that pass filters' + ' ({:.2f} %)'.format(_percentage(n_flags, n_pass)))


# per-era summary, accumulating the totals along the way
tot_nlumis = 0
tot_npass = 0
tot_nflags = 0
for era in eras:
    this_nlumis = nlumis[era]
    # lumisections passing the filters = all minus the unique failing ones
    this_npass = this_nlumis - nfiltered[era]['total']
    this_nflags = nflags[era]
    tot_nlumis += this_nlumis
    tot_npass += this_npass
    tot_nflags += this_nflags
    print(f'Era {era}:')
    _print_flag_summary(this_nflags, this_nlumis, this_npass)

# summary over all eras
print('-----')
print('Total:')
_print_flag_summary(tot_nflags, tot_nlumis, tot_npass)