# In [None]:
from __future__ import annotations

import argparse
import logging
import os
import time
from os.path import join
import sys
sys.path.append('../')
import warnings
warnings.filterwarnings("ignore")

import numpy as np
from coffea import processor, util
from coffea.nanoevents import NanoAODSchema

from azh_analysis.processors.btag_eff_processor import bTagEffProcessor
from azh_analysis.utils.sample import get_fileset, get_sample_info

# Run configuration, set directly as module-level values.
year = "2018"  # interpolated into the fileset / sample-info file names
source = "MC_UL"  # prefix of the "<source>_<year>" yaml and csv inputs
verbose = True  # True -> DEBUG logging, False -> INFO
add_signal = True  # also load the signal_UL_<year> fileset and sample info
test_mode = True  # keep only the first file of every sample
# NOTE(review): show_config and interactive are not read anywhere in the
# visible part of this script -- confirm whether they are still needed.
show_config = True
interactive = True

# Configure the root logger: "<timestamp> <level> <message>" records,
# at DEBUG verbosity when `verbose` is set and INFO otherwise.
if verbose:
    log_level = logging.DEBUG
else:
    log_level = logging.INFO
log_format = "%(asctime)s %(levelname)s %(message)s"
logging.basicConfig(format=log_format, level=log_level)
logging.info("Initializing")

# Directories holding the sample-info csv files and the fileset yaml files.
csv_indir = "../samples"
yaml_indir = "../samples/filesets"

# The primary inputs share the "<source>_<year>" file stem.
base_tag = f"{source}_{year}"
fileset = get_fileset(join(yaml_indir, base_tag + ".yaml"))
sample_info = get_sample_info(join(csv_indir, base_tag + ".csv"))

# Optionally merge the signal samples into the same containers. Only the
# first four characters of `year` go into the signal file names.
if add_signal:
    signal_tag = f"signal_UL_{year[:4]}"
    signal_yaml = signal_tag + ".yaml"
    fileset.update(get_fileset(join(yaml_indir, signal_yaml)))
    signal_csv = join(csv_indir, signal_tag + ".csv")
    sample_info = np.append(sample_info, get_sample_info(signal_csv))


# Shallow-copy the fileset into a plain dict (same keys, same value objects).
fileset = dict(fileset)

# Print each sample name with its number of files; the trailing "\n"
# argument leaves a blank line after every entry.
for sample_name, file_list in fileset.items():
    print(sample_name, len(file_list), "\n")

# In test mode keep only the first file of every sample.
if test_mode:
    fileset = {name: files[:1] for name, files in fileset.items()}

# Start the wall-clock timer before the processor and runner are built, so
# `tic` covers their construction as well as the processing itself.
# No cluster is set up here: the run uses coffea's FuturesExecutor with its
# default settings.
tic = time.time()

# Instantiate the processor module.
processor_instance = bTagEffProcessor()

# savemetrics=True makes the runner return an (output, metrics) pair. With
# coffea's default (savemetrics=False) only the output is returned, so the
# two-name unpacking below would not receive a metrics dict.
futures_run = processor.Runner(
    executor=processor.FuturesExecutor(),
    schema=NanoAODSchema,
    savemetrics=True,
)

# Process the "Events" tree of every file in the fileset.
hists, metrics = futures_run(
    fileset,
    treename="Events",
    processor_instance=processor_instance,
)

# Stop the timer, then log the processor output, the run metrics, the
# elapsed wall-clock time and the resulting event throughput.
toc = time.time()
elapsed = toc - tic
logging.info(f"Output: {hists}")
logging.info(f"Metrics: {metrics}")
logging.info(f"Finished in {elapsed:.1f}s")
# Computed only after the earlier messages are emitted, as in the original
# statement order.
events_per_second = metrics["entries"] / elapsed
logging.info(f"Events/s: {events_per_second:.0f}")