In this benchmark, we try to achieve the highest throughput (MB/s) while reading a single ROOT file.

In [None]:
import os, sys
sys.path.append(os.getcwd()+"/../")
import copy
import yaml
from src.utils import recreate_dir
from src.benchmark import Benchmark, run_benchmark

In [None]:
# Baseline benchmark configuration. generate_configs() deep-copies this mapping
# and overrides the file list and the column selection for each generated config.
_data_access_defaults = dict(mode="explicit-files", files=[])
_executor_defaults = dict(backend="sequential", n_workers=1)
_processor_defaults = dict(
    parallelize_over="files",
    columns={},
    load_columns_into_memory=True,
    worker_operation_time=0,
)

default_config = {
    "data-access": _data_access_defaults,
    "executor": _executor_defaults,
    "processor": _processor_defaults,
}

In [None]:
# Name of the single ROOT file read by every generated config.
file_name = "90322FC2-4027-0E47-92E4-22307EC8EAD2.root"
# Maps a short location label to the directory / URL prefix under which
# `file_name` is looked up. The label is written to the config's custom labels
# and is part of the config file name. Uncomment an entry to add that location
# to the scan.
file_locations = {
    "depot": "/depot/cms/users/dkondra/",
    # "work": "/work/projects/purdue-af/",
    # "eos_fuse": "/eos/purdue/store/data/Run2016B/SingleMuon/NANOAOD/02Apr2020_ver2-v1/20000/",
    # "xrootd": "root://eos.cms.rcac.purdue.edu:1094//store/data/Run2016B/SingleMuon/NANOAOD/02Apr2020_ver2-v1/20000/",
    # "xcache": "root://cms-xcache.rcac.purdue.edu:1094//store/data/Run2016B/SingleMuon/NANOAOD/02Apr2020_ver2-v1/20000/"
}

# Maps a preset label to a column-selection spec that is stored verbatim under
# config["processor"]["columns"]. Each spec has a "method" and its "values";
# how they are interpreted is up to the benchmark code (not shown here).
# NOTE(review): the "NNpct" labels presumably indicate the approximate fraction
# of the file's columns that the preset selects - confirm.
column_presets = {
    # "100pct": {
    #     # the benchmark will limit this to actual total number of columns
    #     "method": "n_columns",
    #     "values": 100000
    # },
    "50pct": {
        "method": "collections",
        "values": ["Jet", "Photon", "Tau", "Electron", "Muon"]
    },
    # "10pct": {
    #     "method": "collections",
    #     "values": ["Muon"]
    # },
    # "5pct": {
    #     "method": "column_list",
    #     "values": [
    #         "run", "luminosityBlock", "HLT_IsoMu24", "PV_npvsGood", "fixedGridRhoFastjetAll",
    #         "Muon_pt", "Muon_eta", "Muon_phi", "Muon_mass", "Muon_charge", "Muon_pfRelIso04_all", "Muon_mediumId", "Muon_ptErr",
    #         "Electron_pt", "Electron_eta", "Electron_mvaFall17V2Iso_WP90",
    #         "Jet_pt", "Jet_eta", "Jet_phi", "Jet_mass",
    #     ]
    # }
}

In [None]:
def generate_configs(save_dir="./"):
    """Write one YAML benchmark config per (file location, column preset) pair.

    Each config is a deep copy of ``default_config`` with the input file and the
    column selection filled in, plus a ``custom_labels`` section recording which
    location and preset it was generated from.

    Args:
        save_dir: Directory the YAML files are written to. It is passed to
            ``recreate_dir`` first (presumably clearing and recreating it -
            see that helper for the exact behavior).
    """
    recreate_dir(save_dir)

    # Enumerate every combination up front so that the running index and the
    # final count both come from the same sequence.
    combinations = [
        (f_label, file_loc, c_label, column_setup)
        for f_label, file_loc in file_locations.items()
        for c_label, column_setup in column_presets.items()
    ]

    for iconf, (f_label, file_loc, c_label, column_setup) in enumerate(combinations):
        config = copy.deepcopy(default_config)

        # Location prefixes are written with a trailing "/"; strip it before
        # joining so the resulting path/URL does not contain "//" in front of
        # the file name.
        file_path = file_loc.rstrip("/") + "/" + file_name
        config["data-access"]["files"] = [file_path]
        config["processor"]["columns"] = column_setup

        # Custom labels to save to output dataframe
        config["custom_labels"] = {
            "file_location": f_label,
            "column_setup": c_label
        }

        config_name = f'config2p1_{iconf}_{f_label}_{c_label}.yaml'

        # os.path.join avoids a doubled separator when save_dir ends with "/"
        # (as the default "./" does).
        with open(os.path.join(save_dir, config_name), 'w') as file:
            yaml.dump(config, file, default_flow_style=False)

    print(f'Saved {len(combinations)} config files to {save_dir}')

In [None]:
# Warning: this directory is passed to recreate_dir() by generate_configs(), so
# existing YAML files in it are expected to be deleted before new ones are written.
config_path = "./configs_2.1"

In [None]:
# Write one YAML config per (file location, column preset) pair into config_path.
generate_configs(config_path)

In [None]:
# Run the benchmark under cProfile while keeping its return value.
# cProfile.run() executes a statement string and discards the result, which
# left `report` undefined for the analysis cells below. Profile.runcall()
# returns whatever the profiled callable returns.
import cProfile

profiler = cProfile.Profile()
try:
    report = profiler.runcall(run_benchmark, config_path)
finally:
    # Write the stats even if the benchmark raises, as cProfile.run() did.
    profiler.dump_stats('profile_output.prof')

In [None]:
import matplotlib.pyplot as plt

# Compressed-data throughput: bytes read divided by the `run_processor` column
# and scaled by 1024*1024.
# NOTE(review): assumes `run_processor` is a duration in seconds - confirm.
report["MB/s (compressed)"] = report.compressed_bytes/report.run_processor/(1024*1024)

report_by_column_setup = report.groupby('column_setup')

plt.figure(figsize=(8, 6))
for col_setup, group in report_by_column_setup:
    # Every config generated in this notebook uses n_workers = 1, so a group
    # may hold a single point; without a marker a one-point line is invisible.
    plt.plot(group.n_workers, group["MB/s (compressed)"], marker='o', label=col_setup)

plt.xlabel('# workers')
plt.ylabel('MB/s')
plt.legend()
# Start both axes at zero and leave some headroom above the largest value.
plt.xlim(0, report.n_workers.max()+5)
plt.ylim(0, report['MB/s (compressed)'].max() + 5)
plt.show()

In [None]:
# Add per-run rate columns to the report, all normalized by the `run_processor`
# column, then display them next to the run's identifying labels.
elapsed = report["run_processor"]
report["event_rate"] = report["n_events"] / elapsed / report["n_columns_read"]
report["data_rate_comp"] = report["compressed_bytes"] / elapsed
report["data_rate_uncomp"] = report["uncompressed_bytes"] / elapsed

summary_columns = ["column_setup", "n_workers", "event_rate", "data_rate_comp", "data_rate_uncomp"]
report[summary_columns]

In [None]:
# Compressed bytes per event, multiplied by the number of columns read.
report["compressed_bytes"] / report["n_events"] * report["n_columns_read"]

In [None]:
# Record the installed uproot version alongside the benchmark results.
import uproot
uproot.__version__