# Master notebook for evaluating performances


In [None]:
import os
import glob
import tqdm
import numpy as np
import mplhep as hep
import awkward as ak
import matplotlib.pyplot as plt
from hydra import compose, initialize
from omegaconf import OmegaConf

from enreg.tools import general as g
from enreg.tools.metrics import (
    regression_evaluator as re,
    decay_mode_evaluator as dme,
    tagger_evaluator as te
)

# Compose the two Hydra configs used by this notebook: the benchmarking
# (metrics) settings and the evaluation parameters.
with initialize(
    version_base=None,
    config_path="../ml-tau-en-reg/enreg/config/",
    job_name="test_app",
):
    cfg_metrics = compose(config_name="benchmarking")
    cfg = compose(config_name="evaluation_params")

# Use the CMS style shipped with mplhep for the plots.
hep.style.use(hep.styles.CMS)

In [2]:
# Base directories, all read from the `directories` node of the evaluation config.
_directories = cfg.directories
OUTPUT_DIR = _directories.OUTPUT_DIR
BASE_DATA_DIR = _directories.BASE_DATA_DIR
BASE_TRAINING_DIR = _directories.BASE_TRAINING_DIR

In [4]:
# Map every sample name ("<process>_<split>") to its parquet file inside
# BASE_DATA_DIR. Test samples come first, then train samples.
SAMPLE_DIR = {
    f"{process}_{split}": os.path.join(BASE_DATA_DIR, f"{process}_{split}.parquet")
    for split in ("test", "train")
    for process in ("z", "zh", "qq")
}

def train_data_loader(key):
    """Load the reco-jet and gen-tau four-momentum columns of one sample.

    Args:
        key: Sample name; must be a key of ``SAMPLE_DIR``.

    Returns:
        The result of ``g.load_all_data`` for that sample's path
        (an ``ak.Array`` according to the original note).
    """
    sample_path = SAMPLE_DIR[key]
    wanted_columns = ['reco_jet_p4s', "gen_jet_tau_p4s"]
    return g.load_all_data(sample_path, columns=wanted_columns)

# Load every sample listed in cfg.training_data that has a known file
# location; names missing from SAMPLE_DIR are skipped.
_known_samples = (name for name in cfg.training_data if name in SAMPLE_DIR)
LOADED_DATA = {name: train_data_loader(name) for name in _known_samples}

In [None]:
def create_reco_entries(loaded_data, key, output_dir):
    """Save the reco-jet pT and gen-tau pT of one sample to a parquet file.

    The file is written to ``<output_dir>/recoJet/<key>.parquet``; the
    ``recoJet`` directory is created if it does not exist yet.

    Args:
        loaded_data: Sample data exposing ``reco_jet_p4s`` and
            ``gen_jet_tau_p4s``.
        key: Sample name, used as the output file stem.
        output_dir: Directory under which ``recoJet`` is created.
    """
    reco_jet_p4 = g.reinitialize_p4(loaded_data.reco_jet_p4s)
    gen_tau_p4 = g.reinitialize_p4(loaded_data.gen_jet_tau_p4s)
    columns = {"reco_jet_pt": reco_jet_p4.pt, "gen_tau_pt": gen_tau_p4.pt}

    reco_dir = os.path.join(output_dir, "recoJet")
    os.makedirs(reco_dir, exist_ok=True)
    output_path = os.path.join(reco_dir, f"{key}.parquet")

    print(f"Saving to {output_path}")
    ak.to_parquet(ak.Record(columns), output_path, row_group_size=1024)

# Write the recoJet parquet file for every loaded sample.
for key, value in LOADED_DATA.items():
    create_reco_entries(value, key, OUTPUT_DIR)

In [7]:
# Model parameters, all read from the `evaluating` node of the evaluation config.
# NOTE: training types (directory names such as '20241204_ParT_ntrain' or
# '20250109_OmniParT_2GPT_layers') come from the config only. A hard-coded
# list that was assigned here was immediately overwritten and never used.
training_types = cfg.evaluating['training_types']
model_versions = cfg.evaluating['model_types']['version']
train_fracs = cfg.evaluating['model_types']['train_frac']
tasks = cfg.evaluating['model_types']['tasks']

models = cfg.evaluating['model_types']['models']
model_test_type = cfg.evaluating['model_types']['z_OR_zh_test']

In [None]:
def model_loader(base_dir, training, v, fraction, task, model, test_type):
    """Load the parquet file of one training/version/fraction/task/model/test combination.

    The file is looked up at
    ``<base_dir>/<training>/<v>/<fraction>/<task>/<model>/<test_type>.parquet``.
    """
    path_parts = (training, v, fraction, task, model, test_type + '.parquet')
    return g.load_all_data(os.path.join(base_dir, *path_parts))

def load_model_data(base_dir, training, versions, fractions, tasks, models, test_type):
    """
    Dynamically load data for specified model parameters.
    Args:
    - base_dir (str): Base directory for models.
    - versions (str or list): Single version or list of versions to process.
    - fractions (str or list): Single fraction or list of fractions to process.
    - tasks (str or list): Single task or list of tasks to process.
    - models (str or list): Single model or list of models to process.
    - test_type (str or list): Test type (e.g., 'zh_test').
    Returns:
    - dict: Loaded data indexed by parameter combinations.
    """
    # Convert inputs into lists if they aren't already lists
    if isinstance(training, str):
        training = [training]
    if isinstance(versions, str):
        versions = [versions]
    if isinstance(fractions, str):
        fractions = [fractions]
    if isinstance(tasks, str):
        tasks = [tasks]
    if isinstance(models, str):
        models = [models]
    if isinstance(test_type, str):
        test_type = [test_type]
    
    loaded_data = {}
    ############################### THIS IS LOADING FROM and looping over DIRECTORIES THAT DONT EXIST
    # Loop over all combinations of the parameters
    for tra in training:
        for v in versions:
            for fraction in fractions:
                for task in tasks:
                    for model in models:
                        for test in test_type:
                            dataset_name = f"{tra}/{v}/{fraction}/{task}/{model}/{test}"
                            try:
                                print(f"Loading data for {dataset_name}")
                                loaded_data[dataset_name] = model_loader(base_dir, tra, v,
                                                                         fraction, task, model, test)
                            except ValueError as e:
                                print(f"Failed to load {dataset_name}: {e}")
    return loaded_data

# Load the data for every configured combination of training type, version,
# train fraction, task, model and test sample.
loaded_data = load_model_data(
    base_dir=BASE_TRAINING_DIR,
    training=training_types,
    versions=model_versions,
    fractions=train_fracs,
    tasks=tasks,
    models=models,
    test_type=model_test_type,
)

# Pick out individual datasets by their full key. `.get` yields None when a
# combination was not loaded, so a missing dataset only shows up on first use.
PT_jet_regression = loaded_data.get(
    "20201204_ParT_ntrain_v2/v1/trainfrac_1e4/jet_regression/ParticleTransformer/zh_test"
)
PT_dm_multiclass = loaded_data.get(
    "20201204_ParT_ntrain_v2/v1/trainfrac_1e4/dm_multiclass/ParticleTransformer/zh_test"
)
PT_binary_classification = loaded_data.get(
    "20201204_ParT_ntrain_v2/v1/trainfrac_1e4/binary_classification/ParticleTransformer/zh_test"
)

omni_pt_reg = loaded_data.get(
    '20250113_OmniParT_fine_tuning_freeze_60/v1/trainfrac_1e4/jet_regression/OmniParT_fine_tuning/zh_test'
)
omni_pt_dm = loaded_data.get(
    '20250113_OmniParT_fine_tuning_freeze_60/v1/trainfrac_1e4/dm_multiclass/OmniParT_fine_tuning/zh_test'
)
omni_pt_bin = loaded_data.get(
    '20250113_OmniParT_fine_tuning_freeze_60/v1/trainfrac_1e4/binary_classification/OmniParT_fine_tuning/zh_test'
)

# Print every (key, data) pair that was actually loaded.
for i in loaded_data.items():
    print(i)

In [11]:
# Regression evaluators to compare, keyed by a short algorithm tag. Only the
# ParticleTransformer entry is active; the commented entries are kept as
# templates for the other algorithms.
_pt_regression = PT_jet_regression.jet_regression
evaluators = {
    "PT": re.RegressionEvaluator(
        _pt_regression.pred,
        _pt_regression.target,
        cfg_metrics.metrics.regression,
        "zh",
        "ParticleTransformer",
    ),
    # "HPS_":  re.RegressionEvaluator(hps_data.pred_pt[hpsPlus_mask], hps_data.true_pt[hpsPlus_mask], cfg.metrics.regression, "zh", "HPS_"),
    # "RecoJet":  re.RegressionEvaluator(recoJet_data.reco_jet_pt, recoJet_data.gen_tau_pt, cfg.metrics.regression, "zh", "RecoJet"),
    # "omni_pt": re.RegressionEvaluator(omni_pt_reg.jet_regression.pred, omni_pt_reg.jet_regression.target,
    #                                cfg_metrics.metrics.regression, "zh", "OmniParT")
    # "LN":  re.RegressionEvaluator(LN_data.jet_regression.pred, LN_data.jet_regression.target, cfg.metrics.regression, "zh", "LorentzNet"),
    # "DS":  re.RegressionEvaluator(DS_data.jet_regression.pred, DS_data.jet_regression.target, cfg.metrics.regression, "zh", "DeepSet")
}

In [12]:
# Directory for the regression plots. os.path.join gives the intended
# "<OUTPUT_DIR>/plots" whether or not OUTPUT_DIR ends with a path separator;
# plain string concatenation produced "<OUTPUT_DIR>plots" when it did not.
plots_dir = os.path.join(OUTPUT_DIR, 'plots')
os.makedirs(plots_dir, exist_ok=True)
rme = re.RegressionMultiEvaluator(plots_dir, cfg_metrics.metrics.regression, "zh")

# Combine the individual evaluators and write the results.
rme.combine_results(list(evaluators.values()))
rme.save()

In [None]:
# Cell output: the figure of the multi-evaluator's resolution line plot.
rme.resolution_lineplot.fig

In [None]:
# Cell output: the figure of the multi-evaluator's response line plot.
rme.response_lineplot.fig

## Decay mode performances

In [15]:
# Evaluate the decay-mode reconstruction for every configured algorithm and
# signal sample, skipping combinations whose parquet file is missing.
output_dir = os.path.join(OUTPUT_DIR, 'DM')
dm_cfg = cfg_metrics.metrics.dm_reconstruction
for algorithm, algo_info in dm_cfg.algorithms.items():
    for signal_sample in dm_cfg.signal_samples:
        # Check the file that is loaded next. Joining the directory with the
        # SAMPLE_DIR dict is not a valid path and raises TypeError.
        sig_path = os.path.join(algo_info.data_dir, signal_sample + ".parquet")
        if not os.path.exists(sig_path):
            continue
        sig_data = g.load_all_data(sig_path)

        # NOTE(review): `sig_data` is loaded but not used. The evaluator is fed
        # the ParticleTransformer dm_multiclass data for every algorithm, so
        # the outputs differ only in their labels. Confirm whether
        # sig_data.dm_multiclass was intended here.
        evaluator = dme.DecayModeEvaluator(PT_dm_multiclass.dm_multiclass.pred,
                                           PT_dm_multiclass.dm_multiclass.target,
                                           output_dir, signal_sample, algorithm)
        evaluator.save_performance()

In [None]:
# Deliberate stop: raising SystemExit here halts a top-to-bottom run before
# the code that follows.
raise SystemExit("Execution halted at this point.")

In [None]:
# Tagger (binary classification) evaluation: one TaggerEvaluator per
# (algorithm, signal sample) pair, each using the qq_test sample as background.
# NOTE(review): `algorithms`, `signal_samples` and `data` are not defined in
# the visible part of this notebook, and the metrics config is loaded as
# `cfg_metrics` above. Confirm these names before running this cell.
task = "binary_classification"
evaluators = []
for algorithm in algorithms:
    base_path = os.path.join("/home/laurits/ml-tau-en-reg/training-outputs/Trainings/v1", task, algorithm)
    bkg_file = os.path.join(base_path, "qq_test.parquet")
    bkg_data = g.load_all_data(bkg_file)
    for signal_sample in signal_samples:
        # Jet-level information (four-momenta) of the signal and background samples.
        sig_info_data = data[signal_sample]
        bkg_info_data = data['qq_test']
        sig_file = os.path.join(base_path, signal_sample + ".parquet")
        sig_data = g.load_all_data(sig_file)

        sig_scores = sig_data.binary_classification
        bkg_scores = bkg_data.binary_classification
        evaluator = te.TaggerEvaluator(
            signal_predictions=sig_scores.pred,
            signal_truth=sig_scores.target,
            signal_gen_tau_p4=sig_info_data.gen_jet_tau_p4s,
            signal_reco_jet_p4=sig_info_data.reco_jet_p4s,
            bkg_predictions=bkg_scores.pred,
            bkg_truth=bkg_scores.target,
            bkg_gen_jet_p4=bkg_info_data.gen_jet_p4s,
            bkg_reco_jet_p4=bkg_info_data.reco_jet_p4s,
            cfg=cfg.metrics.classifier,
            sample=signal_sample,
            algorithm=algorithm,
        )
        evaluators.append(evaluator)

# Combine all per-sample evaluators and save the results.
tme = te.TaggerMultiEvaluator("output_plots_cls", cfg.metrics.classifier)
tme.combine_results(evaluators)
tme.save_results()

In [None]:
# Decay-mode evaluation: one DecayModeEvaluator per (algorithm, signal sample)
# pair, saved right after it is built.
# NOTE(review): `algorithms`, `signal_samples` and `data` are not defined in
# the visible part of this notebook. Confirm where they come from.
task = "dm_multiclass"
for algorithm in algorithms:
    base_path = os.path.join("/home/laurits/ml-tau-en-reg/training-outputs/Trainings/v1", task, algorithm)
    for signal_sample in signal_samples:
        sig_info_data = data[signal_sample]  # looked up but not used below
        sig_file = os.path.join(base_path, signal_sample + ".parquet")
        sig_data = g.load_all_data(sig_file)

        output_dir = "output_plots_dm"
        dm_scores = sig_data.dm_multiclass
        evaluator = dme.DecayModeEvaluator(
            dm_scores.pred,
            dm_scores.target,
            output_dir,
            signal_sample,
            algorithm,
        )
        evaluator.save_performance()


In [None]:
# Jet-regression evaluation: one RegressionEvaluator per (algorithm, signal
# sample) pair, combined afterwards by a RegressionMultiEvaluator.
# NOTE(review): `algorithms`, `signal_samples` and `data` are not defined in
# the visible part of this notebook. Confirm where they come from.
task = "jet_regression"
evaluators = []
for algorithm in algorithms:
    base_path = os.path.join("/home/laurits/ml-tau-en-reg/training-outputs/Trainings/v1", task, algorithm)
    for signal_sample in signal_samples:
        sig_info_data = data[signal_sample]  # looked up but not used below
        sig_file = os.path.join(base_path, signal_sample + ".parquet")
        sig_data = g.load_all_data(sig_file)

        regression = sig_data.jet_regression
        evaluator = re.RegressionEvaluator(
            regression.pred,
            regression.target,
            cfg.metrics.regression,
            signal_sample.split("_")[0],
            algorithm,
        )
        evaluators.append(evaluator)

# NOTE(review): `signal_sample` here is whatever the last loop iteration left
# behind, so the multi-evaluator is labelled with the last signal sample only.
output_dir = "output_plots_regression"
rme = re.RegressionMultiEvaluator(output_dir, cfg.metrics.regression, signal_sample.split("_")[0])
rme.combine_results(evaluators)
rme.save()