# Master notebook for evaluating performance


In [None]:
import os
import glob
import tqdm
import numpy as np
import mplhep as hep
import awkward as ak
import matplotlib.pyplot as plt
from hydra import compose, initialize
from omegaconf import OmegaConf

from enreg.tools import general as g
from enreg.tools.metrics import (
    regression_evaluator as re,
    decay_mode_evaluator as dme,
    tagger_evaluator as te
)

ModuleNotFoundError: No module named 'tqdm'

In [None]:
class PerformanceEvaluator:
    """
    Drive the regression and decay-mode performance evaluation.

    The class loads a Hydra configuration, loads the datasets to compare,
    wraps them in the evaluator classes from ``enreg.tools.metrics`` and
    saves the results under ``output_dir``.

    Class attributes:
        DEFAULT_OUTPUT_DIR (str): Output directory used when none is given.
        SAMPLE (str): Sample name passed to every regression evaluator.
        REGRESSION_MODELS (tuple): Dataset keys whose ``jet_regression``
            record provides the ``pred`` / ``target`` fields.
        DEFAULT_DATASET_PATHS (dict): Dataset name -> file path used by
            ``run_full_evaluation`` when no paths are given.
    """

    DEFAULT_OUTPUT_DIR = "/home/laurits/tmp/performance_evaluation"
    SAMPLE = "zh"
    REGRESSION_MODELS = ("PT", "LN", "DS")
    DEFAULT_DATASET_PATHS = {
        "hps": "/home/laurits/HPS_recoCut0_ntuples/zh.parquet",
        "recoJet": "/home/laurits/ntuples/20240924_lowered_recoPtCut/recoJet/zh.parquet",
        "PT": "/home/laurits/ml-tau-en-reg/training-outputs/20240921_recoPtCut_removed_samples/v1/jet_regression/ParticleTransformer/zh_test.parquet",
        "LN": "/home/laurits/ml-tau-en-reg/training-outputs/20240921_recoPtCut_removed_samples/v1/jet_regression/LorentzNet/zh_test.parquet",
        "DS": "/home/laurits/ml-tau-en-reg/training-outputs/20240921_recoPtCut_removed_samples/v1/jet_regression/DeepSet/zh_test.parquet",
    }

    def __init__(
        self,
        config_path="../enreg/config/",
        config_name="benchmarking",
        output_dir=DEFAULT_OUTPUT_DIR,
    ):
        """
        Initialize the performance evaluator with configuration

        Args:
            config_path (str): Path to configuration directory
            config_name (str): Name of the configuration file
            output_dir (str): Directory where results are written; it is
                created if it does not exist yet
        """
        # Compose the configuration inside the Hydra initialization context
        with initialize(version_base=None, config_path=config_path, job_name="performance_eval"):
            self.cfg = compose(config_name=config_name)

        # Use HEP style
        hep.style.use(hep.style.CMS)

        # Output directory for results
        self.output_dir = output_dir
        os.makedirs(self.output_dir, exist_ok=True)

    def load_datasets(self, datasets):
        """
        Load multiple datasets for evaluation

        Args:
            datasets (dict): Dictionary mapping dataset name to its path

        Returns:
            dict: Dataset name mapped to the result of ``g.load_all_data``
        """
        return {name: g.load_all_data(path) for name, path in datasets.items()}

    def prepare_regression_evaluators(self, datasets):
        """
        Prepare regression evaluators for different models

        Args:
            datasets (dict): Loaded datasets; must contain the keys
                ``'hps'``, ``'recoJet'`` and every name listed in
                ``REGRESSION_MODELS``

        Returns:
            dict: Evaluator name mapped to its ``RegressionEvaluator``, in
                the order HPS, RecoJet, then ``REGRESSION_MODELS``
        """
        regression_cfg = self.cfg.metrics.regression
        hps = datasets['hps']
        reco_jet = datasets['recoJet']

        # The two baselines store their prediction/target under their own
        # field names, so they are spelled out explicitly.
        evaluators = {
            "HPS": re.RegressionEvaluator(
                hps.pred_pt,
                hps.true_pt,
                regression_cfg,
                self.SAMPLE,
                "HPS",
            ),
            "RecoJet": re.RegressionEvaluator(
                reco_jet.reco_jet_pt,
                reco_jet.gen_tau_pt,
                regression_cfg,
                self.SAMPLE,
                "RecoJet",
            ),
        }

        # The remaining models share the `jet_regression.pred/target` layout.
        for model_name in self.REGRESSION_MODELS:
            model_data = datasets[model_name].jet_regression
            evaluators[model_name] = re.RegressionEvaluator(
                model_data.pred,
                model_data.target,
                regression_cfg,
                self.SAMPLE,
                model_name,
            )

        return evaluators

    def perform_regression_evaluation(self, evaluators):
        """
        Perform regression evaluation and generate plots

        Args:
            evaluators (dict): Regression evaluators

        Returns:
            tuple: ``(resolution_fig, response_fig)``, the ``fig`` attributes
                of the multi-evaluator's ``resolution_lineplot`` and
                ``response_lineplot``
        """
        regression_cfg = self.cfg.metrics.regression

        # Adjust the plot limits before the multi-evaluator is built, so the
        # override is visible no matter when the evaluator reads its config.
        regression_cfg.ratio_plot.resolution_plot.ylim = [0, 0.2]
        regression_cfg.ratio_plot.response_plot.ylim = [0.96, 1.04]

        multi_evaluator = re.RegressionMultiEvaluator(
            self.output_dir,
            regression_cfg,
            self.SAMPLE,
        )

        # Combine evaluators and save results
        multi_evaluator.combine_results(list(evaluators.values()))
        multi_evaluator.save()

        # Hand the figures back so a caller (e.g. a notebook cell) can show them
        return (
            multi_evaluator.resolution_lineplot.fig,
            multi_evaluator.response_lineplot.fig,
        )

    def perform_decay_mode_evaluation(self, datasets):
        """
        Perform decay mode performance evaluation

        For every algorithm and signal sample listed under
        ``cfg.metrics.dm_reconstruction`` the file
        ``<algo_info.data_dir>/<signal_sample>.parquet`` is loaded and a
        ``DecayModeEvaluator`` saves its performance under
        ``<output_dir>/decay_mode_plots``.

        Args:
            datasets (dict): Loaded datasets. Currently unused: the signal
                data is read from each algorithm's ``data_dir`` instead.
        """
        dm_cfg = self.cfg.metrics.dm_reconstruction
        plot_dir = os.path.join(self.output_dir, "decay_mode_plots")

        for algorithm, algo_info in dm_cfg.algorithms.items():
            for signal_sample in dm_cfg.signal_samples:
                sig_data = g.load_all_data(
                    os.path.join(algo_info.data_dir, f"{signal_sample}.parquet")
                )
                evaluator = dme.DecayModeEvaluator(
                    sig_data.dm_multiclass.pred,
                    sig_data.dm_multiclass.target,
                    plot_dir,
                    signal_sample,
                    algorithm,
                )
                evaluator.save_performance()

    def run_full_evaluation(self, dataset_paths=None):
        """
        Run complete performance evaluation workflow

        Args:
            dataset_paths (dict): Optional dataset name -> path mapping.
                When omitted, a copy of ``DEFAULT_DATASET_PATHS`` is used.
        """
        if dataset_paths is None:
            # Copy so that the class-level default is never mutated downstream
            dataset_paths = dict(self.DEFAULT_DATASET_PATHS)

        # Load datasets
        datasets = self.load_datasets(dataset_paths)

        # Perform regression evaluation
        regression_evaluators = self.prepare_regression_evaluators(datasets)
        self.perform_regression_evaluation(regression_evaluators)

        # Perform decay mode evaluation
        self.perform_decay_mode_evaluation(datasets)

In [None]:
def main():
    """Build a PerformanceEvaluator with its default settings and run the full evaluation."""
    PerformanceEvaluator().run_full_evaluation()

# Run the evaluation only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()