In [None]:
import os
import glob
import json
import numpy as np
import mplhep as hep
import awkward as ak
import matplotlib.pyplot as plt

hep.style.use(hep.styles.CMS)

In [None]:
# Training-set-size tags. They appear verbatim in directory names
# (`trainfrac_{frac}`) and are converted with float() for the x-axis of the plots.
FRACS = ["2e3", "1e4", "1e5", "1e6"]
# Display label for each model variant.
MODEL_TYPES = {
    'fine_tuning': "Fine-tuning",
    'from_scratch': "From scratch",
    'fixed_backbone': "Fine-tuning \n(fixed backbone)",
    
}
TRAINING_TYPES = ['jet_regression', 'dm_multiclass', "binary_classification"]
# Metric names read from each run's history.json, per task.
PERFORMANCE_KEYS = {
    "jet_regression": ["loss_validation", "IQR_validation", "median_validation"],
    "binary_classification": ["loss_validation", "precision_validation", "recall_validation", "F1_validation"],
    "dm_multiclass": ["loss_validation", "precision_validation", "recall_validation", "F1_validation"]
}
# y-axis label for each metric name.
Y_LABELS = {
    "IQR_validation": r"$p_T \, \, resol. (q_{75} - q_{25})/q_{50}$",
    "median_validation": r"$p_T scale (q_{50})$",
    "precision_validation": "precision",
    "recall_validation": "recall",
    "F1_validation": "F1",
    "loss_validation": "validation loss"
}

# Line colours, keyed by model variant / unfreezing variant / number of GPT layers.
COLORS = {
    "fine_tuning": "green",
    "from_scratch": "red",
    "fixed_backbone": "blue",
}
EPOCH_COLORS = {
    "epoch_0": "green",
    "epoch_30": "red",
    "epoch_60": "magenta",
    "fixed_backbone": "blue",
}
LAYER_COLORS = {
    "GPT_layers_1": "green",
    "GPT_layers_2": "red",
    "GPT_layers_3": "blue",
}


# Absolute locations of the training outputs and of the figures written by this notebook.
# NOTE(review): BASE_DIR carries a `2020...` date prefix while the other runs use
# `2025...` - confirm this is the intended directory and not a typo.
EPOCH_0_UNFREEZE_DIR = "/home/laurits/ml-tau-en-reg/training-outputs/20250119_unfreeze_epoch_0/"
EPOCH_60_UNFREEZE_DIR = "/home/laurits/ml-tau-en-reg/training-outputs/20250123_unfreeze_epoch_60/"
BASE_DIR = "/home/laurits/ml-tau-en-reg/training-outputs/20201204_ParT_ntrain_v2"
GPT_LAYERS_1_DIR = "/home/laurits/ml-tau-en-reg/training-outputs/20250119_GPT_Layers_1/"
GPT_LAYERS_2_DIR = "/home/laurits/ml-tau-en-reg/training-outputs/20250119_GPT_Layers_2/"
OUTPUT_DIR = "/home/laurits/20250203_OmniParT_effects"
# Side effect: the output directory is created as soon as this cell runs.
os.makedirs(OUTPUT_DIR, exist_ok=True)


# Effects of unfreezing

In [None]:
# Training-output directory for each unfreezing variant. "epoch_30" and
# "fixed_backbone" share BASE_DIR; get_model_metrics_values tells them apart by
# the model sub-directory it selects.
epoch_dirs = {
    "epoch_0": EPOCH_0_UNFREEZE_DIR,
    "epoch_30": BASE_DIR,
    "epoch_60": EPOCH_60_UNFREEZE_DIR,
    "fixed_backbone": BASE_DIR
}

# Legend label for each variant; the insertion order also fixes the plotting order.
epoch_name_mapping = {
    "epoch_0": "Epoch 0",
    "epoch_30": "Epoch 30",
    "epoch_60": "Epoch 60",
    "fixed_backbone": "Frozen"
}


def calculate_scenario_measures(wcp_path, keys):
    """Aggregate metrics over repeated runs, each taken at its best validation epoch.

    Parameters
    ----------
    wcp_path : str
        Glob pattern matching the ``history.json`` file of every run (v_1 .. v_n).
    keys : iterable of str
        Metric names to read from each history file. Every history file must
        also contain ``"loss_validation"``, which selects the epoch.

    Returns
    -------
    dict
        ``{key: {"mean": float, "std": float}}`` over the runs. Both values are
        NaN when no usable run was found.
    """
    metric_values = {key: [] for key in keys}
    # Sorted so the aggregation order does not depend on the filesystem.
    metrics_paths = sorted(glob.glob(wcp_path))
    if not metrics_paths:
        # glob only returns existing paths, so "nothing matched" is the case to report.
        print(f"ERROR: {wcp_path} is not started")
    for metrics_path in metrics_paths:  # Loop over all runs (v_1, .. v_n)
        with open(metrics_path, "rt") as in_file:
            metrics_info = json.load(in_file)
        losses = np.asarray(metrics_info["loss_validation"], dtype=float)
        if losses.size == 0 or np.all(np.isnan(losses)):
            print(f"ERROR: {metrics_path} has no finite validation loss, skipping")
            continue
        # nanargmin: a NaN loss must never be chosen as the "best" epoch, because
        # nan_to_num below would turn it into a perfect-looking 0.0.
        min_loss_idx = int(np.nanargmin(losses))
        for key in keys:
            metric_values[key].append(np.nan_to_num(metrics_info[key][min_loss_idx]))
    model_metrics = {}
    for key in keys:
        values = metric_values[key]
        model_metrics[key] = {
            "mean": np.mean(values) if values else np.nan,
            "std": np.std(values) if values else np.nan,
        }
    return model_metrics


def restructure_results(model_metrics: dict, keys):
    """Pivot ``{frac: {key: {stat: value}}}`` into ``{key: {stat: [value per frac]}}``.

    The per-fraction lists follow the order of ``FRACS``; ``stat`` is "mean" or "std".
    """
    return {
        key: {
            stat: [model_metrics[frac][key][stat] for frac in FRACS]
            for stat in ("mean", "std")
        }
        for key in keys
    }
    


def get_model_metrics_values(training_type, em):
    """Collect mean/std of every metric of ``training_type`` for one unfreezing variant.

    ``em`` is a key of ``epoch_dirs``. Keys containing "epoch" read the
    fine-tuning model directory, all others the fixed-backbone one.
    Returns ``{metric: {"mean": [...], "std": [...]}}`` ordered like ``FRACS``.
    """
    if "epoch" in em:
        model = "OmniParT_fine_tuning"
    else:
        model = "OmniParT_fixed_backbone"
    keys = PERFORMANCE_KEYS[training_type]
    per_frac = {
        frac: calculate_scenario_measures(
            os.path.join(epoch_dirs[em], "*", f"trainfrac_{frac}", training_type, model, "history.json"),
            keys,
        )
        for frac in FRACS
    }
    return restructure_results(per_frac, keys)


def get_unfreezing_metrics_values(training_type):
    """Return the metrics of every unfreezing variant listed in ``epoch_name_mapping``.

    The result maps variant key -> output of ``get_model_metrics_values``, in the
    insertion order of ``epoch_name_mapping``.
    """
    return {
        em: get_model_metrics_values(training_type, em)
        for em in epoch_name_mapping
    }


def plot_performance(metrics_values, key, output_path):
    """Plot metric ``key`` versus training-set size for every unfreezing variant.

    Draws the mean as a line and mean +/- std as a band, saves the figure as a
    PDF to ``output_path`` and then closes all open figures.
    """
    x_values = list(map(float, FRACS))
    for m_type in epoch_name_mapping:
        stats = metrics_values[m_type][key]
        center = np.array(stats["mean"])
        spread = np.array(stats["std"])
        colour = EPOCH_COLORS[m_type]
        plt.plot(x_values, center, label=epoch_name_mapping[m_type], color=colour)
        plt.fill_between(x_values, center - spread, center + spread, color=colour, alpha=0.3)
    plt.legend()
    plt.xscale("log")
    plt.xlabel("Number of training jets")
    plt.ylabel(Y_LABELS[key])
    plt.savefig(output_path, bbox_inches="tight", format="pdf")
    plt.close("all")


def plot_all_performances(training_type):
    """Write one ``<metric>_layerFreeze.pdf`` per metric of ``training_type``.

    Files go to ``OUTPUT_DIR/<training_type>``, which is created if missing.
    """
    results = get_unfreezing_metrics_values(training_type)
    target_dir = os.path.join(OUTPUT_DIR, training_type)
    os.makedirs(target_dir, exist_ok=True)
    for metric_name in PERFORMANCE_KEYS[training_type]:
        plot_performance(
            results,
            metric_name,
            os.path.join(target_dir, f"{metric_name}_layerFreeze.pdf"),
        )

In [None]:
# Load the unfreezing-study metrics of one task into the kernel for interactive inspection.
training_type = 'jet_regression'
key = 'loss_validation'
metrics_values_freeze = get_unfreezing_metrics_values(training_type)

In [None]:
# Produce the per-metric unfreezing plots for every task.
# Note: the loop variable overwrites the notebook-level `training_type`.
for training_type in PERFORMANCE_KEYS.keys():
    plot_all_performances(training_type)

In [None]:
# Summary figure of the unfreezing study: one panel per task showing the
# validation loss at a single training-set size, divided by the smallest mean
# loss among the unfreezing variants.
from matplotlib.ticker import FormatStrFormatter
metric_key = 'loss_validation'
trainSize = '2e3'
# Index <-> tag lookups for FRACS; only the inverse mapping is used in this cell.
size_mapping = {i: value for i, value in enumerate(FRACS)}
inverse_size_mapping = {value: i for i, value in enumerate(FRACS)}
fig, axes = plt.subplots(3, 1, sharex=True)
color_mapping = {
    'jet_regression': 'red',
    'dm_multiclass': 'green',
    "binary_classification": "blue"
}
name_mapping = {
    'jet_regression': 'Kinematic reconstruction',
    'dm_multiclass': 'Decay mode reconstruction',
    "binary_classification": "Tagging"
}
# Global matplotlib setting: it stays in effect for every later figure in the kernel.
plt.rcParams['mathtext.fontset'] = 'stix'
for training_type, ax in zip(PERFORMANCE_KEYS.keys(), axes):
    metrics_values_gpt = get_unfreezing_metrics_values(training_type)
    means = []
    stds = []
    # One point per variant, in the insertion order of epoch_name_mapping
    # (epoch_0, epoch_30, epoch_60, fixed_backbone).
    for key, values in metrics_values_gpt.items():
        means.append(values[metric_key]['mean'][inverse_size_mapping[trainSize]])
        stds.append(values[metric_key]['std'][inverse_size_mapping[trainSize]])
    means = np.array(means)
    stds = np.array(stds)

    # Normalise to the best (lowest) mean loss so the panels are comparable.
    normed_means = means/np.min(means)
    normed_max_err = (means + stds) / np.min(means)
    normed_min_err = (means - stds) / np.min(means)
    ax.plot(np.arange(len(means)), normed_means, ls='--', marker="^", ms=15, label=name_mapping[training_type], color=color_mapping[training_type])
    ax.fill_between(np.arange(len(means)), normed_min_err, normed_max_err, alpha=0.3, color=color_mapping[training_type])
    # ax.legend(loc='upper center')
    ax.set_ylabel(r'$\mathcal{L}_{val} / min(\mathcal{L}_{val})$')
    ax.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))
    # ax.set_title(name_mapping[training_type], x=0.5, y=0.7, fontsize=22)
fig.subplots_adjust(wspace=0, hspace=0)
# Tick labels are hard-coded to match the four variants plotted above.
# NOTE(review): the "fixed_backbone" point is labelled '100' - presumably the
# total number of training epochs; confirm.
plt.xticks(range(4), ['0', '30', '60', '100'])
plt.xlim(-0.1, 3.1)
plt.xlabel('Backbone unfreezing epoch')
    
freeze_output_path = os.path.join(OUTPUT_DIR, "freeze_ablation.pdf")
fig.savefig(freeze_output_path, bbox_inches='tight')

# Effects of number of GPT layers

In [None]:
# Training-output directory for each GPT-layer-count variant; the 3-layer
# variant reads from BASE_DIR.
layer_dirs = {
    "GPT_layers_1": GPT_LAYERS_1_DIR,
    "GPT_layers_2": GPT_LAYERS_2_DIR,
    "GPT_layers_3": BASE_DIR,
}

# Legend label for each variant; the insertion order also fixes the plotting order.
layer_name_mapping = {
    "GPT_layers_1": "1 GPT layer",
    "GPT_layers_2": "2 GPT layers",
    "GPT_layers_3": "3 GPT layers",
}


def get_model_metrics_values_gptLayer(training_type, em):
    """Collect mean/std of every metric of ``training_type`` for one GPT-layer variant.

    ``em`` is a key of ``layer_dirs``; the fine-tuning model directory is always read.
    Returns ``{metric: {"mean": [...], "std": [...]}}`` ordered like ``FRACS``.
    """
    keys = PERFORMANCE_KEYS[training_type]
    per_frac = {
        frac: calculate_scenario_measures(
            os.path.join(
                layer_dirs[em], "*", f"trainfrac_{frac}", training_type, "OmniParT_fine_tuning", "history.json"
            ),
            keys,
        )
        for frac in FRACS
    }
    return restructure_results(per_frac, keys)


def get_metrics_values_gptLayer(training_type):
    """Return the metrics of every GPT-layer variant listed in ``layer_name_mapping``.

    The result maps variant key -> output of ``get_model_metrics_values_gptLayer``.
    """
    return {
        em: get_model_metrics_values_gptLayer(training_type, em)
        for em in layer_name_mapping
    }


def plot_performance_gptLayer(metrics_values, key, output_path):
    """Plot metric ``key`` versus training-set size for every GPT-layer variant.

    Draws the mean as a line and mean +/- std as a band, saves the figure as a
    PDF to ``output_path`` and then closes all open figures.
    """
    x_values = list(map(float, FRACS))
    for m_type in layer_name_mapping:
        stats = metrics_values[m_type][key]
        center = np.array(stats["mean"])
        spread = np.array(stats["std"])
        colour = LAYER_COLORS[m_type]
        plt.plot(x_values, center, label=layer_name_mapping[m_type], color=colour)
        plt.fill_between(x_values, center - spread, center + spread, color=colour, alpha=0.3)
    plt.legend()
    plt.xscale("log")
    plt.xlabel("Number of training jets")
    plt.ylabel(Y_LABELS[key])
    plt.savefig(output_path, bbox_inches="tight", format="pdf")
    plt.close("all")


def plot_all_performances_gptLayer(training_type):
    """Write one ``<metric>_gptLayerCount.pdf`` per metric of ``training_type``.

    Files go to ``OUTPUT_DIR/<training_type>``, which is created if missing.
    """
    results = get_metrics_values_gptLayer(training_type)
    target_dir = os.path.join(OUTPUT_DIR, training_type)
    os.makedirs(target_dir, exist_ok=True)
    for metric_name in PERFORMANCE_KEYS[training_type]:
        plot_performance_gptLayer(
            results,
            metric_name,
            os.path.join(target_dir, f"{metric_name}_gptLayerCount.pdf"),
        )

# GPT layer impact

In [None]:
# Produce the per-metric GPT-layer-count plots for every task.
# Note: the loop variable overwrites the notebook-level `training_type`.
for training_type in PERFORMANCE_KEYS.keys():
    plot_all_performances_gptLayer(training_type)

In [None]:
# Summary figure of the GPT-layer study: one panel per task showing the
# validation loss at a single training-set size, divided by the smallest mean
# loss among the layer-count variants.
# This cell uses FormatStrFormatter, which is imported in the unfreezing
# summary cell earlier in the notebook, so that cell must have run first.
metric_key = 'loss_validation'
trainSize = '2e3'
# Index <-> tag lookups for FRACS; only the inverse mapping is used in this cell.
size_mapping = {i: value for i, value in enumerate(FRACS)}
inverse_size_mapping = {value: i for i, value in enumerate(FRACS)}
fig, axes = plt.subplots(3, 1, sharex=True)
color_mapping = {
    'jet_regression': 'red',
    'dm_multiclass': 'green',
    "binary_classification": "blue"
}
name_mapping = {
    'jet_regression': 'Kinematic reconstruction',
    'dm_multiclass': 'Decay mode reconstruction',
    "binary_classification": "Tagging"
}
# Global matplotlib setting: it stays in effect for every later figure in the kernel.
plt.rcParams['mathtext.fontset'] = 'stix'
for training_type, ax in zip(PERFORMANCE_KEYS.keys(), axes):
    metrics_values_gpt = get_metrics_values_gptLayer(training_type)
    means = []
    stds = []
    # One point per variant, in the insertion order of layer_name_mapping (1, 2, 3 layers).
    for key, values in metrics_values_gpt.items():
        means.append(values[metric_key]['mean'][inverse_size_mapping[trainSize]])
        stds.append(values[metric_key]['std'][inverse_size_mapping[trainSize]])
    means = np.array(means)
    stds = np.array(stds)

    # Normalise to the best (lowest) mean loss so the panels are comparable.
    normed_means = means/np.min(means)
    normed_max_err = (means + stds) / np.min(means)
    normed_min_err = (means - stds) / np.min(means)
    ax.plot(np.arange(len(means)), normed_means, ls='--', marker="^", ms=15, label=name_mapping[training_type], color=color_mapping[training_type])
    ax.fill_between(np.arange(len(means)), normed_min_err, normed_max_err, alpha=0.3, color=color_mapping[training_type])
    # ax.legend(loc='upper center')
    ax.set_ylabel(r'$\mathcal{L}_{val} / min(\mathcal{L}_{val})$')
    ax.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))
    # The task name is drawn inside the panel (title positioned in axes coordinates).
    ax.set_title(name_mapping[training_type], x=0.75, y=0.7, fontsize=22)
fig.subplots_adjust(wspace=0, hspace=0)
# Tick labels are hard-coded to match the three variants plotted above.
plt.xticks(range(3), ["1", "2", "3"])
plt.minorticks_off()
plt.xlim(-0.1, 2.1)
plt.xlabel("Number of GPT layers")
    
freeze_output_path = os.path.join(OUTPUT_DIR, "GPT_layer_ablation.pdf")
fig.savefig(freeze_output_path, bbox_inches='tight')

# Jet resolution improvement

In [None]:
import boost_histogram as bh

def to_bh(data, bins, cumulative=False):
    """Fill a 1D boost-histogram with variable-width bins from ``data``.

    Parameters
    ----------
    data : values passed to ``Histogram.fill``.
    bins : bin edges passed to ``bh.axis.Variable``.
    cumulative : if True, the bin contents are overwritten with
        ``total - running cumulative sum``.

    Returns the filled ``bh.Histogram``.
    """
    h1 = bh.Histogram(bh.axis.Variable(bins))
    h1.fill(data)
    if cumulative:
        # NOTE(review): np.cumsum receives the histogram object itself rather
        # than h1.values(); this relies on its array conversion - confirm.
        h1[:] = np.sum(h1.values()) - np.cumsum(h1)
    return h1

def calculate_bin_centers(edges: list) -> tuple:
    """Return the centre and half-width of every bin defined by ``edges``.

    Parameters
    ----------
    edges : sequence of N + 1 bin edges.

    Returns
    -------
    tuple
        ``(bin_centers, half_widths)``, two arrays of length N. The half-widths
        are directly usable as symmetric x-errors.
    """
    edges = np.asarray(edges)
    half_widths = np.diff(edges) / 2
    return edges[:-1] + half_widths, half_widths


def IQR(ratios: np.array) -> float:
    """Return the interquartile range ``q75 - q25`` of ``ratios`` as a scalar."""
    # Both quantiles come from a single call, so the data is only reduced once.
    q25, q75 = np.quantile(ratios, [0.25, 0.75])
    return q75 - q25


# Plot colour per training strategy in the response-distribution figure.
algo_colors = {
    "fine_tuning": "green",
    "from_scratch": "red"
}


def calculate_response(data):
    """Return the element-wise ratio ``pred / target`` of ``data.jet_regression``."""
    regression = data.jet_regression
    return regression.pred / regression.target

# Response (pred / target) distribution of the jet regression, averaged over the
# three training runs, for fine-tuning versus training from scratch.
bins = np.linspace(0.5, 1.5, 31)

sample = "z"
fig, ax = plt.subplots(figsize=(10, 10))
for algo in ["fine_tuning", "from_scratch"]:
    histograms = []
    resolutions = []
    avg_responses = []
    for version in ['v1', "v2", "v3"]:
        # The training-set size is fixed to the 1e4 runs here.
        predictions_path = os.path.join(BASE_DIR, version, f"trainfrac_1e4", "jet_regression", f"OmniParT_{algo}", f"{sample}_test.parquet")
        data = ak.from_parquet(predictions_path)
        responses = calculate_response(data)
        avg_response = np.mean(responses)
        avg_responses.append(avg_response)
        resolution = IQR(responses)
        resolutions.append(resolution)
        # print(fr"{algo} {version} IQR: {resolution:.3f}\t $\mu$: {avg_response}")
        histograms.append(np.histogram(responses, bins=bins)[0])
    # Per-bin mean and spread of the counts across the runs.
    histograms = np.array(histograms)
    mean_values = np.mean(histograms, axis=0)
    std_values = np.std(histograms, axis=0)
    runs_avg_response = np.mean(avg_responses)
    runs_std_response = np.std(avg_responses)
    print(f"{algo}")
    print(f"Response: {runs_avg_response:.3f} +/- {runs_std_response:.3f}")
    runs_avg_resolution = np.mean(resolutions)
    runs_std_resolution = np.std(resolutions)
    print(f"Resolution: {runs_avg_resolution:.3f} +/- {runs_std_resolution:.3f}")
    # bin_centers = calculate_bin_centers(bins)[0]
    # ax.plot(bin_centers, mean_values, label=fr"{MODEL_TYPES[algo]}", color=algo_colors[algo])
    # ax.fill_between(bin_centers, mean_values, mean_values - std_values, mean_values + std_values, alpha=0.3, color=algo_colors[algo])
    # First call draws the run-to-run spread as a band, second call the mean histogram itself.
    hep.histplot((mean_values, bins), yerr=std_values, histtype='band', ax=ax, density=True, color=algo_colors[algo], edgecolor=algo_colors[algo])
    hep.histplot((mean_values, bins), ax=ax, density=True, label=fr"{MODEL_TYPES[algo]}", color=algo_colors[algo])
ax.set_ylabel("Bin content [a.u.]")
ax.set_xlabel(r"$p_{T}^{pred} / p_{T}^{true}$")
plt.legend()
# # plt.grid()
# Note: this figure is written next to, not inside, OUTPUT_DIR.
reso_output_path = os.path.join('/home/laurits', "resolution_10k.pdf")
plt.savefig(reso_output_path, bbox_inches='tight')

In [None]:
# Relative improvements computed from hard-coded numbers.
# NOTE(review): presumably copied by hand from the output of the previous cell -
# re-check them whenever that cell is re-run.
print(f"Resolution improvement: {100* (0.143 - 0.068) / 0.143:.1f}%")
print(f"Response improvement: {100* (1.082 - 1.027) / 1.082:.1f}%")

# Decay mode ROC plot

In [None]:
def softmax(X, theta = 1.0, axis = None):
    """
    Compute the softmax of each element along an axis of X.

    Parameters
    ----------
    X: array-like (ND-array, nested list, ...). Converted to a numpy array.
    theta (optional): float parameter, used as a multiplier
        prior to exponentiation. Default = 1.0
    axis (optional): axis to compute values along. Default is the
        first non-singleton axis of the (at least 2D) input, or axis 0
        when every axis has length 1.

    Returns an array the same size as X. The result will sum to 1
    along the specified axis.
    """
    # accept lists and other array-likes, not only objects exposing `.shape`
    X = np.asarray(X)

    # make X at least 2d
    y = np.atleast_2d(X)

    # find axis; fall back to 0 instead of raising StopIteration when all axes are singletons
    if axis is None:
        axis = next((idx for idx, size in enumerate(y.shape) if size > 1), 0)

    # multiply y against the theta parameter
    y = y * float(theta)

    # subtract the max for numerical stability
    y = y - np.max(y, axis=axis, keepdims=True)

    # exponentiate y
    y = np.exp(y)

    # normalise along the chosen axis
    p = y / np.sum(y, axis=axis, keepdims=True)

    # undo the atleast_2d promotion for 0D / 1D input
    if X.ndim < 2:
        p = p.reshape(X.shape)

    return p


In [None]:
def calculate_fpr_tpr_with_thresholds(y_true, y_pred, thresholds):
    """Compute false- and true-positive rates at fixed score thresholds.

    Parameters
    ----------
    y_true : array-like of labels; 1 marks signal, 0 marks background.
    y_pred : array-like of scores, same length as ``y_true``.
    thresholds : iterable of cut values; a sample passes when ``score > threshold``.

    Returns
    -------
    (fpr, tpr) : two float arrays with one entry per threshold. A rate is NaN
        when the corresponding class has no samples.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    signal_samples = y_pred[y_true == 1]
    background_samples = y_pred[y_true == 0]
    total_signal = signal_samples.size
    total_background = background_samples.size
    fpr = []
    tpr = []
    for thr in thresholds:
        # np.count_nonzero counts in C; the builtin sum() iterates element by element.
        bkg_above_thr = np.count_nonzero(background_samples > thr)
        fpr.append(bkg_above_thr / total_background if total_background else np.nan)
        sig_above_thr = np.count_nonzero(signal_samples > thr)
        tpr.append(sig_above_thr / total_signal if total_signal else np.nan)
    return np.array(fpr), np.array(tpr)


def calculate_eff_fake(pred, true, thresholds):
    """Compute fake rate and efficiency at each threshold.

    Entries with ``true == 1`` count as signal, all others as background; a
    sample passes when ``pred > threshold``.
    Returns ``(fakerates, efficiencies)`` as numpy arrays, one entry per threshold.
    """
    is_signal = true == 1
    n_signal = ak.sum(is_signal)
    n_background = ak.sum(~is_signal)
    efficiencies, fakerates = [], []
    for cut in thresholds:
        efficiencies.append(ak.sum(pred[is_signal] > cut) / n_signal)
        fakerates.append(ak.sum(pred[~is_signal] > cut) / n_background)
    return np.array(fakerates), np.array(efficiencies)

In [None]:
# Tag of the test sample; it selects `{sample}_test.parquet` in the functions below.
sample = "z"

from hydra import compose, initialize
from omegaconf import OmegaConf
import tqdm
import torch
from enreg.tools.models.OmniParT import OmniParT
from enreg.tools.models.ParticleTransformer import ParticleTransformer
from torch.utils.data import DataLoader
from enreg.tools.data_management.particleTransformer_dataset import load_row_groups, ParticleTransformerDataset
from sklearn.metrics import roc_curve
from scipy.interpolate import interp1d

# Compose the project's "model_training" hydra config; `cfg` is read by
# create_pred_true below. The config path is given relative, not absolute.
with initialize(version_base=None, config_path="../enreg/config/", job_name="test_app"):
    cfg = compose(config_name="model_training")


def one_hot_encode(values, num_classes=None):
    """One-hot encode a 1D sequence of non-negative integer class labels.

    Parameters
    ----------
    values : 1D array-like of integer labels.
    num_classes : optional int. Number of output columns. When omitted it is
        inferred as ``max(values) + 1``, which yields too few columns if the
        highest class happens to be absent from ``values``.

    Returns
    -------
    Float array of shape ``(len(values), num_classes)``.

    Raises
    ------
    ValueError
        If ``values`` is empty and ``num_classes`` is not given.
    """
    labels = np.asarray(values)
    if num_classes is None:
        if labels.size == 0:
            raise ValueError("cannot infer the number of classes from empty input; pass num_classes")
        num_classes = labels.max() + 1
    encoded = np.zeros((labels.size, num_classes))
    if labels.size == 0:
        return encoded
    encoded[np.arange(labels.size), labels] = 1
    return encoded

def unpack_data(X, dev, feature_set):
    """Move one batch to ``dev`` and assemble the model inputs.

    The tensors named in ``feature_set`` are concatenated along dimension 1.
    Returns ``(particle_features, cand_kinematics, mask)``, where ``mask`` is
    ``X["mask"]`` cast to bool.
    """
    moved_features = [X[name].to(device=dev) for name in feature_set]
    particle_features = torch.cat(moved_features, axis=1)
    cand_kinematics = X["cand_kinematics"].to(device=dev)
    mask = X["mask"].to(device=dev).bool()
    return particle_features, cand_kinematics, mask


def create_pred_true(algo, version='v1', frac=None):
    """Run a trained decay-mode classifier over the test sample.

    Parameters
    ----------
    algo : str
        Model variant; selects the ``OmniParT_{algo}`` checkpoint directory and
        is written to ``cfg.models.OmniParT.version``.
    version : str
        Training run to load (sub-directory of ``BASE_DIR``).
    frac : str, optional
        Training-set-size tag selecting ``trainfrac_{frac}``. When omitted, a
        notebook-level variable ``frac`` is used if one exists.

    Returns
    -------
    (preds, targets) : numpy arrays with the raw model outputs and the
        ``dm_multiclass`` targets for every jet yielded by the dataloader.

    Raises
    ------
    ValueError
        If no training fraction is available.

    Notes
    -----
    Reads the notebook-level ``cfg`` and ``sample`` and mutates
    ``cfg.models.OmniParT.version``.
    """
    if frac is None:
        # Earlier revisions read a notebook-level `frac` that no cell defines;
        # keep honouring it when present, but fail with a clear message otherwise.
        frac = globals().get("frac")
    if frac is None:
        raise ValueError(
            "No training fraction given: pass `frac` (e.g. one of FRACS) or define a notebook-level `frac`."
        )
    algo_model_path = os.path.join(BASE_DIR, version, f"trainfrac_{frac}", "dm_multiclass", f"OmniParT_{algo}", f"model_best.pt")
    cfg.models.OmniParT.version = algo
    model = OmniParT(
        input_dim=10,
        cfg=cfg.models.OmniParT,
        num_classes=6,
        num_layers=cfg.models.OmniParT.hyperparameters.num_layers,
        embed_dims=cfg.models.OmniParT.hyperparameters.embed_dims,
        use_pre_activation_pair=False,
        for_inference=False,
        use_amp=False,
        metric='eta-phi',
        verbosity=cfg.verbosity,
    ).to(device='cpu')

    model.load_state_dict(torch.load(algo_model_path, map_location="cpu"))
    model.eval()

    data = load_row_groups(os.path.join(cfg.data_path, f"{sample}_test.parquet"))
    dataset_full = ParticleTransformerDataset(
        row_groups=data,
        cfg=cfg.dataset,
        reco_jet_pt_cut=cfg.reco_jet_pt_cut[cfg.training_type]
    )

    dataloader_full = DataLoader(
        dataset_full,
        batch_size=cfg.training.batch_size,
    )
    preds = []
    targets = []

    for (X, y, weight) in tqdm.tqdm(dataloader_full, total=len(dataloader_full)):
        model_inputs = unpack_data(X, "cpu", ["cand_omni_features_wPID"])
        y_for_loss = y["dm_multiclass"]
        # Inference only: no gradients are needed.
        with torch.no_grad():
            pred = model(*model_inputs)
            preds.extend(pred.detach().cpu().numpy())
            targets.extend(y_for_loss.detach().cpu().numpy())
    preds = np.array(preds)
    targets = np.array(targets)
    return preds, targets

In [None]:
from sklearn.metrics import roc_auc_score
# Legend label (mathtext) for each of the six decay-mode class indices.
# NOTE(review): label 4 reads `\geq\pi^0` with no count, unlike label 2
# (`\geq2\pi^0`) - confirm whether `\geq1\pi^0` was intended.
decay_mode_name_mapping = {
            0: r"$h^{\pm}$",
            1: r"$h^{\pm}\pi^0$",
            2: r'$h^\pm+\geq2\pi^0$',
            3: r"$h^{\pm}h^{\mp}h^{\pm}$",
            4: r"$h^{\pm}h^{\mp}h^{\pm}+\geq\pi^0$",
            5: "Rare",
        }

def get_eff_fr(true, preds, output_path):
    """Plot one-vs-rest curves for the six decay modes and average them.

    ``true`` is the one-hot target matrix and ``preds`` the per-class scores.
    Each class contributes to the averages with weight equal to its share of
    the samples. The per-class curves are saved to ``output_path`` and all
    figures are closed afterwards.

    Returns ``(avg_fpr, avg_tpr, avg_auc)``.
    """
    cut_values = np.linspace(0, 1, num=100)
    weighted_fprs = []
    weighted_tprs = []
    weighted_aucs = []
    for dm_idx in range(6):
        dm_true = true[:, dm_idx]
        dm_score = preds[:, dm_idx]
        fpr, tpr = calculate_fpr_tpr_with_thresholds(dm_true, dm_score, cut_values)
        # Fraction of samples whose true class is this decay mode.
        class_proportion = (np.sum(np.argmax(true, axis=1) == dm_idx)) / len(true)
        plt.plot(tpr, fpr, label=decay_mode_name_mapping[dm_idx])
        auc = roc_auc_score(dm_true, dm_score)
        print(f"{decay_mode_name_mapping[dm_idx]}: {auc}")
        weighted_aucs.append(auc*class_proportion)
        weighted_fprs.append(fpr*class_proportion)
        weighted_tprs.append(tpr*class_proportion)
    weighted_fprs = np.array(weighted_fprs)
    weighted_tprs = np.array(weighted_tprs)
    weighted_aucs = np.array(weighted_aucs)
    plt.legend()
    plt.ylabel(r"$P_{misID}$")
    plt.xlabel(r"$\epsilon_{DM}$")
    plt.yscale("log")
    plt.savefig(output_path)
    plt.close('all')
    avg_auc = np.sum(weighted_aucs)
    print(f"avg auc: {avg_auc}")
    return np.sum(weighted_fprs, axis=0), np.sum(weighted_tprs, axis=0), avg_auc

In [None]:
def get_dm_algo_results(algorithm):
    """Evaluate the decay-mode classifier of ``algorithm`` for runs v1, v2 and v3.

    For every run the raw outputs are turned into probabilities with softmax,
    the targets are one-hot encoded and the class-averaged curves are computed
    (a per-run PDF is written as a side effect). The mean and spread of the
    per-run average AUC are printed.

    Returns ``(avg_fr, avg_eff)``, two dicts keyed by run name.
    """
    avg_fr, avg_eff, aucs = {}, {}, []
    for version in ('v1', 'v2', 'v3'):
        raw_scores, labels = create_pred_true(algorithm, version=version)
        probabilities = softmax(raw_scores, axis=1)
        labels_one_hot = one_hot_encode(labels)
        pdf_path = f"/home/laurits/tmp/{algorithm}_dm_multiclass_{version}.pdf"
        run_fr, run_eff, run_auc = get_eff_fr(labels_one_hot, probabilities, pdf_path)
        avg_fr[version] = run_fr
        avg_eff[version] = run_eff
        aucs.append(run_auc)
    print(fr"{algorithm} AUC: {np.mean(aucs):.3f} +/- {np.std(aucs):.3f}")
    return avg_fr, avg_eff


# Evaluate both training strategies. The from-scratch results are required too:
# the interpolation, ROC-plot and AUC cells below read avg_fr_fs / avg_eff_fs and
# fail with a NameError on a fresh kernel if this call is skipped.
avg_fr_ft, avg_eff_ft = get_dm_algo_results("fine_tuning")
avg_fr_fs, avg_eff_fs = get_dm_algo_results("from_scratch")


In [None]:
# Put the three runs of each strategy on a common false-positive-rate grid so
# they can be averaged point by point: x is the class-averaged fake rate, y the
# class-averaged efficiency. Requires the results of both
# get_dm_algo_results("fine_tuning") and get_dm_algo_results("from_scratch").
x_common = np.linspace(0, 1, 101)
y1_interp_ft = interp1d(avg_fr_ft['v1'], avg_eff_ft['v1'], kind='linear', fill_value='extrapolate')(x_common)
y2_interp_ft = interp1d(avg_fr_ft['v2'], avg_eff_ft['v2'], kind='linear', fill_value='extrapolate')(x_common)
y3_interp_ft = interp1d(avg_fr_ft['v3'], avg_eff_ft['v3'], kind='linear', fill_value='extrapolate')(x_common)
y_stack_ft = np.vstack([y1_interp_ft, y2_interp_ft, y3_interp_ft])
# Mean curve and run-to-run spread for fine-tuning.
y_mean_ft = np.mean(y_stack_ft, axis=0)
y_std_ft = np.std(y_stack_ft, axis=0)

y1_interp_fs = interp1d(avg_fr_fs['v1'], avg_eff_fs['v1'], kind='linear', fill_value='extrapolate')(x_common)
y2_interp_fs = interp1d(avg_fr_fs['v2'], avg_eff_fs['v2'], kind='linear', fill_value='extrapolate')(x_common)
y3_interp_fs = interp1d(avg_fr_fs['v3'], avg_eff_fs['v3'], kind='linear', fill_value='extrapolate')(x_common)

# Mean curve and run-to-run spread for training from scratch.
y_stack_fs = np.vstack([y1_interp_fs, y2_interp_fs, y3_interp_fs])
y_mean_fs = np.mean(y_stack_fs, axis=0)
y_std_fs = np.std(y_stack_fs, axis=0)

In [None]:
# Class-averaged decay-mode ROC curves: mean over runs as a line, +/- one
# standard deviation as a band.
plt.plot(x_common, y_mean_fs, label="From scratch", color="red")
plt.fill_between(x_common, y_mean_fs - y_std_fs, y_mean_fs + y_std_fs, alpha=0.3, color="red")
plt.plot(x_common, y_mean_ft, label="Fine-tuning", color="green")
plt.fill_between(x_common, y_mean_ft - y_std_ft, y_mean_ft + y_std_ft, alpha=0.3, color="green")
plt.legend()
plt.ylabel("Average true positive rate")
plt.xlabel("Average false positive rate")
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.savefig("/home/laurits/tmp/dm_multiclass_roc.pdf", bbox_inches='tight')

In [None]:
# Area under the averaged ROC curves, approximated as a rectangle sum with the
# 0.01 spacing of x_common.
# NOTE(review): all 101 grid points are summed, so this is a rectangle rule,
# not a trapezoid rule - confirm this approximation is intended.
ft_AUC = np.sum(np.multiply(0.01, y_mean_ft))
# `nan=0.0` must be passed by keyword: the second positional argument of
# np.nan_to_num is `copy`, so a positional 0 would overwrite y_mean_fs in place.
fs_AUC = np.sum(np.multiply(0.01, np.nan_to_num(y_mean_fs, nan=0.0)))

print(f"Fine-tuning AUC: {ft_AUC:.3f}")
print(f"From scratch AUC: {fs_AUC:.3f}")
print(f"Improvement: {100*abs(fs_AUC-ft_AUC)/fs_AUC:.3f} %")


# Tagger ROC

In [None]:
def get_algo_results(algorithm, frac=None):
    """Evaluate the binary tagger of ``algorithm`` for runs v1, v2 and v3.

    Parameters
    ----------
    algorithm : str
        Model variant; selects the ``OmniParT_{algorithm}`` output directory.
    frac : str, optional
        Training-set-size tag selecting ``trainfrac_{frac}``. When omitted, a
        notebook-level variable ``frac`` is used if one exists.

    Returns
    -------
    (fakerates, efficiencies, aucs) : two dicts keyed by run name holding the
        rates at 101 thresholds between 0 and 1, and the list of per-run ROC AUCs.

    Raises
    ------
    ValueError
        If no training fraction is available.

    Notes
    -----
    Reads the notebook-level ``sample``. Predictions for that sample are
    concatenated with those of the ``qq`` test file before the rates are computed.
    """
    if frac is None:
        # Earlier revisions read a notebook-level `frac` that no cell defines;
        # keep honouring it when present, but fail with a clear message otherwise.
        frac = globals().get("frac")
    if frac is None:
        raise ValueError(
            "No training fraction given: pass `frac` (e.g. one of FRACS) or define a notebook-level `frac`."
        )
    aucs = []
    fakerates = {}
    efficiencies = {}
    thresholds = np.linspace(0, 1, 101)
    for version in ['v1', 'v2', 'v3']:
        run_dir = os.path.join(BASE_DIR, version, f"trainfrac_{frac}", "binary_classification", f"OmniParT_{algorithm}")
        data = ak.from_parquet(os.path.join(run_dir, f"{sample}_test.parquet")).binary_classification
        bkg = ak.from_parquet(os.path.join(run_dir, "qq_test.parquet")).binary_classification

        pred = ak.concatenate([data.pred, bkg.pred])
        true = ak.concatenate([data.target, bkg.target])

        fakerates[version], efficiencies[version] = calculate_eff_fake(pred, true, thresholds)
        auc = roc_auc_score(true, pred)
        aucs.append(auc)
    print(fr"{algorithm} AUC: {np.mean(aucs):.3f} +/- {np.std(aucs):.3f}")
    return fakerates, efficiencies, aucs



# Put the three runs of each strategy on a common efficiency grid so they can
# be averaged point by point: x is the tagging efficiency, y the fake rate.
x_common = np.linspace(0, 1, 101)

fprs_ft, tprs_ft, aucs_ft = get_algo_results("fine_tuning")
y1_interp_ft = interp1d(tprs_ft['v1'], fprs_ft['v1'], kind='linear', fill_value='extrapolate')(x_common)
y2_interp_ft = interp1d(tprs_ft['v2'], fprs_ft['v2'], kind='linear', fill_value='extrapolate')(x_common)
y3_interp_ft = interp1d(tprs_ft['v3'], fprs_ft['v3'], kind='linear', fill_value='extrapolate')(x_common)
y_stack_ft = np.vstack([y1_interp_ft, y2_interp_ft, y3_interp_ft])
# Mean curve and run-to-run spread for fine-tuning.
# Note: these names overwrite the decay-mode results computed in earlier cells.
y_mean_ft = np.mean(y_stack_ft, axis=0)
y_std_ft = np.std(y_stack_ft, axis=0)

fprs_fs, tprs_fs, aucs_fs = get_algo_results("from_scratch")
y1_interp_fs = interp1d(tprs_fs['v1'], fprs_fs['v1'], kind='linear', fill_value='extrapolate')(x_common)
y2_interp_fs = interp1d(tprs_fs['v2'], fprs_fs['v2'], kind='linear', fill_value='extrapolate')(x_common)
y3_interp_fs = interp1d(tprs_fs['v3'], fprs_fs['v3'], kind='linear', fill_value='extrapolate')(x_common)

# Mean curve and run-to-run spread for training from scratch.
y_stack_fs = np.vstack([y1_interp_fs, y2_interp_fs, y3_interp_fs])
y_mean_fs = np.mean(y_stack_fs, axis=0)
y_std_fs = np.std(y_stack_fs, axis=0)


In [None]:
# Tagger ROC curves (fake rate versus efficiency, log y-axis): mean over runs
# as a line, +/- one standard deviation as a band.
plt.plot(x_common, y_mean_fs, label="From scratch", color="red")
plt.fill_between(x_common, y_mean_fs - y_std_fs, y_mean_fs + y_std_fs, alpha=0.3, color="red")
plt.plot(x_common, y_mean_ft, label="Fine-tuning", color="green")
plt.fill_between(x_common, y_mean_ft - y_std_ft, y_mean_ft + y_std_ft, alpha=0.3, color="green")
plt.legend()
plt.yscale("log")
plt.ylabel(r"$P_{misID}$")
plt.xlabel(r"$\epsilon_{\tau}$")
plt.xlim([0, 1])
plt.savefig("/home/laurits/tmp/binary_cls_roc.pdf", bbox_inches='tight')


In [None]:
# At 80% efficiency: index 80 of x_common (101 points from 0 to 1) is 0.8.
print(f"From scratch \t P_misID: {y_mean_fs[80]:.3f} +/- {y_std_fs[80]:.3f}")
print(f"Fine-tuning \t  P_misID: {y_mean_ft[80]:.3f} +/- {y_std_ft[80]:.3f}")


In [None]:
# Relative improvement computed from hard-coded numbers.
# NOTE(review): presumably copied by hand from the output of the previous cell -
# re-check them whenever that cell is re-run.
print(f"P_misID improvement at 80% efficiency: {100*(0.039 - 0.031)/0.039:.1f}%")