In [None]:
import glob
import json

import matplotlib.lines as mlines
import matplotlib.pyplot as plt
import numpy as np
import pandas
import seaborn

import mplhep as hep
# Select mplhep's "CMS" plotting style for the figures produced in this notebook.
hep.style.use("CMS")

In [None]:
# Directory that receives the PDF figures; the plotting cells build their
# output paths as outdir + "/<name>.pdf".
outdir = "../outputs/plots/"

In [None]:
# Fixed line/marker colour (hex RGB) for each model, shared by every figure.
color_map = dict(
    DeepSet="#ff5b5b",
    LorentzNet="#ffc140",
    ParticleTransformer="#89cded",
)

In [None]:
# Glob selecting every training history file; the last two directory levels
# of each match are read as <target>/<model>.
history_glob = "/local/joosep/ml-tau-en-reg/results/240611_PT2layers/*/*/*/history.json"


def load_histories(pattern):
    """Load all history.json files matching ``pattern``, grouped by target and model.

    The model name is taken from the directory containing the file and the
    target from the directory one level above it. Histories stored under
    "SimpleDNN" are filed under "DeepSet".

    Parameters
    ----------
    pattern : str
        Glob pattern (with "/" separators) matching the history.json files.

    Returns
    -------
    dict
        ``{target: {model: [history, ...]}}`` where each history is the parsed
        JSON content of one file, in sorted path order.

    Raises
    ------
    KeyError
        If a file sits under a target or model directory that is not one of
        the expected names.
    """
    histories = {
        target: {model: [] for model in ("LorentzNet", "ParticleTransformer", "DeepSet")}
        for target in ("jet_regression", "dm_multiclass")
    }

    for path in sorted(glob.glob(pattern)):
        parts = path.split("/")
        model = parts[-2]
        if model == "SimpleDNN":
            model = "DeepSet"
        target = parts[-3]
        print(model, target)
        # Close each file right after parsing instead of leaking the handle.
        with open(path) as history_file:
            histories[target][model].append(json.load(history_file))

    return histories


losses = load_histories(history_glob)

In [None]:
# Element-wise mean and standard deviation of the loss histories over the
# repeated trainings of every (target, model) pair.
losses_avg, losses_std = {}, {}
for target in ["dm_multiclass", "jet_regression"]:
    target_avg, target_std = {}, {}
    for model in ["LorentzNet", "ParticleTransformer", "DeepSet"]:
        # One array per loss kind, with the individual trainings along axis 0.
        stacked = {
            loss_name: np.array([run[loss_name] for run in losses[target][model]])
            for loss_name in ["losses_train", "losses_validation"]
        }
        target_avg[model] = {name: np.mean(curves, axis=0) for name, curves in stacked.items()}
        target_std[model] = {name: np.std(curves, axis=0) for name, curves in stacked.items()}
    losses_avg[target] = target_avg
    losses_std[target] = target_std

In [None]:
training_type = "dm_multiclass"
training_title = "Decay mode classification"

fig, ax = plt.subplots()

for model in ["DeepSet", "LorentzNet", "ParticleTransformer"]:
    color = color_map[model]
    # Training curve: thin dashed line without a legend entry.
    # Validation curve: solid line that carries the model's legend label.
    for loss_name, line_kwargs in [
        ("losses_train", dict(ls="--", lw=1)),
        ("losses_validation", dict(label=model)),
    ]:
        mean = losses_avg[training_type][model][loss_name]
        std = losses_std[training_type][model][loss_name]
        # Take the x axis from the data itself so the band still lines up when
        # the histories do not have exactly 100 entries.
        epochs = np.arange(len(mean))
        ax.plot(epochs, mean, color=color, **line_kwargs)
        # Shaded band: mean +/- one standard deviation across trainings.
        ax.fill_between(epochs, mean - std, mean + std, color=color, alpha=0.5)

ax.set_ylim(0.2, 0.4)
ax.set_ylabel("Loss")
ax.set_xlabel("Training epoch")
ax.set_title(training_title)

# First legend: one entry per model. It is re-added as an artist because the
# second ax.legend() call below would otherwise replace it.
leg1 = ax.legend(loc=1, frameon=False)
ax.add_artist(leg1)

# Second legend: explains the line styles using colour-neutral proxy lines.
handles = [mlines.Line2D([], [], color="black", ls="--"), mlines.Line2D([], [], color="black", ls="-")]
ax.legend(handles, ['training', 'validation'], loc=3, frameon=False)
fig.savefig(outdir + "/loss_{}.pdf".format(training_type))

In [None]:
training_type = "jet_regression"
training_title = "Momentum regression"

fig, ax = plt.subplots()

for model in ["DeepSet", "LorentzNet", "ParticleTransformer"]:
    color = color_map[model]
    # Training curve: thin dashed line without a legend entry.
    # Validation curve: solid line that carries the model's legend label.
    for loss_name, line_kwargs in [
        ("losses_train", dict(ls="--", lw=1)),
        ("losses_validation", dict(label=model)),
    ]:
        mean = losses_avg[training_type][model][loss_name]
        std = losses_std[training_type][model][loss_name]
        # Take the x axis from the data itself so the band still lines up when
        # the histories do not have exactly 100 entries.
        epochs = np.arange(len(mean))
        ax.plot(epochs, mean, color=color, **line_kwargs)
        # Shaded band: mean +/- one standard deviation across trainings.
        ax.fill_between(epochs, mean - std, mean + std, color=color, alpha=0.5)

ax.set_ylim(0.0, 0.02)
ax.set_ylabel("Loss")
ax.set_xlabel("Training epoch")
ax.set_title(training_title)

# First legend: one entry per model. It is re-added as an artist because the
# second ax.legend() call below would otherwise replace it.
leg1 = ax.legend(loc=1, frameon=False)
ax.add_artist(leg1)

# Second legend: explains the line styles using colour-neutral proxy lines.
handles = [mlines.Line2D([], [], color="black", ls="--"), mlines.Line2D([], [], color="black", ls="-")]
ax.legend(handles, ['training', 'validation'], loc=3, frameon=False)
fig.savefig(outdir + "/loss_{}.pdf".format(training_type))

In [None]:
def extract_losses(model):
    """Return the best averaged validation loss of ``model`` for each task.

    For "dm_multiclass" and then "jet_regression", the index of the minimum of
    the averaged validation loss is located, and the averaged loss and its
    standard deviation at that index are collected.

    Parameters
    ----------
    model : str
        Model key used in the module-level ``losses_avg`` / ``losses_std``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(best_losses, best_loss_stds)``, each with one entry per task in the
        order ("dm_multiclass", "jet_regression").
    """
    best_means, best_stds = [], []

    for task in ("dm_multiclass", "jet_regression"):
        val_mean = losses_avg[task][model]["losses_validation"]
        best_epoch = np.argmin(val_mean)
        best_means.append(val_mean[best_epoch])
        # Spread is read at the same index as the minimum of the mean curve.
        best_stds.append(losses_std[task][model]["losses_validation"][best_epoch])

    return np.array(best_means), np.array(best_stds)

# Best averaged validation loss (and its spread) per task for each model.
vals_loss_ds, vals_loss_std_ds = extract_losses("DeepSet")
vals_loss_pt, vals_loss_std_pt = extract_losses("ParticleTransformer")
vals_loss_ln, vals_loss_std_ln = extract_losses("LorentzNet")

fig, ax = plt.subplots()

# One row per task on the y axis.
x = [1, 2]
labels = ["decay mode\nclassification", "momentum\nregression"]

# Plot the data as points. Values and error bars are divided by the DeepSet
# best loss, so DeepSet itself sits at 1 on the x axis.
for model, vals_loss, vals_loss_std in [
    ("DeepSet", vals_loss_ds, vals_loss_std_ds),
    ("LorentzNet", vals_loss_ln, vals_loss_std_ln),
    ("ParticleTransformer", vals_loss_pt, vals_loss_std_pt),
]:
    ax.errorbar(
        vals_loss / vals_loss_ds,
        x,
        xerr=vals_loss_std / vals_loss_ds,
        label=model,
        color=color_map[model],
        marker='o',
        ls='',
        ms=10,
        capsize=5,
    )

ax.set_xlabel('Best validation loss, relative to DeepSet\n(lower is better)')
ax.set_title('Model comparison', y=1.05)
ax.set_yticks(x)
# tick_params silently drops bottom/top keywords for axis='y' and left/right
# keywords for axis='x', so each axis must be given its own side keywords:
# no tick marks on the categorical y axis (labels kept), ticks and labels
# only along the bottom of the numeric x axis.
ax.tick_params(axis='y', which='both', left=False, right=False, labelleft=True)
ax.tick_params(axis='x', which='both', bottom=True, top=False, labelbottom=True)
ax.set_yticklabels(labels)
legend = ax.legend(loc='lower left', shadow=True, fancybox=True, framealpha=1, borderpad=1)
# Leave empty space around the two rows at y=1 and y=2.
ax.set_ylim(0, 3)
fig.savefig(outdir + "/best_losses.pdf")