In [1]:
import os
import json
from hydra import compose, initialize
from omegaconf import OmegaConf, DictConfig
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from ml4cc.tools.evaluation import general as g
from ml4cc.tools.visualization import losses as l
from ml4cc.tools.visualization import regression as rv
from ml4cc.tools.visualization import classification as cv

# Compose the Hydra configuration named "main" from the ../config directory.
# The context manager keeps Hydra initialised only while composing.
with initialize(config_path="../config", job_name="test_app", version_base=None):
    cfg = compose(config_name="main")

In [2]:
# Root directory holding all training outputs. Set the
# ML4CC_TRAINING_OUTPUTS_DIR environment variable before starting the kernel
# to run the notebook on another machine; the default is the original location.
TRAINING_OUTPUTS_DIR = os.environ.get(
    "ML4CC_TRAINING_OUTPUTS_DIR", "/home/laurits/ml4cc/training-outputs"
)


def _training_dir(relative_path: str) -> str:
    """Return the training directory `relative_path` located under TRAINING_OUTPUTS_DIR."""
    return os.path.join(TRAINING_OUTPUTS_DIR, relative_path)


# Algorithm key -> training directory used in the classification comparison.
# The dict order is the iteration order of the loops that consume it.
CLASSIFICATION_TRAININGS = {
    "two_step_minimal": _training_dir("20250622_FCC_twostep_minimal/two_step_minimal/"),
    # NOTE(review): "two_step_pf" currently points at the *masked* training
    # (20250620); the unmasked 20250618 run is disabled below. Confirm this is intended.
    "two_step_pf": _training_dir("20250620_FCC_twostep_masked/two_step_pf/"),
    # Disabled alternatives:
    # "two_step_pf_masked": _training_dir("20250620_FCC_twostep_masked/two_step_pf/"),
    # "two_step_pf": _training_dir("20250618_FCC_twostep/two_step_pf/"),
}

# Algorithm key -> training directory used in the regression comparison.
REGRESSION_TRAININGS = {
    "one_step": _training_dir("20250622_FCC_onestep/one_step/"),
    "two_step_minimal": _training_dir("20250622_FCC_twostep_minimal/two_step_minimal/"),
    "two_step_DNN": _training_dir("20250630_FCC_twostep_clusterization_DNN/two_step_cl"),
    "two_step_RNN": _training_dir("20250630_FCC_twostep_clusterization_RNN/two_step_cl"),
    "two_step_CNN": _training_dir("20250630_FCC_twostep_clusterization_CNN/two_step_cl"),
    # NOTE(review): an earlier TODO asked to add the clusterization trainings;
    # the DNN/RNN/CNN entries above appear to cover it - confirm and drop this note.
}

# Algorithm key -> mathtext label handed to the plotting classes as `name_mapping`.
NAME_MAPPING = {
    "two_step_minimal": r"$LSTM_{primary}$",
    "two_step_pf": r"$LSTM^{PF}_{both}$",
    "one_step": r"$TransformerEncoder^{one-step}$",
    "two_step_DNN": r"$LSTM^{PF} + DNN^{CL}$",
    "two_step_RNN": r"$LSTM^{PF} + RNN^{CL}$",
    "two_step_CNN": r"$LSTM^{PF} + CNN^{CL}$",
}
    
# Directory that receives every figure written by this notebook. Set the
# ML4CC_RESULTS_DIR environment variable before starting the kernel to write
# elsewhere; the default is the original location.
RESULTS_DIR = os.environ.get("ML4CC_RESULTS_DIR", "/home/laurits/tmp/results")
# Create the directory up front so the plotting cells can write into it.
os.makedirs(RESULTS_DIR, exist_ok=True)

# Classification

In [3]:
# Classification comparison: BCE loss curves, best validation loss and AUC
# for every training listed in CLASSIFICATION_TRAININGS.

# --- Load ---------------------------------------------------------------
# Each logs/metrics.csv is parsed once and reused for both the loss curves
# and the best-validation-loss summary.
loss_results = {}      # algo -> output of g.filter_losses
loss_results_cls = {}  # algo -> {"best_losses": [minimum of "val_loss"]}
all_results = {}       # algo -> {key of results.json -> its "global" entry}
for algo, training_dir in CLASSIFICATION_TRAININGS.items():
    metrics_path = os.path.join(training_dir, "logs/metrics.csv")
    results_path = os.path.join(training_dir, "results/results.json")

    losses = g.filter_losses(metrics_path=metrics_path)
    loss_results[algo] = losses
    best_loss = np.min(losses["val_loss"])
    loss_results_cls[algo] = {"best_losses": [best_loss]}

    # Only hold the file open while parsing it.
    with open(results_path, "rt") as in_file:
        results = json.load(in_file)
    all_results[algo] = {pid: entry["global"] for pid, entry in results.items()}

# --- Plot ---------------------------------------------------------------
# Loss curves per algorithm.
# NOTE(review): x_max=-1 presumably means "no upper x limit" - confirm in LossesMultiPlot.
losses_output_path = os.path.join(RESULTS_DIR, "BCE_losses.png")
lp = l.LossesMultiPlot(
    loss_name="BCE",
    plot_train_losses=True,
    x_max=-1,
    name_mapping=NAME_MAPPING,
)
lp.plot_algorithms(results=loss_results, output_path=losses_output_path)

# Best validation loss per algorithm, drawn with both stack-plot variants.
lsp = l.LossesStackPlot(loss_name="BCE", name_mapping=NAME_MAPPING)
lsp_output_path = os.path.join(RESULTS_DIR, "cls_loss_stack.png")
lsp.plot_algorithms(loss_results_cls, lsp_output_path)

lsp2 = l.LossesStackPlot2(loss_name="BCE", name_mapping=NAME_MAPPING)
lsp_output_path2 = os.path.join(RESULTS_DIR, "cls_loss_stack2.png")
lsp2.plot_algorithms(loss_results_cls, lsp_output_path2)

# AUC comparison built from the "global" results of each algorithm.
asp = cv.AUCStackPlot(name_mapping=NAME_MAPPING)
asp_output_path = os.path.join(RESULTS_DIR, "AUC_stack.png")
asp.plot_algorithms(all_results, asp_output_path)

# Regression

In [4]:
# Regression comparison: MSE loss curves, best validation loss and resolution
# for every training listed in REGRESSION_TRAININGS.

# --- Load ---------------------------------------------------------------
# Each logs/metrics.csv is parsed once and reused for both the loss curves
# and the best-validation-loss summary.
loss_results = {}      # algo -> output of g.filter_losses
loss_results_reg = {}  # algo -> {"best_losses": [minimum of "val_loss"]}
all_results = {}       # algo -> {key of results.json -> its "global" entry}
for algo, training_dir in REGRESSION_TRAININGS.items():
    # "two_step_minimal" is left out of the MSE loss comparison but still
    # contributes to the resolution plot. Its directory is the same one listed
    # in CLASSIFICATION_TRAININGS, where its metrics.csv is plotted as BCE loss.
    # NOTE(review): presumably that is why it is skipped here - confirm.
    if algo != "two_step_minimal":
        metrics_path = os.path.join(training_dir, "logs/metrics.csv")
        losses = g.filter_losses(metrics_path=metrics_path)
        loss_results[algo] = losses
        best_loss = np.min(losses["val_loss"])
        loss_results_reg[algo] = {"best_losses": [best_loss]}

    results_path = os.path.join(training_dir, "results/results.json")
    # Only hold the file open while parsing it.
    with open(results_path, "rt") as in_file:
        results = json.load(in_file)
    all_results[algo] = {pid: entry["global"] for pid, entry in results.items()}

# --- Plot ---------------------------------------------------------------
# Loss curves per algorithm.
# NOTE(review): x_max=-1 presumably means "no upper x limit" - confirm in LossesMultiPlot.
lp = l.LossesMultiPlot(
    loss_name="MSE",
    plot_train_losses=True,
    x_max=-1,
    name_mapping=NAME_MAPPING,
)
losses_output_path = os.path.join(RESULTS_DIR, "MSE_losses.png")
lp.plot_algorithms(results=loss_results, output_path=losses_output_path)

# Resolution comparison built from the "global" results of each algorithm.
rsp = rv.RegressionStackPlot(name_mapping=NAME_MAPPING)
rsp_output_path = os.path.join(RESULTS_DIR, "resolution_stack.png")
rsp.plot_algorithms(all_results, rsp_output_path)

# Best validation loss per algorithm, drawn with both stack-plot variants.
lsp = l.LossesStackPlot(loss_name="MSE", name_mapping=NAME_MAPPING)
lsp_output_path = os.path.join(RESULTS_DIR, "regression_loss_stack.png")
lsp.plot_algorithms(loss_results_reg, lsp_output_path)

lsp2 = l.LossesStackPlot2(loss_name="MSE", name_mapping=NAME_MAPPING)
lsp2_output_path = os.path.join(RESULTS_DIR, "regression_loss_stack2.png")
lsp2.plot_algorithms(loss_results_reg, lsp2_output_path)