In [1]:
#0: imports

import uproot 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch

from imports.data import CCV1
from torch_geometric.data import DataLoader 
from imports.models import Net_SEC, Net_GAT, Net_Trans
from torch_geometric.nn import knn_graph

import numpy as np
import awkward as ak
import time
from imports.Agglomerative import Aggloremative

import warnings
warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)

In [2]:
import pandas as pd
import os

# Folder that holds the CSV files to load (relative to the notebook's working directory).
csv_folder = "csv_Agg"

# Load every "*.csv" file in the folder into a module-level DataFrame named after
# the file stem (e.g. "pion_GAT_hard.csv" -> variable `pion_GAT_hard`).
# The plotting cell looks these frames up by name through globals().
#
# os.listdir() returns entries in arbitrary order, so the listing is sorted to
# make the load order (and the "Loaded ..." log) reproducible between runs.
for filename in sorted(os.listdir(csv_folder)):
    # splitext removes only the trailing extension; str.replace(".csv", "")
    # would also delete any ".csv" occurring in the middle of the name.
    df_name, extension = os.path.splitext(filename)
    if extension != ".csv":
        continue
    file_path = os.path.join(csv_folder, filename)
    globals()[df_name] = pd.read_csv(file_path)  # Create variable in global scope
    print(f"Loaded {df_name}")


Loaded electron_GAT_hard
Loaded electron_GAT
Loaded pion_Trans_hard
Loaded electron_Trans
Loaded pion_SEC_hard
Loaded electron_Trans_hard
Loaded electron_SEC_hard
Loaded pion_SEC
Loaded pion_GAT
Loaded electron_SEC
Loaded pion_Trans
Loaded pion_GAT_hard


In [4]:
import numpy as np
import matplotlib.pyplot as plt
import csv

# Figure title for each model key; insertion order is the order figures are drawn in.
model_titles = dict(
    GAT="Graph Attention",
    SEC="StaticEdgeConv",
    Trans="Transformer",
)
model_types = list(model_titles)

# Curve key -> name of the global DataFrame holding that curve.
# Every curve is stored under its own key, so the mapping is generated from the
# three naming axes: model, particle, and the optional "_hard" suffix.
df_name_map = {
    curve: curve
    for curve in (
        f"{particle}_{model}{suffix}"
        for model in model_types
        for particle in ("electron", "pion")
        for suffix in ("", "_hard")
    )
}

# Line colour per particle type.
base_colors = dict(electron="blue", pion="red")

# Symbol used for the threshold in legend entries.
threshold_label = "T"

# Per-curve threshold values for the GAT curves; None means no override is set
# for that curve.
custom_threshold_overrides = {
    "electron_GAT": 0.15,
    "electron_GAT_hard": 0.13,
    "pion_GAT": None,
    "pion_GAT_hard": None,
}

# Filled in while plotting: curve key -> AUC, later written to CSV.
auc_scores = dict()

# NumPy 2.0 renamed np.trapz to np.trapezoid (np.trapz is deprecated there);
# use whichever one this installation provides.
_trapezoid = getattr(np, "trapezoid", None) or np.trapz


def _best_threshold_index(fake_rate, efficiency, ratio):
    """Return the index of the scan point selected as the operating point.

    The score that is minimised is the Euclidean distance of
    (fake_rate, efficiency) from the corner (0, 1) plus |ratio - 1|.

    All three arrays must be the raw, unpadded scan values so that the
    returned index can be used directly on the matching threshold array.
    """
    distances = np.sqrt((fake_rate - 0) ** 2 + (efficiency - 1) ** 2)
    return int(np.argmin(distances + np.abs(ratio - 1)))


def _roc_with_endpoints(fake_rate, efficiency):
    """Return (fake_rate, efficiency) ordered by fake rate and closed at both ends.

    (0, 0) is prepended when the smallest fake rate is above 0 and (1, 1) is
    appended when the largest is below 1, so the curve spans the full x-range
    before it is integrated and drawn.
    """
    # Trapezoidal integration needs ascending x. The stable sort leaves data
    # that is already ascending in fake rate exactly as it was.
    order = np.argsort(fake_rate, kind="stable")
    fake_rate = fake_rate[order]
    efficiency = efficiency[order]
    if fake_rate[0] > 0:
        fake_rate = np.insert(fake_rate, 0, 0.0)
        efficiency = np.insert(efficiency, 0, 0.0)
    if fake_rate[-1] < 1:
        fake_rate = np.append(fake_rate, 1.0)
        efficiency = np.append(efficiency, 1.0)
    return fake_rate, efficiency


# One figure per model type; each figure holds up to four curves
# (electron / pion, each with and without the "_hard" suffix).
for model_type in model_types:
    title = model_titles[model_type]
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.set_title(title, fontsize=18)

    for particle in ["electron", "pion"]:
        for hardness in ["", "_hard"]:
            key = f"{particle}_{model_type}{hardness}"
            varname = df_name_map.get(key)
            if varname not in globals():
                print(f"⚠️ DataFrame '{varname}' not found. Skipping '{key}'.")
                continue

            df = globals()[varname].sort_values("threshold").reset_index(drop=True)
            if df.empty:
                # Nothing to integrate or plot; indexing [0] below would raise.
                print(f"⚠️ DataFrame '{varname}' is empty. Skipping '{key}'.")
                continue

            # float dtype so the 0.0 / 1.0 endpoints are never truncated to ints.
            fake_rate = df["fake_rate"].to_numpy(dtype=float)
            efficiency = df["efficiency"].to_numpy(dtype=float)
            threshold = df["threshold"].values
            ratio = df["num_tracksters_ratio"].to_numpy(dtype=float)

            # Find optimal threshold on the raw scan, BEFORE endpoint padding.
            # Choosing it on the padded arrays shifts the index relative to
            # `threshold` and lets the synthetic (0, 0) / (1, 1) points, which
            # have no threshold of their own, be selected.
            idx = _best_threshold_index(fake_rate, efficiency, ratio)
            opt_T = threshold[idx]

            # Pad endpoints and compute AUC
            fake_rate, efficiency = _roc_with_endpoints(fake_rate, efficiency)
            auc = _trapezoid(efficiency, fake_rate)
            auc_scores[key] = auc

            # Apply override for GAT if provided
            if model_type == "GAT" and custom_threshold_overrides.get(key) is not None:
                display_T = custom_threshold_overrides[key]
            else:
                display_T = opt_T

            # Plot: colour encodes the particle, dashed lines mark "_hard" curves.
            color = base_colors[particle]
            ls = '--' if hardness else '-'
            label = (
                f"{particle.capitalize()}{' Hard' if hardness else ''} "
                f"(AUC={auc:.3f}, {threshold_label}={display_T:.2f})"
            )
            ax.plot(fake_rate, efficiency, linestyle=ls, color=color, label=label)

    ax.set_xlabel("Fake Rate", fontsize=14)
    ax.set_ylabel("Efficiency", fontsize=14)
    ax.tick_params(labelsize=12)
    ax.grid(True)
    ax.legend(fontsize=12)

    # Save each plot as its own PDF, then close it so figures do not accumulate.
    filename = f"{title}.pdf"
    fig.savefig(filename, bbox_inches='tight')
    plt.close(fig)
    print(f"✅ Saved '{filename}'")

# Write the collected AUC values to disk: a header row, then one row per curve
# in sorted key order, with each AUC formatted to six decimal places.
with open("auc_scores.csv", "w", newline='') as csv_file:
    auc_writer = csv.writer(csv_file)
    auc_writer.writerow(["Curve", "AUC"])
    auc_writer.writerows(
        [curve, f"{score:.6f}"] for curve, score in sorted(auc_scores.items())
    )
print("✅ All AUC scores saved to 'auc_scores.csv'")


✅ Saved 'Graph Attention.pdf'
✅ Saved 'StaticEdgeConv.pdf'
✅ Saved 'Transformer.pdf'
✅ All AUC scores saved to 'auc_scores.csv'
