In [2]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score, accuracy_score, roc_curve
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from tqdm import tqdm
from scipy.special import logit
import torch
import torch.nn as nn
from tqdm import tqdm
from nflows.distributions import StandardNormal, ConditionalDiagonalNormal
from nflows.transforms import CompositeTransform, ReversePermutation, MaskedAffineAutoregressiveTransform
from nflows.flows import Flow

# Compute device: use the GPU when CUDA is available, otherwise stay on the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [3]:
# Each dataset is three aligned .npy arrays: embeddings, labels and correlations
# (the correlations are used as the conditioning context further down).

# Classifier training data
X_train, y_train_full, context_train = (
    np.load('/home/katya.govorkova/gwak2/gwak/output/ResNet_wnb_HL/embeddings.npy'),
    np.load('/home/katya.govorkova/gwak2/gwak/output/ResNet_wnb_HL/labels.npy'),
    np.load('/home/katya.govorkova/gwak2/gwak/output/ResNet_wnb_HL/correlations.npy'),
)

# Classifier + NF test data (signals)
X_test, y_test_full, context_test = (
    np.load('/home/katya.govorkova/gwak2/gwak/output/ResNet_signals_HL/embeddings.npy'),
    np.load('/home/katya.govorkova/gwak2/gwak/output/ResNet_signals_HL/labels.npy'),
    np.load('/home/katya.govorkova/gwak2/gwak/output/ResNet_signals_HL/correlations.npy'),
)

# Second test set (background)
X_bkg, y_bkg_full, context_bkg = (
    np.load('/home/katya.govorkova/gwak2/gwak/output/ResNet_HL/embeddings.npy'),
    np.load('/home/katya.govorkova/gwak2/gwak/output/ResNet_HL/labels.npy'),
    np.load('/home/katya.govorkova/gwak2/gwak/output/ResNet_HL/correlations.npy'),
)

# Sanity check: report the shape of every loaded array, one line each
for _caption, _array in (
    ("X_train:", X_train),
    ("y_train:", y_train_full),
    ("context_train:", context_train),
    ("X_test (signals):", X_test),
    ("y_test (signals):", y_test_full),
    ("context_test (signals):", context_test),
    ("X_bkg:", X_bkg),
    ("y_bkg:", y_bkg_full),
    ("context_bkg:", context_bkg),
):
    print(_caption, _array.shape)

X_train: (199936, 16)
y_train: (199936,)
context_train: (199936, 1)
X_test (signals): (199936, 16)
y_test (signals): (199936,)
context_test (signals): (199936, 1)
X_bkg: (99968, 16)
y_bkg: (99968,)
context_bkg: (99968, 1)


In [6]:
import numpy as np

# Identify label of the signal class in training set.
# NOTE(review): this treats the smallest label value as "signal" — confirm
# against how the training labels file was produced.
train_signal_label = np.min(y_train_full)
print(f"Training signal label: {train_signal_label}")

# Extract only signal examples from training set (these are what the flow is fit on)
signal_mask_train = (y_train_full == train_signal_label)
X_train_signal = X_train[signal_mask_train]
context_train_signal = context_train[signal_mask_train]

print(f"[NF Training] Signal samples: {len(X_train_signal)}")

# Prepare full signal+background test set (no filtering yet).
# Copies are taken so later edits to the *_filtered arrays leave the loaded arrays intact.
X_test_filtered = X_test.copy()
y_test_filtered = y_test_full.copy()
context_test_filtered = context_test.copy()

# Binary labels for evaluation: 1 where the test label equals the signal label, else 0
test_signal_label = train_signal_label  # use same label
y_test_binary = (y_test_filtered == test_signal_label).astype(int)

n_test_signal = int(np.sum(y_test_binary == 1))
n_test_background = int(np.sum(y_test_binary == 0))
print(f"[Test Set] Signals: {n_test_signal}, Background: {n_test_background}")

# A test set with a single class makes ROC AUC undefined downstream, so flag it
# here, where the cause (label mismatch between files) is easiest to diagnose.
if n_test_signal == 0 or n_test_background == 0:
    print(
        "WARNING: test set contains only one class after binarisation "
        f"(signal label {test_signal_label!r}; "
        f"labels present in test set: {np.unique(y_test_filtered).tolist()}). "
        "ROC AUC will be undefined - check that the training and test label "
        "files use the same label encoding."
    )

# Use full background-only set as-is
# X_bkg, y_bkg_full, context_bkg already loaded
print(f"[Background-only Test Set] Samples: {len(X_bkg)}")

Training signal label: 1.0
[NF Training] Signal samples: 99968
[Test Set] Signals: 0, Background: 49984
[Background-only Test Set] Samples: 99968


In [7]:
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import TensorDataset, DataLoader

# Hold out 20% of the signal-only sample for validation; the fixed seed keeps the split repeatable
X_tr, X_val, ctx_tr, ctx_val = train_test_split(
    X_train_signal,
    context_train_signal,
    random_state=42,
    test_size=0.2,
)

# float32 tensors for the flow inputs and their conditioning context
X_tr_tensor, X_val_tensor, ctx_tr_tensor, ctx_val_tensor = (
    torch.tensor(array, dtype=torch.float32)
    for array in (X_tr, X_val, ctx_tr, ctx_val)
)

# Conditional-NF datasets yield (embedding, context) pairs
train_ds = TensorDataset(X_tr_tensor, ctx_tr_tensor)
val_ds = TensorDataset(X_val_tensor, ctx_val_tensor)

# Mixed test set additionally carries the binary label as a third element
X_test_tensor, ctx_test_tensor, y_test_tensor = (
    torch.tensor(array, dtype=torch.float32)
    for array in (X_test_filtered, context_test_filtered, y_test_binary)
)
test_ds = TensorDataset(X_test_tensor, ctx_test_tensor, y_test_tensor)

# Background-only test set, used unfiltered
X_bkg_tensor, ctx_bkg_tensor = (
    torch.tensor(array, dtype=torch.float32)
    for array in (X_bkg, context_bkg)
)
bkg_ds = TensorDataset(X_bkg_tensor, ctx_bkg_tensor)

# Only the training loader shuffles; the evaluation loaders keep dataset order
train_loader = DataLoader(train_ds, shuffle=True, batch_size=128)
val_loader, test_loader, bkg_loader = (
    DataLoader(dataset, batch_size=128)
    for dataset in (val_ds, test_ds, bkg_ds)
)

In [8]:
import pandas as pd

def summarize_features(X, name):
    """Print a heading and display the per-column min, max and mean of ``X``.

    Args:
        X: 2-D array-like; each column is reported as ``feat_<index>``.
        name: Text placed in front of " feature summary:" in the printed heading.
    """
    column_labels = [f'feat_{idx}' for idx in range(X.shape[1])]
    frame = pd.DataFrame(X, columns=column_labels)

    # One row per feature, one column per statistic
    stats = pd.DataFrame({'min': frame.min(), 'max': frame.max(), 'mean': frame.mean()})

    print(f"\n{name} feature summary:")
    display(stats)

# Report the three embedding sets first, then the three matching context sets
for _features, _heading in (
    (X_train_signal, "Training Embeddings (Signal Only)"),
    (X_test_filtered, "Test Embeddings (Signals + Background)"),
    (X_bkg, "Test Embeddings (Background Only)"),
    (context_train_signal, "Training Context (Signal Only)"),
    (context_test_filtered, "Test Context (Signals + Background)"),
    (context_bkg, "Test Context (Background Only)"),
):
    summarize_features(_features, _heading)


Training Embeddings (Signal Only) feature summary:


Unnamed: 0,min,max,mean
feat_0,-0.823491,1.768859,0.518016
feat_1,-1.828741,0.307277,-1.081162
feat_2,-0.106528,2.227935,1.364174
feat_3,-3.631402,-0.920592,-2.54412
feat_4,-0.792154,1.637247,0.537391
feat_5,-2.176033,0.280187,-1.270001
feat_6,-2.837937,0.49991,-0.662403
feat_7,-6.71572,-2.348159,-5.549748
feat_8,-0.756428,0.159245,-0.248232
feat_9,-2.548387,0.630174,-1.051776



Test Embeddings (Signals + Background) feature summary:


Unnamed: 0,min,max,mean
feat_0,-0.884004,1.712306,0.293583
feat_1,-1.7025,0.385448,-0.95361
feat_2,-0.277713,2.224108,1.231197
feat_3,-3.562857,-0.865574,-2.478424
feat_4,-0.727057,1.710533,0.566758
feat_5,-2.194729,0.666447,-1.134076
feat_6,-2.957579,0.447521,-0.887502
feat_7,-6.692633,-2.577528,-5.540372
feat_8,-0.830857,0.341462,-0.237391
feat_9,-2.464218,0.676337,-0.86181



Test Embeddings (Background Only) feature summary:


Unnamed: 0,min,max,mean
feat_0,-0.923425,1.544518,0.075557
feat_1,-1.602362,0.356237,-0.830534
feat_2,-0.28645,2.032506,1.101465
feat_3,-3.369917,-0.613869,-2.414099
feat_4,-0.765827,1.72749,0.595974
feat_5,-1.965027,0.666447,-1.001327
feat_6,-2.9576,0.246266,-1.106921
feat_7,-6.462726,-2.335717,-5.530254
feat_8,-0.782748,0.341462,-0.226501
feat_9,-2.187151,0.736296,-0.677595



Training Context (Signal Only) feature summary:


Unnamed: 0,min,max,mean
feat_0,-0.332359,0.309181,1.5e-05



Test Context (Signals + Background) feature summary:


Unnamed: 0,min,max,mean
feat_0,-0.2862,0.382235,3.1e-05



Test Context (Background Only) feature summary:


Unnamed: 0,min,max,mean
feat_0,-0.316513,0.345902,0.000131


In [9]:
import torch
import torch.nn as nn
from tqdm import tqdm
from nflows.distributions import StandardNormal, ConditionalDiagonalNormal
from nflows.transforms import CompositeTransform, ReversePermutation, MaskedAffineAutoregressiveTransform
from nflows.flows import Flow

# Config
n_dims = X_tr.shape[1]            # width of one embedding vector
context_dims = ctx_tr.shape[1]    # width of the conditioning vector
n_flow_steps = 5
hidden_dim = 128
use_conditioning = True

# nflows uses `context_features=None` to mean "no conditioning". A width of 0
# would still create context layers, which then reject any real context batch,
# so the unconditional case must pass None (and feed no context to the flow).
flow_context_features = context_dims if use_conditioning else None

# Build flow transform: each step is a masked autoregressive affine layer
# followed by a reversal of the feature order.
transforms = []
for _ in range(n_flow_steps):
    maf = MaskedAffineAutoregressiveTransform(
        features=n_dims,
        hidden_features=hidden_dim,
        num_blocks=4,
        context_features=flow_context_features
    )
    transforms.append(maf)
    transforms.append(ReversePermutation(features=n_dims))

transform = CompositeTransform(transforms)

# Base distribution: a diagonal normal whose 2 * n_dims parameters are produced
# from the context by a linear layer, or a fixed standard normal when unconditioned.
if use_conditioning:
    base_dist = ConditionalDiagonalNormal(
        shape=[n_dims],
        context_encoder=nn.Linear(context_dims, 2 * n_dims)
    )
else:
    base_dist = StandardNormal([n_dims])

# Create flow model
flow = Flow(transform=transform, distribution=base_dist).to(device)

# Optimizer
optimizer = torch.optim.Adam(flow.parameters(), lr=1e-3)

# Training loop with validation.
# The reported losses are the mean over batches of each batch's mean negative log-likelihood.
epochs = 20
for epoch in range(epochs):
    flow.train()
    total_train_loss = 0.0
    for xb, ctx in tqdm(train_loader, desc=f"[Train] Epoch {epoch+1:02d}"):
        xb, ctx = xb.to(device), ctx.to(device)
        optimizer.zero_grad()
        # Only hand the context to the flow when it was built to consume one
        loss = -flow.log_prob(inputs=xb, context=ctx if use_conditioning else None).mean()
        loss.backward()
        optimizer.step()
        total_train_loss += loss.item()

    avg_train_loss = total_train_loss / len(train_loader)

    # Validation (no gradient tracking, eval mode)
    flow.eval()
    total_val_loss = 0.0
    with torch.no_grad():
        for xb, ctx in val_loader:
            xb, ctx = xb.to(device), ctx.to(device)
            val_loss = -flow.log_prob(inputs=xb, context=ctx if use_conditioning else None).mean()
            total_val_loss += val_loss.item()

    avg_val_loss = total_val_loss / len(val_loader)

    print(f"Epoch {epoch+1:02d} | Train NLL: {avg_train_loss:.4f} | Val NLL: {avg_val_loss:.4f}")

[Train] Epoch 01: 100%|████████████████████████████████████████████████████████████████████████████| 625/625 [00:14<00:00, 44.62it/s]


Epoch 01 | Train NLL: -18.8666 | Val NLL: -23.8698


[Train] Epoch 02: 100%|████████████████████████████████████████████████████████████████████████████| 625/625 [00:14<00:00, 43.32it/s]


Epoch 02 | Train NLL: -24.4979 | Val NLL: -25.3721


[Train] Epoch 03: 100%|████████████████████████████████████████████████████████████████████████████| 625/625 [00:14<00:00, 43.33it/s]


Epoch 03 | Train NLL: -25.4077 | Val NLL: -25.8603


[Train] Epoch 04: 100%|████████████████████████████████████████████████████████████████████████████| 625/625 [00:13<00:00, 45.06it/s]


Epoch 04 | Train NLL: -25.7593 | Val NLL: -26.0934


[Train] Epoch 05: 100%|████████████████████████████████████████████████████████████████████████████| 625/625 [00:14<00:00, 43.95it/s]


Epoch 05 | Train NLL: -26.1082 | Val NLL: -26.1460


[Train] Epoch 06: 100%|████████████████████████████████████████████████████████████████████████████| 625/625 [00:13<00:00, 46.62it/s]


Epoch 06 | Train NLL: -26.2282 | Val NLL: -26.0370


[Train] Epoch 07: 100%|████████████████████████████████████████████████████████████████████████████| 625/625 [00:14<00:00, 44.00it/s]


Epoch 07 | Train NLL: -26.3773 | Val NLL: -26.1618


[Train] Epoch 08: 100%|████████████████████████████████████████████████████████████████████████████| 625/625 [00:13<00:00, 46.40it/s]


Epoch 08 | Train NLL: -26.5240 | Val NLL: -26.4759


[Train] Epoch 09: 100%|████████████████████████████████████████████████████████████████████████████| 625/625 [00:15<00:00, 41.64it/s]


Epoch 09 | Train NLL: -26.5951 | Val NLL: -26.7175


[Train] Epoch 10: 100%|████████████████████████████████████████████████████████████████████████████| 625/625 [00:13<00:00, 45.83it/s]


Epoch 10 | Train NLL: -26.7317 | Val NLL: -26.9275


[Train] Epoch 11: 100%|████████████████████████████████████████████████████████████████████████████| 625/625 [00:14<00:00, 44.45it/s]


Epoch 11 | Train NLL: -26.7188 | Val NLL: -26.6317


[Train] Epoch 12: 100%|████████████████████████████████████████████████████████████████████████████| 625/625 [00:13<00:00, 44.72it/s]


Epoch 12 | Train NLL: -26.7328 | Val NLL: -26.7116


[Train] Epoch 13: 100%|████████████████████████████████████████████████████████████████████████████| 625/625 [00:13<00:00, 44.84it/s]


Epoch 13 | Train NLL: -26.8576 | Val NLL: -26.8212


[Train] Epoch 14: 100%|████████████████████████████████████████████████████████████████████████████| 625/625 [00:14<00:00, 44.63it/s]


Epoch 14 | Train NLL: -26.9053 | Val NLL: -26.9680


[Train] Epoch 15: 100%|████████████████████████████████████████████████████████████████████████████| 625/625 [00:13<00:00, 44.89it/s]


Epoch 15 | Train NLL: -26.9073 | Val NLL: -26.9744


[Train] Epoch 16: 100%|████████████████████████████████████████████████████████████████████████████| 625/625 [00:14<00:00, 43.98it/s]


Epoch 16 | Train NLL: -26.9820 | Val NLL: -26.7987


[Train] Epoch 17: 100%|████████████████████████████████████████████████████████████████████████████| 625/625 [00:13<00:00, 45.77it/s]


Epoch 17 | Train NLL: -26.9889 | Val NLL: -26.7509


[Train] Epoch 18: 100%|████████████████████████████████████████████████████████████████████████████| 625/625 [00:13<00:00, 47.22it/s]


Epoch 18 | Train NLL: -27.0472 | Val NLL: -26.7878


[Train] Epoch 19: 100%|████████████████████████████████████████████████████████████████████████████| 625/625 [00:13<00:00, 47.71it/s]


Epoch 19 | Train NLL: -27.1057 | Val NLL: -27.0166


[Train] Epoch 20: 100%|████████████████████████████████████████████████████████████████████████████| 625/625 [00:13<00:00, 47.51it/s]


Epoch 20 | Train NLL: -27.0738 | Val NLL: -26.9592


In [10]:
from sklearn.metrics import roc_auc_score, accuracy_score

flow.eval()
nf_scores, y_test_true = [], []

# Score every test batch with the trained flow; labels are collected in the same order
with torch.no_grad():
    for xb, ctx, yb in test_loader:
        xb, ctx = xb.to(device), ctx.to(device)
        # Feed the context only when the flow was built as a conditional flow
        ll = flow.log_prob(inputs=xb, context=ctx if use_conditioning else None)  # log-likelihood
        nf_scores.append(ll.cpu().numpy())
        y_test_true.append(yb.cpu().numpy())

# Concatenate results
nf_scores = np.concatenate(nf_scores)
y_test_true = np.concatenate(y_test_true)

# Invert scores for anomaly detection (background = anomaly = low LL)
anomaly_scores = -nf_scores

# Metrics.
# Label 1 marks the signal class the flow was trained on, so the score that
# should rank positives highest is the log-likelihood itself; ranking by
# `anomaly_scores` would report 1 - AUC.
y_true_int = y_test_true.astype(int)
if np.unique(y_true_int).size < 2:
    # ROC AUC is undefined with one class; report NaN instead of relying on
    # library-version-specific behaviour (exception vs. warning).
    print("WARNING: y_test_true contains a single class; ROC AUC is undefined.")
    roc_auc = float("nan")
else:
    roc_auc = roc_auc_score(y_true_int, nf_scores)

# Crude threshold: everything below the median anomaly score is predicted as signal
pred_signal = (anomaly_scores < np.median(anomaly_scores)).astype(int)
acc = accuracy_score(y_true_int, pred_signal)

print(f"[NF Evaluation] AUC: {roc_auc:.3f}, Accuracy (median cutoff): {acc:.3f}")

[NF Evaluation] AUC: nan, Accuracy (median cutoff): 0.500




In [None]:
# Label diagnostics: which values occur in the test labels, and how the
# binarised labels are distributed, next to the label treated as signal.
test_label_counts = np.unique(y_test_filtered, return_counts=True)
binary_label_counts = np.unique(y_test_binary, return_counts=True)

print("Train signal label:", train_signal_label)
print("Test signal label counts:", test_label_counts)
print("y_test_binary distribution:", binary_label_counts)

In [None]:
# NOTE(review): `model` is not defined in any cell visible in this notebook;
# it presumably refers to a classifier created elsewhere — confirm before running.
model.eval()

# Sigmoid of the model output for every background batch (context is ignored here)
with torch.no_grad():
    bkg_probs = np.concatenate([
        torch.sigmoid(model(xb.to(device))).cpu().numpy()
        for xb, _ in bkg_loader
    ])

fig, ax = plt.subplots()
ax.hist(bkg_probs, bins=50, color='orange', alpha=0.7)
ax.set_title("Classifier Scores on Background-Only Set")
ax.set_xlabel("Score")
ax.set_ylabel("Count")
ax.grid()
plt.show()

# How many background samples score above the 0.5 cut
print(f"Background scores > 0.5: {(bkg_probs > 0.5).sum()} / {len(bkg_probs)}")

In [None]:
# Load trained NF (TorchScript export), switch to eval mode and move it to the active device
nf_model = torch.jit.load("/home/katya.govorkova/gwak2/gwak/output/ResNet_NF_from_file_conditioning_HL/model_JIT.pt").eval().to(device)

# Evaluate the NF on the test set in fixed-size chunks.
# NOTE: this reassigns `nf_scores`; after this cell it holds the NEGATED model output.
nf_batch_size = 512
nf_scores = []

with torch.no_grad():
    for i in range(0, len(X_test), nf_batch_size):
        xb = torch.tensor(X_test[i:i+nf_batch_size], dtype=torch.float32).to(device)
        # Build the context batch from the loaded array so it stays row-aligned
        # with `xb` (the original referenced a tensor name that is never defined).
        ctx = torch.tensor(context_test[i:i+nf_batch_size], dtype=torch.float32).to(device)
        # NOTE(review): assumes the exported module's forward returns log_prob(x, context) — confirm
        neg_log_probs = nf_model(xb, context=ctx) * (-1)
        nf_scores.append(neg_log_probs.cpu().numpy())

nf_scores = np.concatenate(nf_scores)

In [None]:
from sklearn.metrics import roc_auc_score, roc_curve

# Class of interest and the classes treated as background for this comparison
signal_label = 8
background_labels = [10, 11]

# Keep only test samples belonging to the chosen signal or background classes
mask = np.isin(y_test_full, [signal_label, *background_labels])
y_bin_true = (y_test_full[mask] == signal_label).astype(int)

# Classifier-based scores.
# NOTE(review): `y_test_probs` is not defined in any cell visible in this notebook — confirm its origin.
clf_scores = y_test_probs[mask]

# NF scores are taken from `nf_scores` as stored; no sign flip is applied here.
# NOTE(review): their orientation depends on which cell last assigned `nf_scores` — confirm.
nf_anomaly_scores = nf_scores[mask]

fig, ax = plt.subplots(figsize=(8, 6))

# Classifier ROC
auc_clf = roc_auc_score(y_bin_true, clf_scores)
fpr_clf, tpr_clf, _ = roc_curve(y_bin_true, clf_scores)
ax.plot(fpr_clf, tpr_clf, label=f"Classifier (AUC={auc_clf:.2f})")

# NF ROC
auc_nf = roc_auc_score(y_bin_true, nf_anomaly_scores)
fpr_nf, tpr_nf, _ = roc_curve(y_bin_true, nf_anomaly_scores)
ax.plot(fpr_nf, tpr_nf, label=f"NF (AUC={auc_nf:.2f})")

# Chance diagonal plus axis decoration
ax.plot([0, 1], [0, 1], '--', color='gray')
ax.set_xlabel("False Positive Rate")
ax.set_ylabel("True Positive Rate")
ax.set_title("ROC Curve for Signal 8 (WNB) vs Background (10,11)")
ax.grid(True)
ax.legend()
plt.show()

In [None]:
# Density histograms of the classifier score per true class, drawn on a logarithmic x-axis
fig, ax = plt.subplots(figsize=(8, 5))
for _class_value, _class_name in ((1, 'Signal'), (0, 'Background')):
    ax.hist(
        y_test_probs[y_test_true == _class_value],
        bins=100,
        alpha=0.6,
        label=_class_name,
        density=True,
    )
ax.set_xscale('log')
ax.set_xlabel('Prediction Score (log scale)')
ax.set_ylabel('Density')
ax.set_title('Classifier Score Tail (log scale)')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Two panels side by side: classifier scores (left) and NF scores (right)
fig, axs = plt.subplots(1, 2, figsize=(14, 5), sharey=False)
clf_ax, nf_ax = axs

# Boolean selectors for the two true classes, shared by both panels
is_signal = (y_test_true == 1)
is_background = (y_test_true == 0)
hist_style = dict(bins=100, alpha=0.6, density=True)

# NOTE(review): the axis labels and the title mention a log scale, but no axis
# is switched to log scale in this cell — confirm which one is intended.

# --- Left: Classifier --- (legend entries carry each class's maximum score)
clf_signal = y_test_probs[is_signal]
clf_background = y_test_probs[is_background]
clf_ax.hist(clf_signal, label=f'Signal, {max(clf_signal):.2f}', **hist_style)
clf_ax.hist(clf_background, label=f'Background, {max(clf_background):.2f}', **hist_style)
clf_ax.set_xlabel('Classifier Score (log scale)')
clf_ax.set_ylabel('Density')
clf_ax.set_title('Classifier Output')
clf_ax.legend()
clf_ax.grid(True)

# --- Right: NF --- (scores are plotted as stored in `nf_scores`)
nf_signal = nf_scores[is_signal]
nf_background = nf_scores[is_background]
nf_ax.hist(nf_signal, label=f'Signal, {max(nf_signal):.2f}', **hist_style)
nf_ax.hist(nf_background, label=f'Background, {max(nf_background):.2f}', **hist_style)
nf_ax.set_xlabel('−Log-Likelihood (log scale)')
nf_ax.set_title('Normalizing Flow Output')
nf_ax.legend()
nf_ax.grid(True)

fig.suptitle("Signal vs Background Score Distributions (log-scale)")
plt.tight_layout()
plt.show()

In [None]:
# Range (smallest, largest) of the classifier scores on true-signal samples
signal_probs = y_test_probs[y_test_true == 1]
min(signal_probs), max(signal_probs)

In [None]:
# Range (smallest, largest) of the classifier scores on true-background samples
background_probs = y_test_probs[y_test_true == 0]
min(background_probs), max(background_probs)

In [None]:
from gwak.train.plotting import make_corner
# Corner plot of the training embeddings, grouped by their integer-cast labels.
# The labels loaded with the training embeddings are stored in `y_train_full`;
# no `y_train` variable is defined in this notebook.
fig = make_corner(X_train, (y_train_full).astype(int), return_fig=True, label_names=['WNB', 'Background'])