# Imports

In [None]:
import multiprocessing
num_available_cpus = multiprocessing.cpu_count()

print("Number of available CPUs:", num_available_cpus)

import sys

import math
import time
import tqdm

import numpy as np
import scipy as sp
from scipy import stats

import itertools
import logging
import matplotlib.pyplot as plt

import pandas as pd
import h5py

from sklearn import metrics

import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F
from torch.distributions import MultivariateNormal
import torch.utils.data as utils

from argparse import ArgumentParser
import re

sys.path.append("../new_flows")
from flows import RealNVP, Planar, MAF
from models import NormalizingFlowModel

In [None]:
from nflows.flows.base import Flow
from nflows.flows.autoregressive import MaskedAutoregressiveFlow
from nflows.distributions.normal import StandardNormal
from nflows.transforms.base import CompositeTransform
from nflows.transforms.autoregressive import MaskedAffineAutoregressiveTransform, MaskedPiecewiseQuadraticAutoregressiveTransform, MaskedPiecewiseRationalQuadraticAutoregressiveTransform
from nflows.transforms.permutations import ReversePermutation

In [None]:
import multiprocessing

# Count the logical CPUs so PyTorch's intra-op thread pool can be sized to match.
num_available_cpus = multiprocessing.cpu_count()

# Select the first CUDA device when one is present, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print("Device =", device)

# On GPU machines make newly created tensors default to CUDA float tensors;
# on CPU-only machines just report that fact.
if torch.cuda.is_available():
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
else:
    print ('cpu')

torch.set_num_threads(num_available_cpus)

# Report the resulting intra-op and inter-op thread counts.
print(torch.get_num_threads())
print(torch.get_num_interop_threads())

In [None]:
# Number of input columns per event; used as the encoder input width and decoder output width.
num_features = 12
# Passed as `hidden_features` to every masked piecewise (rational-)quadratic autoregressive transform.
NS_hidden_features = 48
flow_type = 'NSQUAD' #Options are 'MAF', 'Planar' (not recommended), 'NSQUAD', and 'NSRATQUAD'
# Optional upper bound on |delta_eta|; None disables the cut. The value is also embedded in weight-file names.
eta_cut = None

In [None]:
# Hyper-parameter grids: the evaluation loops further down iterate over
# zdim x nflow x betas and load one set of trained weights per combination.
# Latent dimensionalities.
zdim = [4]
# Number of flow transforms stacked on the latent sample.
nflow = [10]
# Learning rates; not referenced anywhere else in the visible part of this notebook.
lrs = [1e-3]
# KL weights; the value only enters through the weight-file name here.
betas = [0.0,0.01,0.05,0.1,1.0]
#betas = [0.1,0.5,1.0,2.0,10.0]
#betas = [20.0,25.0,50.0,75.0,100.0]

In [None]:
class model_result(object):
    """Per-event losses of one autoencoder on signal and background, with ROC helpers.

    The losses are read from ``<prefix>_sigloss.npy`` and ``<prefix>_bkgloss.npy``.

    ``aetype`` selects the direction of the cut on the loss:

    * ``'sig'`` -- events are selected when ``loss < cut``.
    * ``'bkg'`` -- events are selected when ``loss > cut``.

    Any other value makes the scan methods return empty lists.
    """

    def __init__(self, prefix, aetype):
        """Load both loss arrays stored under ``prefix`` and remember the AE type."""
        # The file suffixes are literal strings; the previous code concatenated the
        # undefined names ``sigloss`` / ``bkgloss`` and failed with NameError.
        self.sigloss = np.load(prefix + '_sigloss.npy')
        self.bkgloss = np.load(prefix + '_bkgloss.npy')
        self.aetype = aetype

    def _pass_counts(self, loss, cuts):
        """Return, for every value in ``cuts``, how many entries of ``loss`` are selected.

        Sorting once and using ``searchsorted`` replaces one full array comparison
        per cut. NaNs are dropped first because they never satisfy ``<`` or ``>``.
        """
        values = np.asarray(loss).ravel()
        ordered = np.sort(values[~np.isnan(values)])
        if self.aetype == 'sig':
            # Number of entries strictly below each cut.
            return np.searchsorted(ordered, cuts, side='left')
        # Number of entries strictly above each cut.
        return ordered.size - np.searchsorted(ordered, cuts, side='right')

    def get_tpr_fpr(self):
        """Scan 100001 cuts in [0, 10000] and return ``(tpr, fpr)`` as lists of floats.

        ``tpr`` is the selected fraction of signal events and ``fpr`` the selected
        fraction of background events at each cut.
        """
        if self.aetype not in ('sig', 'bkg'):
            # Unknown AE types produced empty lists before; keep that behaviour.
            return [], []
        bins = np.linspace(0,10000,100001)
        tpr = self._pass_counts(self.sigloss, bins) / len(self.sigloss)
        fpr = self._pass_counts(self.bkgloss, bins) / len(self.bkgloss)
        return tpr.tolist(), fpr.tolist()

    def get_precision_recall(self):
        """Scan 10001 cuts in [0, 1000] and return ``(precision, recall)`` as lists.

        Precision is ``selected signal / (selected signal + selected background)``.
        Where no event at all is selected the precision is undefined and reported
        as NaN (the previous code raised ZeroDivisionError there).
        """
        if self.aetype not in ('sig', 'bkg'):
            return [], []
        bins = np.linspace(0,1000,10001)
        sig_pass = self._pass_counts(self.sigloss, bins)
        bkg_pass = self._pass_counts(self.bkgloss, bins)
        selected = sig_pass + bkg_pass
        precision = np.full(bins.shape, np.nan)
        np.divide(sig_pass, selected, out=precision, where=selected > 0)
        tpr = sig_pass / len(self.sigloss)
        return precision.tolist(), tpr.tolist()

    def _fpr_at_tpr(self, target):
        """Return the FPR at the first cut where the TPR crosses ``target``, or None.

        For ``'sig'`` the TPR rises with the cut, so the working point is the first
        cut reaching ``target``. For ``'bkg'`` the TPR falls with the cut, so it is
        the last cut still at or above ``target``.
        """
        tprs, fprs = self.get_tpr_fpr()
        for i in range(len(tprs) - 1):
            if tprs[i] < target <= tprs[i + 1]:
                return fprs[i + 1]
            if tprs[i + 1] < target <= tprs[i]:
                return fprs[i]
        return None

    def FPRat95TPR(self):
        """Return the background efficiency at 95% signal efficiency (None if never reached)."""
        return self._fpr_at_tpr(0.95)

    def FPRat99TPR(self):
        """Return the background efficiency at 99% signal efficiency (None if never reached)."""
        return self._fpr_at_tpr(0.99)

In [None]:
#### MAF / Planar / NSQUAD / NSRATQUAD
class VAE_NF(nn.Module):
    """Variational autoencoder whose latent sample is passed through a normalizing flow.

    The encoder maps ``num_features`` inputs to ``2 * D`` numbers (mean and
    log-variance of a D-dimensional latent), a reparametrised sample is pushed
    through ``K`` flow transforms, and the decoder maps the result back to
    ``num_features`` outputs. The flow family is chosen by the module-level
    ``flow_type`` string at construction time.
    """

    def __init__(self, K, D):
        """Build the encoder, the decoder and a stack of ``K`` flows over a ``D``-dim latent."""
        super().__init__()
        self.dim = D
        self.K = K
        
        # The string literal below is a disabled, deeper encoder/decoder variant kept for reference.
        '''
        self.encoder = nn.Sequential(
            nn.Linear(num_features, 50),
            nn.LeakyReLU(True),
            nn.Linear(50, 30),
            nn.LeakyReLU(True),
            nn.Linear(30, 20),
            nn.LeakyReLU(True),
            nn.Linear(20, D * 2)
        )

        self.decoder = nn.Sequential(
            nn.Linear(D, 20),
            nn.LeakyReLU(True),
            nn.Linear(20, 30),
            nn.LeakyReLU(True),
            nn.Linear(30, 50),
            nn.LeakyReLU(True),
            nn.Linear(50, num_features)
        )
        '''
        
        # NOTE(review): the PyTorch signature is LeakyReLU(negative_slope=0.01, inplace=False),
        # so `nn.LeakyReLU(True)` sets negative_slope=True (i.e. slope 1, an identity
        # activation) rather than inplace=True. Left untouched because the saved weights
        # loaded by this notebook were presumably trained with exactly this definition -- confirm.
        self.encoder = nn.Sequential(
            nn.Linear(num_features, 30),
            nn.LeakyReLU(True),
            nn.Linear(30, 20),
            nn.LeakyReLU(True),
            nn.Linear(20, D * 2)
        )

        self.decoder = nn.Sequential(
            nn.Linear(D, 20),
            nn.LeakyReLU(True),
            nn.Linear(20, 30),
            nn.LeakyReLU(True),
            nn.Linear(30, num_features)
        )
        
        if flow_type == 'NSQUAD' or flow_type == 'NSRATQUAD': 
            #----- BEGIN NEW NEURAL SPLINE CODE
            
            # Each of the K layers is a feature-order reversal followed by one
            # masked autoregressive spline transform (quadratic or rational-quadratic).
            bkg_transforms = []
            for _ in range(K):
                bkg_transforms.append(ReversePermutation(features=D))
                if flow_type == 'NSQUAD': 
                    bkg_transforms.append(MaskedPiecewiseQuadraticAutoregressiveTransform(features=D, 
                                                                      hidden_features=NS_hidden_features, tail_bound = 3.0, tails='linear'))
                elif flow_type == 'NSRATQUAD': 
                    bkg_transforms.append(MaskedPiecewiseRationalQuadraticAutoregressiveTransform(features=D, 
                                                                      hidden_features=NS_hidden_features, tail_bound = 3.0, tails='linear'))

            #bkg_transform = CompositeTransform(bkg_transforms)
            # Standard-normal base distribution; `.cuda()` means construction requires a CUDA device.
            bkg_base_dist = MultivariateNormal(torch.zeros(D).cuda(), torch.eye(D).cuda())
            self.flows = NormalizingFlowModel(bkg_base_dist, bkg_transforms)
            print(self.flows)
            
            #----- END NEW NEURAL SPLINE CODE
        
        elif flow_type == 'MAF' or flow_type == 'Planar': 
            if flow_type == 'MAF': 
                flow_init = MAF(dim=D)
            elif flow_type == 'Planar': 
                flow_init = Planar(dim=D)
            # NOTE(review): this list holds the SAME flow object K times, so all K layers
            # share one set of parameters -- confirm this matches how the weights were trained.
            flows_init = [flow_init for _ in range(K)]
            prior = MultivariateNormal(torch.zeros(D).cuda(), torch.eye(D).cuda())
            self.flows = NormalizingFlowModel(prior, flows_init)
            print(self.flows)
        
        else: 
            # NOTE(review): only prints; `self.flows` is never set on this path, so
            # forward() will fail later with AttributeError.
            print('ERROR: Flow Type not properly specified.')

    def forward(self, x):
        """Encode ``x``, sample a latent, run it through the flows and decode.

        Returns ``(x_prime, kl_div)``: the reconstruction and a scalar made of the
        batch-averaged Gaussian KL term minus the mean of the flows' summed
        log-abs-det-Jacobian.

        The latent is sampled with ``torch.randn_like`` on every call (there is no
        check of ``self.training``), so the output is stochastic in eval mode too.
        """
        # Run Encoder and get NF params
        enc = self.encoder(x)
        # First `dim` columns are the mean, the next `dim` columns the log-variance.
        mu = enc[:, :self.dim]
        log_var = enc[:, self.dim: self.dim * 2]

        # Re-parametrize
        sigma = (log_var * .5).exp()
        z = mu + sigma * torch.randn_like(sigma)
        # Closed-form KL to a standard normal, summed over both batch and latent dimensions.
        kl_div = -0.5 * torch.sum(1 + log_var - mu.pow(2) - log_var.exp())
        # Construct more expressive posterior with NF
        
        # The flow model returns three values; the middle one is unused here.
        z_k, _, sum_ladj = self.flows(z)
        
        kl_div = kl_div / x.size(0) - sum_ladj.mean()  # mean over batch

        # Run Decoder
        x_prime = self.decoder(z_k)
        return x_prime, kl_div

# Prepping test dataset

In [None]:
# Default size applied to every figure created after this cell.
plt.rcParams["figure.figsize"] = (5,5)

In [None]:
# Number of HDF5 batch files read when building the background and signal test sets.
num_bkg_batches = 1
num_sig_batches = 35

# Event selection: keep events with Mjj > Mjj_cut and both jets with pt > pt_cut.
Mjj_cut = 1200
pt_cut = 550
# Optional upper bound on |delta_eta| (None = no cut); re-assigns the value set earlier in the notebook.
eta_cut = None
# When True, every feature column is Box-Cox transformed after the selection.
box_cox = False

In [None]:
# Build the background test set: read each batch file, derive the 12 features,
# keep events with truth_label == 0 that pass the kinematic cuts and have finite
# substructure ratios, and stack the selected rows of all batches into `bkg_data`.

# Ratios of zero-valued tau variables produce NaNs; silence those warnings
# (the affected events are removed by the np.isfinite requirements below).
np.seterr(invalid = 'ignore')

bkg_batches = []

for batch_number in range(num_bkg_batches): 
    train_batch = "/nobackup/users/myunus/CASE_samples/BB_batch%s.h5" % (batch_number)

    # Read everything needed into memory inside the context manager so the
    # HDF5 handle is closed again (it was previously left open for every batch).
    with h5py.File(train_batch, "r") as f:
        if batch_number == 0: 
            print("Keys: %s" % f.keys())

        jet_kinematics = f['jet_kinematics'][:]
        jet1_extraInfo = f['jet1_extraInfo'][:]
        jet2_extraInfo = f['jet2_extraInfo'][:]
        # assumes truth_label has shape (N, 1), matching the reshaped columns below -- TODO confirm
        truth_label = f['truth_label'][:]

    delta_eta = jet_kinematics[:,1]

    Mjj = np.reshape(jet_kinematics[:,0], (-1,1))
    Mj1 = np.reshape(jet_kinematics[:,5], (-1,1))
    Mj2 = np.reshape(jet_kinematics[:,9], (-1,1))

    jet1_pt = np.reshape(jet_kinematics[:,2], (-1,1))
    jet2_pt = np.reshape(jet_kinematics[:,6], (-1,1))

    jet1_tau1 = np.reshape(jet1_extraInfo[:,0], (-1,1))
    jet1_tau2 = np.reshape(jet1_extraInfo[:,1], (-1,1))
    jet1_tau3 = np.reshape(jet1_extraInfo[:,2], (-1,1))
    jet1_tau4 = np.reshape(jet1_extraInfo[:,3], (-1,1))
    #jet1_btagscore = np.reshape(jet1_extraInfo[:,5],(-1,1))
    jet1_numpfconst = np.reshape(jet1_extraInfo[:,6],(-1,1))

    jet1_tau21 = jet1_tau2 / jet1_tau1
    jet1_tau32 = jet1_tau3 / jet1_tau2
    jet1_tau43 = jet1_tau4 / jet1_tau3
    jet1_sqrt_tau21 = np.sqrt(jet1_tau21) / jet1_tau1

    jet2_tau1 = np.reshape(jet2_extraInfo[:,0], (-1,1))
    jet2_tau2 = np.reshape(jet2_extraInfo[:,1], (-1,1))
    jet2_tau3 = np.reshape(jet2_extraInfo[:,2], (-1,1))
    jet2_tau4 = np.reshape(jet2_extraInfo[:,3], (-1,1))
    #jet2_btagscore = np.reshape(jet2_extraInfo[:,5],(-1,1))
    jet2_numpfconst = np.reshape(jet2_extraInfo[:,6],(-1,1))

    jet2_tau21 = jet2_tau2 / jet2_tau1
    jet2_tau32 = jet2_tau3 / jet2_tau2
    jet2_tau43 = jet2_tau4 / jet2_tau3
    jet2_sqrt_tau21 = np.sqrt(jet2_tau21) / jet2_tau1

    # Column order: six jet-1 features (0-5) followed by the same six for jet 2 (6-11).
    data = np.concatenate((Mj1, jet1_tau21, jet1_tau32, jet1_tau43, jet1_sqrt_tau21, jet1_numpfconst, 
                       Mj2, jet2_tau21, jet2_tau32, jet2_tau43, jet2_sqrt_tau21, jet2_numpfconst), axis=1)

    # Row indices of background events passing every requirement.
    bkg_indices = np.where((truth_label == 0) 
                              & (Mjj > Mjj_cut) 
                              & (jet1_pt > pt_cut) 
                              & (jet2_pt > pt_cut)
                              & (np.isfinite(jet1_tau21))
                              & (np.isfinite(jet1_tau32))
                              & (np.isfinite(jet1_tau43))
                              & (np.isfinite(jet1_sqrt_tau21))
                              & (np.isfinite(jet2_tau21))
                              & (np.isfinite(jet2_tau32))
                              & (np.isfinite(jet2_tau43))
                              & (np.isfinite(jet2_sqrt_tau21)))[0]

    if eta_cut is not None:    
        bkg_eta_indices = np.where((np.abs(delta_eta) < eta_cut))[0]     
        bkg_indices = np.intersect1d(bkg_indices, bkg_eta_indices)

    bkg_batches.append(data[bkg_indices])

# Stack all batches once; with no batches keep the original empty-array placeholder.
bkg_data = np.concatenate(bkg_batches, axis=0) if bkg_batches else np.array([])

# Box-Cox is applied once to the full sample. It used to sit inside the batch
# loop, which re-transformed the already transformed rows every time a new
# batch was appended.
if box_cox and bkg_data.size: 
    transformed_data = np.zeros(bkg_data.shape)
    best_lambdas = []
    for col in range(num_features): 
        # Shift each column to be strictly positive, as Box-Cox requires.
        boxcox_col, best_lambda = stats.boxcox(bkg_data[:,col] + np.abs(np.min(bkg_data[:,col])) + 1)
        transformed_data[:,col] = boxcox_col
        best_lambdas.append(best_lambda)

    print(best_lambdas)

    bkg_data = transformed_data

print(bkg_data.shape)

In [None]:
# Build the signal test set: read each batch file, derive the 12 features,
# keep events with truth_label == 2 that pass the kinematic cuts and have finite
# substructure ratios, and stack the selected rows of all batches into `sig_data`.

# Ratios of zero-valued tau variables produce NaNs; silence those warnings
# (the affected events are removed by the np.isfinite requirements below).
np.seterr(invalid = 'ignore')

sig_batches = []

for batch_number in range(num_sig_batches): 
    train_batch = "/nobackup/users/myunus/CASE_samples/BB_batch%s.h5" % (batch_number)

    # Read everything needed into memory inside the context manager so the
    # HDF5 handle is closed again (it was previously left open for every batch).
    with h5py.File(train_batch, "r") as f:
        if batch_number == 0: 
            print("Keys: %s" % f.keys())

        jet_kinematics = f['jet_kinematics'][:]
        jet1_extraInfo = f['jet1_extraInfo'][:]
        jet2_extraInfo = f['jet2_extraInfo'][:]
        # assumes truth_label has shape (N, 1), matching the reshaped columns below -- TODO confirm
        truth_label = f['truth_label'][:]

    delta_eta = jet_kinematics[:,1]

    Mjj = np.reshape(jet_kinematics[:,0], (-1,1))
    Mj1 = np.reshape(jet_kinematics[:,5], (-1,1))
    Mj2 = np.reshape(jet_kinematics[:,9], (-1,1))

    jet1_pt = np.reshape(jet_kinematics[:,2], (-1,1))
    jet2_pt = np.reshape(jet_kinematics[:,6], (-1,1))

    jet1_tau1 = np.reshape(jet1_extraInfo[:,0], (-1,1))
    jet1_tau2 = np.reshape(jet1_extraInfo[:,1], (-1,1))
    jet1_tau3 = np.reshape(jet1_extraInfo[:,2], (-1,1))
    jet1_tau4 = np.reshape(jet1_extraInfo[:,3], (-1,1))
    #jet1_btagscore = np.reshape(jet1_extraInfo[:,5],(-1,1))
    jet1_numpfconst = np.reshape(jet1_extraInfo[:,6],(-1,1))

    jet1_tau21 = jet1_tau2 / jet1_tau1
    jet1_tau32 = jet1_tau3 / jet1_tau2
    jet1_tau43 = jet1_tau4 / jet1_tau3
    jet1_sqrt_tau21 = np.sqrt(jet1_tau21) / jet1_tau1

    jet2_tau1 = np.reshape(jet2_extraInfo[:,0], (-1,1))
    jet2_tau2 = np.reshape(jet2_extraInfo[:,1], (-1,1))
    jet2_tau3 = np.reshape(jet2_extraInfo[:,2], (-1,1))
    jet2_tau4 = np.reshape(jet2_extraInfo[:,3], (-1,1))
    #jet2_btagscore = np.reshape(jet2_extraInfo[:,5],(-1,1))
    jet2_numpfconst = np.reshape(jet2_extraInfo[:,6],(-1,1))

    jet2_tau21 = jet2_tau2 / jet2_tau1
    jet2_tau32 = jet2_tau3 / jet2_tau2
    jet2_tau43 = jet2_tau4 / jet2_tau3
    jet2_sqrt_tau21 = np.sqrt(jet2_tau21) / jet2_tau1

    # Column order: six jet-1 features (0-5) followed by the same six for jet 2 (6-11).
    data = np.concatenate((Mj1, jet1_tau21, jet1_tau32, jet1_tau43, jet1_sqrt_tau21, jet1_numpfconst, 
                       Mj2, jet2_tau21, jet2_tau32, jet2_tau43, jet2_sqrt_tau21, jet2_numpfconst), axis=1)
    
    # Row indices of signal events passing every requirement.
    sig_indices = np.where((truth_label == 2) 
                              & (Mjj > Mjj_cut) 
                              & (jet1_pt > pt_cut) 
                              & (jet2_pt > pt_cut)
                              & (np.isfinite(jet1_tau21))
                              & (np.isfinite(jet1_tau32))
                              & (np.isfinite(jet1_tau43))
                              & (np.isfinite(jet1_sqrt_tau21))
                              & (np.isfinite(jet2_tau21))
                              & (np.isfinite(jet2_tau32))
                              & (np.isfinite(jet2_tau43))
                              & (np.isfinite(jet2_sqrt_tau21)))[0]

    if eta_cut is not None: 
        # Evaluate the eta cut on THIS batch. The previous code copied
        # `bkg_eta_indices`, i.e. row indices left over from the last batch of
        # the background cell, which do not correspond to the rows read here.
        sig_eta_indices = np.where((np.abs(delta_eta) < eta_cut))[0]
        sig_indices = np.intersect1d(sig_indices, sig_eta_indices)

    sig_batches.append(data[sig_indices])

# Stack all batches once; with no batches keep the original empty-array placeholder.
sig_data = np.concatenate(sig_batches, axis=0) if sig_batches else np.array([])

# Box-Cox is applied once to the full sample. It used to sit inside the batch
# loop, which re-transformed the already transformed rows every time a new
# batch was appended.
if box_cox and sig_data.size: 
    transformed_data = np.zeros(sig_data.shape)
    best_lambdas = []
    for col in range(num_features): 
        # Shift each column to be strictly positive, as Box-Cox requires.
        boxcox_col, best_lambda = stats.boxcox(sig_data[:,col] + np.abs(np.min(sig_data[:,col])) + 1)
        transformed_data[:,col] = boxcox_col
        best_lambdas.append(best_lambda)

    print(best_lambdas)

    sig_data = transformed_data

print(sig_data.shape)

In [None]:
# Keep an untouched copy of the selected background features, then standardise
# every column of `bkg_data` in place to zero mean and unit standard deviation.
# The per-column statistics are stored so plots can undo the scaling later.
unnorm_bkg_data = np.copy(bkg_data)

bkg_mean, bkg_std = [], []

for index in range(bkg_data.shape[1]):
    column = bkg_data[:,index]   # view into bkg_data, so the write below is in place
    mean = np.mean(column)
    std = np.std(column)
    bkg_mean.append(mean)
    bkg_std.append(std)
    column[:] = (column-mean)/std

In [None]:
# Keep an untouched copy of the selected signal features, then standardise
# every column of `sig_data` in place using the signal sample's own mean and
# standard deviation. The per-column statistics are stored for later plots.
unnorm_sig_data = np.copy(sig_data)

sig_mean, sig_std = [], []

for index in range(sig_data.shape[1]):
    column = sig_data[:,index]   # view into sig_data, so the write below is in place
    mean = np.mean(column)
    std = np.std(column)
    sig_mean.append(mean)
    sig_std.append(std)
    column[:] = (column-mean)/std

In [None]:
bkg_mean

In [None]:
bkg_std

In [None]:
sig_mean

In [None]:
sig_std

In [None]:
# Wrap the standardised feature arrays as tensors; they are cast with .float()
# and moved with .cuda() at every use in the evaluation loops.
bkg_test = torch.tensor(bkg_data)
sig_test = torch.tensor(sig_data)

In [None]:
def get_tpr_fpr_bkgtr(sigloss,bkgloss):
    """Scan loss cuts for a background-trained autoencoder.

    An event is selected when its loss is strictly ABOVE the cut. For each of
    the 10001 evenly spaced cuts in [0, 100] the selected fraction of
    ``sigloss`` (TPR) and of ``bkgloss`` (FPR) is recorded.

    Returns ``(tpr, fpr)`` as two lists of floats, one entry per cut.
    """
    n_sig = len(sigloss)
    n_bkg = len(bkgloss)
    cuts = np.linspace(0,100,10001)
    tpr = [np.count_nonzero(sigloss>cut)/n_sig for cut in cuts]
    fpr = [np.count_nonzero(bkgloss>cut)/n_bkg for cut in cuts]
    return tpr,fpr

In [None]:
# Evaluate every background-trained model (one per z-dim / n-flows / beta
# combination): load its weights, compute the per-event reconstruction MSE on
# the signal and background test sets, save the losses, and collect ROC inputs.
tprlist = []
fprlist = []
namelist = []
bkgtr_siglosslist = []
bkgtr_bkglosslist = []
tprlist_forinverse = []
fprinverselist = []

for Z_DIM in zdim:
    for N_FLOWS in nflow:
        for beta in betas:
            model = VAE_NF(N_FLOWS, Z_DIM).cuda()
            #NOTE: The "architecture" key below can be set to "MAF", "Planar" (not recommended), "NSQUAD", or "NSRATQUAD". 
            ae_def = {
                        "type":"qcdbkg",
                        "trainon":f"etacut{re.sub('[.,]', 'p', str(eta_cut))}",
                        "features":"12features",
                        "architecture":"%s" % (flow_type),
                        "selection":"mjjcut",
                        "trainloss":"MSELoss",
                        "beta":f"beta{re.sub('[.,]', 'p', str(beta))}",
                        "zdimnflow":f"z{Z_DIM}f{N_FLOWS}"
                     }
            # File stem shared by the weights file and the two saved loss arrays.
            run_tag = f"{ae_def['type']}_{ae_def['trainon']}_{ae_def['features']}_{ae_def['architecture']}_{ae_def['selection']}_{ae_def['trainloss']}_{ae_def['beta']}_{ae_def['zdimnflow']}"
            # strict=False: keys missing from / unexpected in the checkpoint are ignored rather than raising.
            model.load_state_dict(torch.load(f"/home/myunus/CASE/weights/{run_tag}.h5"), strict=False)
            #NOTE: Replace the /home/myunus/ above with the directory in which QUASAR resides.
            model.eval()
            with torch.no_grad():
                # Per-event mean squared reconstruction error over the feature axis.
                sig_loss = torch.mean((model(sig_test.float().cuda())[0]- sig_test.float().cuda())**2,dim=1).data.cpu().numpy()
                bkg_loss = torch.mean((model(bkg_test.float().cuda())[0]- bkg_test.float().cuda())**2,dim=1).data.cpu().numpy()
                #sig_loss = torch.mean((model(sig_test.float().cuda())[0]- sig_test.float().cuda())**2,dim=1).data.cpu().numpy() + beta * F.kl_div(model(sig_test.float().cuda())[0], sig_test.float().cuda()).cpu().numpy()
                #bkg_loss = torch.mean((model(bkg_test.float().cuda())[0]- bkg_test.float().cuda())**2,dim=1).data.cpu().numpy() + beta * F.kl_div(model(bkg_test.float().cuda())[0], bkg_test.float().cuda()).cpu().numpy()
                    
            if beta == 1.0: 
                # Diagnostic plots: jet masses of signal events split at loss = 1.
                hi_loss_sig_indices = np.argwhere(sig_loss > 1).flatten()
                lo_loss_sig_indices = np.argwhere(sig_loss < 1).flatten()
                
                hi_loss_sig_mj1 = unnorm_sig_data[hi_loss_sig_indices][:,0]
                lo_loss_sig_mj1 = unnorm_sig_data[lo_loss_sig_indices][:,0]
                
                # Mj2 is column 6 of the feature matrix (Mj1, five jet-1 variables,
                # then Mj2); column 7, used previously, is jet2_tau21.
                hi_loss_sig_mj2 = unnorm_sig_data[hi_loss_sig_indices][:,6]
                lo_loss_sig_mj2 = unnorm_sig_data[lo_loss_sig_indices][:,6]
                
                plt.hist(sig_loss, bins=50)
                plt.xlabel('sig loss (beta = 1.0)')
                plt.show()

                plt.hist(bkg_loss, bins=50)
                plt.xlabel('bkg loss (beta = 1.0)')
                plt.show()
                
                plt.hist(hi_loss_sig_mj1, bins=50)
                plt.xlabel('sig mj1 (loss > 1)')
                plt.show()
                
                plt.hist(lo_loss_sig_mj1, bins=50)
                plt.xlabel('sig mj1 (loss < 1)')
                plt.show()
                
                plt.hist(hi_loss_sig_mj2, bins=50)
                plt.xlabel('sig mj2 (loss > 1)')
                plt.show()
                
                plt.hist(lo_loss_sig_mj2, bins=50)
                plt.xlabel('sig mj2 (loss < 1)')
                plt.show()
            
            np.save(f"/home/myunus/CASE/data_strings/{run_tag}_sigloss.npy", sig_loss)
            #NOTE: Replace the /home/myunus/ above with the directory in which QUASAR resides.
            np.save(f"/home/myunus/CASE/data_strings/{run_tag}_bkgloss.npy", bkg_loss)
            #NOTE: Replace the /home/myunus/ above with the directory in which QUASAR resides.
            
            namelist.append(ae_def)
            tpr, fpr = get_tpr_fpr_bkgtr(sig_loss,bkg_loss)
            tprlist.append(tpr)
            fprlist.append(fpr)
            tpr_np, fpr_np = np.array(tpr), np.array(fpr)
            
            # Points with zero background efficiency cannot be inverted; drop them.
            nonzero_idx = np.nonzero(fpr_np)
            
            bkgtr_siglosslist.append(sig_loss)
            bkgtr_bkglosslist.append(bkg_loss)
            
            tprlist_forinverse.append(tpr_np[nonzero_idx])
            fprinverselist.append(1/fpr_np[nonzero_idx]) 
            
            if beta == 0.01: 
                # Overlay input vs. reconstructed distribution of every feature,
                # after undoing the standardisation.
                for plot_var in range(num_features):
                    print(bkg_test.cpu().numpy().shape)
                    print(model(bkg_test.float().cuda())[0].data.cpu().numpy().shape)
                    n, bins, patches = plt.hist((bkg_test.cpu().numpy() * bkg_std + bkg_mean)[:,plot_var], bins = 50, density = True, alpha = 0.75)
                    plt.hist((model(bkg_test.float().cuda())[0].data.cpu().numpy() * bkg_std + bkg_mean)[:,plot_var], bins = bins, density = True, alpha = 0.75)
                    plt.show()

In [None]:
# Write the background-trained models' losses to CSV, one pair of files per
# beta value. Entries are addressed by position, so this covers the first
# len(betas) models collected by the evaluation loop.
for index in range(len(betas)): 
    model_name = namelist[index]
    name_parts = (model_name['trainon'], model_name['architecture'], model_name['beta'])

    bkgtr_bkgloss = bkgtr_bkglosslist[index]
    bkgtr_sigloss = bkgtr_siglosslist[index]

    df_bkgloss = pd.DataFrame(bkgtr_bkgloss)
    df_sigloss = pd.DataFrame(bkgtr_sigloss)

    df_bkgloss.to_csv('csv_files/bkgtr_bkgloss_%s_%s_%s.csv' % name_parts) 
    df_sigloss.to_csv('csv_files/bkgtr_sigloss_%s_%s_%s.csv' % name_parts)  

In [None]:
# Inverted ROC curves (signal efficiency vs. 1 / background efficiency) for the
# background-trained models.
for tpr, inv_fpr, name in zip(tprlist_forinverse, fprinverselist, namelist):
    #if name['beta'] == 'beta10p0' or name['beta'] == 'beta2p0':
    if name['zdimnflow'] == 'z4f10':
        plt.plot(tpr, inv_fpr, label=f"{name['beta']}_{name['zdimnflow']}")
        # `fprinverselist` stores 1/FPR, so invert it again before integrating.
        # The previous call passed 1/FPR as the x-axis, which is not an ROC area.
        # Zero-FPR points were removed upstream, so this is the AUC over FPR > 0.
        print(f"{name['beta']}_{name['zdimnflow']}",metrics.auc(1.0/inv_fpr,tpr))
        #plt.hist(sigloss,np.arange(0,10,0.1),alpha=0.2, density=True, label=f"{name['beta']}_{name['zdimnflow']}")
        #plt.hist(bkgloss,np.arange(0,10,0.1),alpha=0.2, density=True, label=f"{name['beta']}_{name['zdimnflow']}")
plt.xlabel(r'$\epsilon_{sig}$',fontsize=15)
plt.ylabel(r'$1/\epsilon_{bkg}$',fontsize=15)
#plt.semilogy()
plt.yscale('log')

plt.title('Background Prior')
#plt.legend(bbox_to_anchor=(1.05, 1.0), loc='upper left')
plt.legend(loc='upper right')
plt.xlim([0.0,1.0])
#plt.ylim([0.0,1.0])
#plt.savefig('ROC_effectiveness_of_quak.png')

In [None]:
# ROC curves (background efficiency on x, signal efficiency on y) for the
# background-trained models, with the area under each curve printed.
# `sigloss` and `bkgloss` are unpacked but only used by the disabled histogram lines.
for tpr, fpr, name,sigloss,bkgloss in zip(tprlist,fprlist, namelist,bkgtr_siglosslist,bkgtr_bkglosslist):
    #if name['beta'] == 'beta10p0' or name['beta'] == 'beta2p0':
    if name['zdimnflow'] == 'z4f10':
        plt.plot(fpr,tpr, label=f"{name['beta']}_{name['zdimnflow']}")
        print(f"{name['beta']}_{name['zdimnflow']}",metrics.auc(fpr,tpr))
        #plt.hist(sigloss,np.arange(0,10,0.1),alpha=0.2, density=True, label=f"{name['beta']}_{name['zdimnflow']}")
        #plt.hist(bkgloss,np.arange(0,10,0.1),alpha=0.2, density=True, label=f"{name['beta']}_{name['zdimnflow']}")
plt.xlabel(r'$\epsilon_{bkg}$',fontsize=15)
plt.ylabel(r'$\epsilon_{sig}$',fontsize=15)
#plt.semilogy()
#plt.yscale('log')

# Legend is placed outside the axes, to the right of the plot.
plt.legend(bbox_to_anchor=(1.05, 1.0), loc='upper left')
#plt.legend(loc='lower right')
#plt.xlim([0.05,1.0])
#plt.ylim([0.0,1.0])
#plt.savefig('ROC_effectiveness_of_quak.png')

# Signal Autoencoder

In [None]:
def get_tpr_fpr_sigtr(sigloss,bkgloss):
    """Scan loss cuts for a signal-trained autoencoder.

    An event is selected when its loss is strictly BELOW the cut. For each of
    the 10001 evenly spaced cuts in [0, 100] the selected fraction of
    ``sigloss`` (TPR) and of ``bkgloss`` (FPR) is recorded.

    Returns ``(tpr, fpr)`` as two lists of floats, one entry per cut.
    """
    n_sig = len(sigloss)
    n_bkg = len(bkgloss)
    cuts = np.linspace(0,100,10001)
    tpr = [np.count_nonzero(sigloss<cut)/n_sig for cut in cuts]
    fpr = [np.count_nonzero(bkgloss<cut)/n_bkg for cut in cuts]
    return tpr,fpr

In [None]:
# Evaluate every signal-trained model (one per z-dim / n-flows / beta
# combination): load its weights, compute the per-event reconstruction MSE on
# the signal and background test sets, save the losses, and collect ROC inputs.
# NOTE: this re-binds tprlist / fprlist / namelist used by the background section.
tprlist = []
fprlist = []
namelist = []
sigtr_siglosslist = []
sigtr_bkglosslist = []
tprlist_forinverse = []
fprinverselist = []

for Z_DIM in zdim:
    for N_FLOWS in nflow:
        for beta in betas:
            model = VAE_NF(N_FLOWS, Z_DIM).cuda()
            #NOTE: The "architecture" key below can be set to "MAF", "Planar" (not recommended), "NSQUAD", or "NSRATQUAD". 
            ae_def = {
                        "type":"wprimesig",
                        "trainon":f"etacut{re.sub('[.,]', 'p', str(eta_cut))}",
                        "features":"12features",
                        "architecture":"%s" % (flow_type),
                        "selection":"mjjcut",
                        "trainloss":"MSELoss",
                        "beta":f"beta{re.sub('[.,]', 'p', str(beta))}",
                        "zdimnflow":f"z{Z_DIM}f{N_FLOWS}"
                     }
            # File stem shared by the weights file and the two saved loss arrays.
            run_tag = f"{ae_def['type']}_{ae_def['trainon']}_{ae_def['features']}_{ae_def['architecture']}_{ae_def['selection']}_{ae_def['trainloss']}_{ae_def['beta']}_{ae_def['zdimnflow']}"
            # strict=False: keys missing from / unexpected in the checkpoint are ignored rather than raising.
            model.load_state_dict(torch.load(f"/home/myunus/CASE/weights/{run_tag}.h5"), strict=False)
            #NOTE: Replace the /home/myunus/ above with the directory in which QUASAR resides.
            model.eval()
            with torch.no_grad():
                # Per-event mean squared reconstruction error over the feature axis.
                sig_loss = torch.mean((model(sig_test.float().cuda())[0]- sig_test.float().cuda())**2,dim=1).data.cpu().numpy()
                bkg_loss = torch.mean((model(bkg_test.float().cuda())[0]- bkg_test.float().cuda())**2,dim=1).data.cpu().numpy()
                #sig_loss = torch.mean((model(sig_test.float().cuda())[0]- sig_test.float().cuda())**2,dim=1).data.cpu().numpy() + beta * F.kl_div(model(sig_test.float().cuda())[0], sig_test.float().cuda()).cpu().numpy()
                #bkg_loss = torch.mean((model(bkg_test.float().cuda())[0]- bkg_test.float().cuda())**2,dim=1).data.cpu().numpy() + beta * F.kl_div(model(bkg_test.float().cuda())[0], bkg_test.float().cuda()).cpu().numpy()
              
            if beta == 1.0: 
                # Diagnostic plots: jet masses of signal events split at loss = 1.
                hi_loss_sig_indices = np.argwhere(sig_loss > 1).flatten()
                lo_loss_sig_indices = np.argwhere(sig_loss < 1).flatten()
                
                hi_loss_sig_mj1 = unnorm_sig_data[hi_loss_sig_indices][:,0]
                lo_loss_sig_mj1 = unnorm_sig_data[lo_loss_sig_indices][:,0]
                
                # Mj2 is column 6 of the feature matrix (Mj1, five jet-1 variables,
                # then Mj2); column 7, used previously, is jet2_tau21.
                hi_loss_sig_mj2 = unnorm_sig_data[hi_loss_sig_indices][:,6]
                lo_loss_sig_mj2 = unnorm_sig_data[lo_loss_sig_indices][:,6]
                
                plt.hist(sig_loss, bins=50, range=(0,10))
                plt.xlabel('sig loss (beta = 1.0)')
                plt.show()

                plt.hist(bkg_loss, bins=50, range=(0,10))
                plt.xlabel('bkg loss (beta = 1.0)')
                plt.show()
                
                plt.hist(hi_loss_sig_mj1, bins=50)
                plt.xlabel('sig mj1 (loss > 1)')
                plt.show()
                
                plt.hist(lo_loss_sig_mj1, bins=50)
                plt.xlabel('sig mj1 (loss < 1)')
                plt.show()
                
                plt.hist(hi_loss_sig_mj2, bins=50)
                plt.xlabel('sig mj2 (loss > 1)')
                plt.show()
                
                plt.hist(lo_loss_sig_mj2, bins=50)
                plt.xlabel('sig mj2 (loss < 1)')
                plt.show()
                
            np.save(f"/home/myunus/CASE/data_strings/{run_tag}_sigloss.npy", sig_loss)
            #NOTE: Replace the /home/myunus/ above with the directory in which QUASAR resides.
            np.save(f"/home/myunus/CASE/data_strings/{run_tag}_bkgloss.npy", bkg_loss)
            #NOTE: Replace the /home/myunus/ above with the directory in which QUASAR resides.
            
            namelist.append(ae_def)
            tpr, fpr = get_tpr_fpr_sigtr(sig_loss,bkg_loss)
            tprlist.append(tpr)
            fprlist.append(fpr)
            tpr_np, fpr_np = np.array(tpr), np.array(fpr)
            
            # Points with zero background efficiency cannot be inverted; drop them.
            nonzero_idx = np.nonzero(fpr_np)
            
            sigtr_siglosslist.append(sig_loss)
            sigtr_bkglosslist.append(bkg_loss)
            
            tprlist_forinverse.append(tpr_np[nonzero_idx])
            fprinverselist.append(1/fpr_np[nonzero_idx])        
            
            if beta == 0.01: 
                # Overlay input vs. reconstructed distribution of every feature,
                # after undoing the standardisation.
                for plot_var in range(num_features):
                    print(sig_test.cpu().numpy().shape)
                    print(model(sig_test.float().cuda())[0].data.cpu().numpy().shape)
                    n, bins, patches = plt.hist((sig_test.cpu().numpy() * sig_std + sig_mean)[:,plot_var], bins = 50, density = True, alpha = 0.75)
                    plt.hist((model(sig_test.float().cuda())[0].data.cpu().numpy() * sig_std + sig_mean)[:,plot_var], bins = bins, density = True, alpha = 0.75)
                    plt.show()

In [None]:
# Inverted ROC curves (signal efficiency vs. 1 / background efficiency) for the
# signal-trained models.
for tpr, inv_fpr, name in zip(tprlist_forinverse, fprinverselist, namelist):
    #if name['beta'] == 'beta10p0' or name['beta'] == 'beta2p0':
    if name['zdimnflow'] == 'z4f10':
        plt.plot(tpr, inv_fpr, label=f"{name['beta']}_{name['zdimnflow']}")
        # `fprinverselist` stores 1/FPR, so invert it again before integrating.
        # The previous call passed 1/FPR as the x-axis, which is not an ROC area.
        # Zero-FPR points were removed upstream, so this is the AUC over FPR > 0.
        print(f"{name['beta']}_{name['zdimnflow']}",metrics.auc(1.0/inv_fpr,tpr))
        #plt.hist(sigloss,np.arange(0,10,0.1),alpha=0.2, density=True, label=f"{name['beta']}_{name['zdimnflow']}")
        #plt.hist(bkgloss,np.arange(0,10,0.1),alpha=0.2, density=True, label=f"{name['beta']}_{name['zdimnflow']}")
plt.xlabel(r'$\epsilon_{sig}$',fontsize=15)
plt.ylabel(r'$1/\epsilon_{bkg}$',fontsize=15)
#plt.semilogy()
plt.yscale('log')

# This cell plots the signal-trained models; the title previously read
# 'Background Prior', copied from the background section.
plt.title('Signal Prior')
#plt.legend(bbox_to_anchor=(1.05, 1.0), loc='upper left')
plt.legend(loc='upper right')
plt.xlim([0.0,1.0])
#plt.ylim([0.0,1.0])
#plt.savefig('ROC_effectiveness_of_quak.png')

In [None]:
# ROC curves (background efficiency on x, signal efficiency on y) for the
# signal-trained models, with the area under each curve printed.
# `sigloss` and `bkgloss` are unpacked but only used by the disabled histogram lines.
for tpr, fpr, name,sigloss,bkgloss in zip(tprlist,fprlist, namelist,sigtr_siglosslist,sigtr_bkglosslist):
    #if name['beta'] == 'beta10p0' or name['beta'] == 'beta2p0':
    if name['zdimnflow'] == 'z4f10':
        plt.plot(fpr,tpr, label=f"{name['beta']}_{name['zdimnflow']}")
        print(f"{name['beta']}_{name['zdimnflow']}",metrics.auc(fpr,tpr))
        #plt.hist(sigloss,np.arange(0,10,0.1),alpha=0.2, density=True, label=f"{name['beta']}_{name['zdimnflow']}")
        #plt.hist(bkgloss,np.arange(0,10,0.1),alpha=0.2, density=True, label=f"{name['beta']}_{name['zdimnflow']}")
plt.xlabel(r'$\epsilon_{bkg}$',fontsize=15)
plt.ylabel(r'$\epsilon_{sig}$',fontsize=15)
#plt.semilogy()
#plt.yscale('log')

# Legend is placed outside the axes, to the right of the plot.
plt.legend(bbox_to_anchor=(1.05, 1.0), loc='upper left')
#plt.legend(loc='lower right')
#plt.xlim([0.05,1.0])
#plt.ylim([0.0,1.0])
#plt.savefig('ROC_effectiveness_of_quak.png')

# 2D ROC Curves (Still in Development)

In [None]:
from scipy.spatial import ConvexHull

def get_hull_coordinates(scan_tpr, scan_fpr):
    """Reduce a cloud of (fpr, tpr) working points to its convex-hull vertices.

    The points are paired position by position, the convex hull of the cloud
    is computed, and hull vertices whose FPR is exactly zero are discarded.

    Returns ``(tpr, fpr)`` as two 1-D arrays, in the order in which
    ``ConvexHull.vertices`` lists the remaining vertices (not sorted by FPR).
    """
    cloud = np.column_stack((scan_fpr, scan_tpr))
    on_hull = cloud[ConvexHull(cloud).vertices]
    has_fpr = on_hull[:, 0] != 0
    return on_hull[has_fpr, 1], on_hull[has_fpr, 0]

def get_tpr_fpr_2d(sigae_sigloss,sigae_bkgloss,bkgae_sigloss,bkgae_bkgloss):
    """Scan a 2-D grid of cuts on the signal-AE and background-AE losses.

    An event is selected when its signal-AE loss is below ``sigcut`` AND its
    background-AE loss is above ``bkgcut``. Both cuts run over
    ``np.arange(0, 30, 0.1)``. The selected fractions of signal events (TPR)
    and background events (FPR) for every cut pair are then reduced to their
    convex hull by ``get_hull_coordinates``.

    Returns the ``(tpr, fpr)`` arrays produced by ``get_hull_coordinates``.
    """
    cuts_sigae = np.arange(0,30,0.1)
    cuts_bkgae = np.arange(0,30,0.1)
    scan_tpr = []
    scan_fpr = []
    for sigcut in cuts_sigae:
        for bkgcut in cuts_bkgae:
            sig_selected = (sigae_sigloss<sigcut)&(bkgae_sigloss>bkgcut)
            bkg_selected = (sigae_bkgloss<sigcut)&(bkgae_bkgloss>bkgcut)
            scan_tpr.append(np.count_nonzero(sig_selected)/len(sigae_sigloss))
            scan_fpr.append(np.count_nonzero(bkg_selected)/len(sigae_bkgloss))

    return get_hull_coordinates(scan_tpr, scan_fpr)

In [None]:
# Combine the signal-trained and background-trained losses into 2-D ROC hulls.
# The four loss lists are paired by position, so entry i of each list is
# treated as belonging to the same hyper-parameter combination.
tpr2dlist = []
fpr2dlist = []
tpr2dlist_forinverse = []
fpr2dinverselist = []
for SSLs, SBLs, BSLs, BBLs in zip(sigtr_siglosslist, sigtr_bkglosslist, bkgtr_siglosslist, bkgtr_bkglosslist):
    # Debug output: first 20 losses of each of the four arrays.
    print(SSLs[:20])
    print(SBLs[:20])
    print(BSLs[:20])
    print(BBLs[:20])
    tpr2d, fpr2d = get_tpr_fpr_2d(SSLs, SBLs, BSLs, BBLs)
    print(tpr2d.shape)
    print(fpr2d.shape)
    tpr2dlist.append(tpr2d)
    fpr2dlist.append(fpr2d)
    tpr2d_np, fpr2d_np = np.array(tpr2d), np.array(fpr2d)

    # get_hull_coordinates already drops zero-FPR vertices, so this filter is
    # expected to keep every point; it guards the division below.
    nonzero_idx = np.nonzero(fpr2d_np)

    tpr2dlist_forinverse.append(tpr2d_np[nonzero_idx])
    fpr2dinverselist.append(1/fpr2d_np[nonzero_idx])

In [None]:
# Inverted 2-D ROC curves (signal efficiency vs. 1 / background efficiency).
# `namelist` here is whichever list was assigned last (the signal-AE loop in
# this notebook); only its 'beta' and 'zdimnflow' entries are used for labels.
for tpr2d, fpr2d, name in zip(tpr2dlist_forinverse,fpr2dinverselist, namelist):
    #if name['beta'] == 'beta10p0' or name['beta'] == 'beta2p0':
    if name['zdimnflow'] == 'z4f10':
        #print(tpr2d, fpr2d)
        print(tpr2d.shape, fpr2d.shape)
        # The first hull vertex is skipped when drawing.
        # NOTE(review): the hull vertices are not sorted by efficiency, so the
        # line follows the hull's vertex order -- confirm this is the intended curve.
        plt.plot(tpr2d[1:],fpr2d[1:], label=f"{name['beta']}_{name['zdimnflow']}")
        #AUC Calculation below (buggy)
        #print(f"{name['beta']}_{name['zdimnflow']}",metrics.auc(fpr2d,tpr2d))
        #plt.hist(sigloss,np.arange(0,10,0.1),alpha=0.2, density=True, label=f"{name['beta']}_{name['zdimnflow']}")
        #plt.hist(bkgloss,np.arange(0,10,0.1),alpha=0.2, density=True, label=f"{name['beta']}_{name['zdimnflow']}")
plt.xlabel(r'$\epsilon_{sig}$',fontsize=15)
plt.ylabel(r'$1/\epsilon_{bkg}$',fontsize=15)
#plt.semilogy()
plt.yscale('log')

plt.title('Inverted 2D ROC')
#plt.legend(bbox_to_anchor=(1.05, 1.0), loc='upper left')
plt.legend(loc='upper right')
plt.xlim([0.0,1.0])
#plt.ylim([0.0,1.0])
#plt.savefig('ROC_effectiveness_of_quak.png')

In [None]:
# 2-D ROC curves (background efficiency on x, signal efficiency on y) built
# from the convex-hull vertices of the two-cut scan.
for tpr2d, fpr2d, name in zip(tpr2dlist,fpr2dlist, namelist):
    #if name['beta'] == 'beta10p0' or name['beta'] == 'beta2p0':
    if name['zdimnflow'] == 'z4f10':
        # The first hull vertex is skipped when drawing.
        plt.plot(fpr2d[1:],tpr2d[1:], label=f"{name['beta']}_{name['zdimnflow']}")
        #AUC calculation below (buggy)
        #print(f"{name['beta']}_{name['zdimnflow']}",metrics.auc(fpr2d,tpr2d))
        #plt.hist(sigloss,np.arange(0,10,0.1),alpha=0.2, density=True, label=f"{name['beta']}_{name['zdimnflow']}")
        #plt.hist(bkgloss,np.arange(0,10,0.1),alpha=0.2, density=True, label=f"{name['beta']}_{name['zdimnflow']}")
plt.xlabel(r'$\epsilon_{bkg}$',fontsize=15)
plt.ylabel(r'$\epsilon_{sig}$',fontsize=15)
#plt.semilogy()
#plt.yscale('log')

# Legend is placed outside the axes, to the right of the plot.
plt.legend(bbox_to_anchor=(1.05, 1.0), loc='upper left')
#plt.legend(loc='lower right')
#plt.xlim([0.05,1.0])
#plt.ylim([0.0,1.0])
#plt.savefig('ROC_effectiveness_of_quak.png')