In [1]:
#imports
import uproot as ur
import awkward as ak
import numpy as np

import os
os.environ['CUDA_VISIBLE_DEVICES'] = "2" #specify GPU
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

import tensorflow as tf

# energyflow imports
import energyflow as ef
from energyflow.archs import PFN
from energyflow.utils import data_split

from sklearn.metrics import roc_auc_score, roc_curve
import matplotlib.pyplot as plt

import graph_util as gu
import plot_util as pu

In [2]:
# Root directory under which the dataset archive and the model checkpoints live.
data_path = "/fast_scratch/atlas_images/v01-45/"

In [3]:
# Open the pre-split dataset archive. The path is derived from data_path so the
# dataset location is configured in exactly one place.
file = np.load(os.path.join(data_path, 'nn_splitdata_kd.npz'))

In [4]:
# Pull the positional arrays out of the archive, each family ordered
# train / val / test. Keys arr_6..arr_8, if present, are not read here.
X_train, X_val, X_test = (file[key] for key in ('arr_0', 'arr_1', 'arr_2'))
Y_train, Y_val, Y_test = (file[key] for key in ('arr_3', 'arr_4', 'arr_5'))
eta_train, eta_val, eta_test = (file[key] for key in ('arr_9', 'arr_10', 'arr_11'))

In [5]:
# PFN baseline WITHOUT the global features. Run this first, then compare it to
# the variant WITH global features to see whether including them helps.
def modelNmetrics_noglob(X_train, X_val, X_test, Y_train, Y_val, Y_test, eta_train, eta_val, eta_test, epochs, batch_size):
    """Train a PFN without global features and compute ROC metrics per |eta| region.

    The network is fitted on the training split while a checkpoint callback
    stores the best model seen so far under ``data_path``. That checkpoint is
    then reloaded and evaluated on the test split for five |eta| selections.

    Parameters
    ----------
    X_train, X_val, X_test : arrays
        Model inputs for each split; the last axis length is used as the PFN
        ``input_dim``.
    Y_train, Y_val, Y_test : arrays
        Targets for each split. Column 1 of ``Y_test`` is used as the binary
        truth label for the ROC metrics.
    eta_train, eta_val : arrays
        Unused; accepted only to keep the call signature stable.
    eta_test : array
        Per-event eta of the test split, used to build the region masks.
    epochs, batch_size : int
        Forwarded to ``pfn.fit``.

    Returns
    -------
    history
        The object returned by ``pfn.fit``.
    fps, tps : list of arrays
        False/true positive rates from ``roc_curve``, one entry per selection.
    aucs : list of float
        ``roc_auc_score`` per selection.
    selections : list of boolean arrays
        The masks, ordered: |eta| < 0.7, [0.7, 1.4), [1.4, 2.5), [2.5, 3.1),
        and |eta| <= 3.1.
    """
    Phi_sizes, F_sizes = (100, 100, 128), (100, 100, 100)

    # Single source of truth for the checkpoint location; os.path.join also
    # works when data_path has no trailing separator.
    checkpoint_path = os.path.join(data_path, 'w6_pfn_noglob.hdf5')

    # Build the model; num_global_features is deliberately not passed here.
    pfn = PFN(input_dim=X_train.shape[-1], Phi_sizes=Phi_sizes, F_sizes=F_sizes)

    # Only overwrite the checkpoint when the monitored quantity improves.
    callback = tf.keras.callbacks.ModelCheckpoint(checkpoint_path, save_best_only=True)

    # Train the model.
    history = pfn.fit(X_train, Y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_val, Y_val), verbose=1, callbacks=[callback])

    # Define the |eta| regions (the absolute value is computed once).
    abs_eta = np.abs(eta_test)
    selec_l07 = abs_eta < 0.7
    selec_b0714 = (abs_eta >= .7) & (abs_eta < 1.4)
    selec_b1425 = (abs_eta >= 1.4) & (abs_eta < 2.5)
    selec_b2531 = (abs_eta >= 2.5) & (abs_eta < 3.1)  # max eta is 3
    # NOTE(review): this bound is inclusive while the last bin above is
    # exclusive, so an event at exactly |eta| == 3.1 lands only in "all".
    selec_all = abs_eta <= 3.1

    selections = [selec_l07, selec_b0714, selec_b1425, selec_b2531, selec_all]

    # Reload the best checkpoint rather than using the last-epoch weights.
    best = tf.keras.models.load_model(checkpoint_path)

    # Run inference once over the whole test set and mask the results per
    # region; this avoids re-predicting every event and copying large slices
    # of X_test for each selection.
    preds = best.predict(X_test, batch_size=1000)
    scores_col1 = preds[:, 1]
    truth_col1 = Y_test[:, 1]

    # Collect the metrics for every region.
    fps = []
    tps = []
    aucs = []
    for selection in selections:
        region_truth = truth_col1[selection]
        region_scores = scores_col1[selection]

        # Thresholds are not part of the return value, so they are discarded.
        pfn_fp, pfn_tp, _ = roc_curve(region_truth, region_scores)
        fps.append(pfn_fp)
        tps.append(pfn_tp)

        # Area under the ROC curve.
        auc = roc_auc_score(region_truth, region_scores)
        aucs.append(auc)
        print('PFN AUC:', auc)

    return history, fps, tps, aucs, selections

In [6]:
# Train and evaluate the PFN without global features.
history_nog, fp_nog, tp_nog, auc_nog, selecs_nog = modelNmetrics_noglob(
    X_train, X_val, X_test,
    Y_train, Y_val, Y_test,
    eta_train, eta_val, eta_test,
    epochs=100,
    batch_size=1000,
)

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input (InputLayer)              [(None, None, 4)]    0                                            
__________________________________________________________________________________________________
tdist_0 (TimeDistributed)       (None, None, 100)    500         input[0][0]                      
__________________________________________________________________________________________________
activation (Activation)         (None, None, 100)    0           tdist_0[0][0]                    
__________________________________________________________________________________________________
tdist_1 (TimeDistributed)       (None, None, 100)    10100       activation[0][0]                 
______________________________________________________________________________________________

In [7]:
# Quick sanity check: display the dimensions of the training inputs.
X_train.shape

(380000, 2000, 4)