In [1]:
import uproot as ur
import awkward as ak
import numpy as np

data_path = '/fast_scratch/atlas_images/v01-45/'

import os
os.environ['CUDA_VISIBLE_DEVICES'] = "2"
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

import tensorflow as tf

# energyflow imports
import energyflow as ef
from energyflow.archs import PFN
from energyflow.utils import data_split

from sklearn.metrics import roc_auc_score, roc_curve
import matplotlib.pyplot as plt

In [2]:
import sys
sys.path.append('/Users/swiatlow/Code/ML4P/LCStudies')
sys.path.append('/home/mswiatlowski/start_tf/LCStudies')
import graph_util as gu
import plot_util as pu

path_pipm = data_path + 'pipm_medium.root'
path_pi0  = data_path + 'pi0_medium.root'


In [3]:
import convert_sets

Working on /fast_scratch/atlas_images/v01-45/pipm_medium.root
Loading data
Normalizing


  cell_e = np.nan_to_num(np.log(cell_e))


Writing out


  Y_target = np.log(clus_targetE[selection])
  Y_target = np.log(clus_targetE[selection])


Done! /fast_scratch/atlas_images/v01-45/pipm_medium.npz
Working on /fast_scratch/atlas_images/v01-45/pi0_medium.root
Loading data
Normalizing
Writing out
Done! /fast_scratch/atlas_images/v01-45/pi0_medium.npz


In [4]:
pipm_med = np.load('/fast_scratch/atlas_images/v01-45/pipm_medium.npz')
pi0_med = np.load('/fast_scratch/atlas_images/v01-45/pi0_medium.npz')

In [5]:
pipm_med.files

['X', 'Y_label', 'Y_target', 'clus_eta']

In [6]:
X_pipm = pipm_med['X']
X_pi0 = pi0_med['X']

Y_label_pipm = pipm_med['X']
Y_label_pi0 = pi0_med['X']

clus_eta_pipm = pipm_med['clus_eta']
clus_eta_pi0 = pi0_med['clus_eta']

In [8]:
selec_pipm = (abs(clus_eta_pipm) > 2.5) & (abs(clus_eta_pipm) < 3.1)
selec_pi0  = (abs(clus_eta_pi0) > 2.5) & (abs(clus_eta_pi0) < 3.1)


In [39]:
len(clus_eta_pipm), len(X_pipm)

(592857, 592857)

In [9]:
X_pipm_selec = X_pipm[selec_pipm]
X_pi0_selec = X_pi0[selec_pi0]

In [10]:
Y_label_pipm_selec = Y_label_pipm[selec_pipm]
Y_label_pi0_selec = Y_label_pi0[selec_pi0]

clus_eta_pipm_selec = clus_eta_pipm[selec_pipm]
clus_eta_pi0_selec = clus_eta_pi0[selec_pi0]

In [11]:
len(X_pipm_selec), len(X_pi0_selec)

(39324, 36328)

In [22]:
X = np.concatenate((X_pipm_selec[:36000], X_pi0_selec[:36000]))
Y = np.concatenate((Y_label_pipm_selec[:36000],Y_label_pi0_selec[:36000]))#
X.shape, Y.shape

((72000, 2000, 3), (72000, 2000, 3))

In [35]:
X = np.concatenate((X_pipm[:36000], X_pi0[:36000]))
Y = np.concatenate((Y_label_pipm[:36000],Y_label_pi0[:36000]))#try with no eta cuts
X.shape, Y.shape

((72000, 2000, 3), (72000, 2000, 3))

In [23]:
X.shape, Y.shape

((72000, 2000, 3), (72000, 2000, 3))

In [36]:
def modelNmetrics(X, Y, batch_size, test_size):
    Phi_sizes, F_sizes = (100, 100, 128), (100, 100, 100)
    print('sizes: X ', len(X), 'Y: ', len(Y))
    # do train/val/test split 
    (X_train, X_val, X_test, Y_train, Y_val, Y_test) = data_split(X, Y, val=10000, test=test_size)
    print(X_train.shape, X_val.shape, X_test.shape, Y_train.shape, Y_val.shape, Y_test.shape)
    #make the model:
    pfn = PFN(input_dim=X.shape[-1], Phi_sizes=Phi_sizes, F_sizes=F_sizes)#, num_global_features =1)
    
    #try callbacks:
    callback = tf.keras.callbacks.ModelCheckpoint('./w6_pfn1', save_best_only=True)
    
    # train model
    history = pfn.fit(X_train, Y_train, epochs=200, batch_size=batch_size, validation_data=(X_val, Y_val), verbose=1)#, callbacks=callback)
    
    preds = pfn.predict(X_test, batch_size=1000)
    pfn_fp, pfn_tp, threshs = roc_curve(Y_test[:,1], preds[:,1])
    # get area under the ROC curve
    auc = roc_auc_score(Y_test[:,1], preds[:,1])
    print('PFN AUC:', auc)
   
    return history, pfn_fp, pfn_tp, auc

In [37]:
def plots(history, pfn_fp, pfn_tp, auc):
    
    fig, (ax1, ax2) = plt.subplots(1, 2,figsize=[12,6])
    ax1.set_xlim(0,len(history.history['acc']))
    ax1.plot(history.history['acc'], label='training set', c='xkcd:butterscotch', linewidth=3)
    ax1.plot(history.history['val_acc'], label='test set', c='xkcd:coffee', linewidth=3)
    ax1.set_title('model accuracy')
    ax1.set_ylabel('accuracy')
    ax1.set_xlabel('epoch')
    ax1.legend()

    ax2.set_xlim(0,len(history.history['acc']))
    ax2.plot(history.history['loss'], label='training set', c='xkcd:dull brown', linewidth=3)
    ax2.plot(history.history['val_loss'], label='test set', c='xkcd:taupe', linewidth=3)
    ax2.set_title('model loss')
    ax2.set_ylabel('loss')
    ax2.set_xlabel('epoch')
    ax2.set_yscale('log')
    ax2.legend()
    
    plt.figure(figsize=[7,7])
    plt.xlim(-.01,1.01)
    plt.ylim(-.01,1.01)
    plt.plot([0, 1], [0, 1], 'k--')
    plt.plot(pfn_fp, pfn_tp, c='xkcd:dirt', linewidth=4, label='AUC = %.3f'%auc)
    plt.ylabel('True positive rate')
    plt.xlabel('False positive rate')

In [38]:
history_b2531, fp_2531, tp_2531, auc_2531 = modelNmetrics(X, Y, 1000, 20000)

sizes: X  72000 Y:  72000
(42000, 2000, 3) (10000, 2000, 3) (20000, 2000, 3) (42000, 2000, 3) (10000, 2000, 3) (20000, 2000, 3)
Model: "model_6"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input (InputLayer)              [(None, None, 3)]    0                                            
__________________________________________________________________________________________________
tdist_0 (TimeDistributed)       (None, None, 100)    400         input[0][0]                      
__________________________________________________________________________________________________
activation_42 (Activation)      (None, None, 100)    0           tdist_0[0][0]                    
__________________________________________________________________________________________________
tdist_1 (TimeDistributed)       (None, None, 100)    10100     

ValueError: in user code:

    /usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/training.py:805 train_function  *
        return step_function(self, iterator)
    /usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/training.py:795 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /usr/local/lib/python3.8/dist-packages/tensorflow/python/distribute/distribute_lib.py:1259 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /usr/local/lib/python3.8/dist-packages/tensorflow/python/distribute/distribute_lib.py:2730 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /usr/local/lib/python3.8/dist-packages/tensorflow/python/distribute/distribute_lib.py:3417 _call_for_each_replica
        return fn(*args, **kwargs)
    /usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/training.py:788 run_step  **
        outputs = model.train_step(data)
    /usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/training.py:755 train_step
        loss = self.compiled_loss(
    /usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/compile_utils.py:203 __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    /usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/losses.py:152 __call__
        losses = call_fn(y_true, y_pred)
    /usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/losses.py:256 call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    /usr/local/lib/python3.8/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper
        return target(*args, **kwargs)
    /usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/losses.py:1537 categorical_crossentropy
        return K.categorical_crossentropy(y_true, y_pred, from_logits=from_logits)
    /usr/local/lib/python3.8/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper
        return target(*args, **kwargs)
    /usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/backend.py:4833 categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)
    /usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/tensor_shape.py:1134 assert_is_compatible_with
        raise ValueError("Shapes %s and %s are incompatible" % (self, other))

    ValueError: Shapes (1000, 2000, 3) and (1000, 2) are incompatible
