In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
from metrics_features import fi_perm
from dnn_tau import Dnn_tau
from data_extractor import Data_extractor_v2, output_vars_v2
import os
import fnmatch
from utils import normalize, bucketize, split_dataset
from copy import deepcopy
import pickle
from metrics_features import *

In [2]:
path = "/data/hnl/prompt_tau/anatuple/nanoV10/TEST9/"
features = deepcopy(output_vars_v2)
features.extend(['signal_label', 'channel', 'event_type', 'mass_hyp'])
channels = os.listdir(path)
relative_path = "/anatuple/"

In [3]:
values = []
for i in range(len(features)):
    values.append([])
data = dict(zip(features, values))

for channel in channels:
    extractor = Data_extractor_v2(channel)
    data = extractor(path+channel+relative_path, data=data)

In [4]:
N = len(data['event'])
n_bkg = N-sum([data['signal_label'][i] for i in range(len(data['signal_label']))])
data_norm = normalize(pd.DataFrame(data), 'mass_hyp', n_bkg)
data_norm = normalize(data_norm, 'signal_label', n_bkg)
data_norm = normalize(data_norm, 'channel', n_bkg)
data_processed, channel_indices = bucketize(data_norm, 'channel')
print(list(data_processed.keys()))
print(channel_indices)

['event', 'genWeight', 'deltaphi_12', 'deltaphi_13', 'deltaphi_23', 'deltaeta_12', 'deltaeta_13', 'deltaeta_23', 'deltaR_12', 'deltaR_13', 'deltaR_23', 'pt_123', 'mt_12', 'mt_13', 'mt_23', 'Mt_tot', 'n_tauh', 'signal_label', 'channel', 'event_type', 'mass_hyp']
{'tte': 0, 'tee': 1, 'tmm': 2, 'tem': 3, 'ttm': 4}


In [5]:
input_vars = ['deltaphi_12', 'deltaphi_13', 'deltaphi_23', 'deltaeta_12', 'deltaeta_13', 'deltaeta_23', 'deltaR_12', 'deltaR_13', 'deltaR_23',
              'pt_123', 'mt_12', 'mt_13', 'mt_23', 'Mt_tot', 'signal_label', 'channel', 'mass_hyp']

In [6]:
train, val, test, meas = split_dataset(data_processed)

Total number of events :  6818970
Train set : 37.52 %
Validation set : 12.51 %
Test set : 24.98 %
Measurement set : 24.99 %


In [10]:
extracted_data_path = "extracted_data/TEST9_global_v2_"
train.to_pickle(extracted_data_path+"train")
test.to_pickle(extracted_data_path+"test")
val.to_pickle(extracted_data_path+"val")
meas.to_pickle(extracted_data_path+"meas")

In [2]:
extracted_data_path = "extracted_data/TEST9_global_v2_"
train = pd.read_pickle(extracted_data_path+"train")
test = pd.read_pickle(extracted_data_path+"test")
val = pd.read_pickle(extracted_data_path+"val")
meas = pd.read_pickle(extracted_data_path+"meas")

input_vars = ['deltaphi_12', 'deltaphi_13', 'deltaphi_23', 'deltaeta_12', 'deltaeta_13', 'deltaeta_23', 'deltaR_12', 'deltaR_13', 'deltaR_23',
              'pt_123', 'mt_12', 'mt_13', 'mt_23', 'Mt_tot', 'signal_label', 'channel', 'mass_hyp']

x_train = train[input_vars]
x_test = test[input_vars]
x_val = val[input_vars]
x_meas = meas[input_vars]

label_train = x_train.pop('signal_label').astype(float)
label_val = x_val.pop('signal_label').astype(float)
label_test = x_test.pop('signal_label').astype(float)
label_meas = x_meas.pop('signal_label').astype(float)

learning_features = list(x_train.keys())

In [None]:
depths = [len(learning_features)*2]*5
print(depths)
model = Dnn_tau(list(x_train.keys()), depths=depths)
model.compile(loss='binary_crossentropy', 
                  optimizer='adam', 
                  metrics=['loss', 'accuracy'])

early_stopping = tf.keras.callbacks.EarlyStopping(monitor='accuracy', patience=7)
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath="./saved_models/checkpoint",
    monitor = "val_loss",
    save_best_only = True
)
history = model.fit(x_train, label_train, sample_weight=train['genWeight'], validation_data=(x_val, label_val), epochs=100000, verbose=1, 
                    batch_size = 400, callbacks=[early_stopping, checkpoint])
model = tf.keras.models.load_model('./saved_models/checkpoint')
model.save('./saved_models/TEST9_global_v2')
# Save history
filename = "./saved_history/TEST9_global_vs.pkl"
with open(filename, "wb") as file:
    pickle.dump(history.history, file)

[32, 32, 32, 32, 32]
Epoch 1/100000
INFO:tensorflow:Assets written to: ./saved_models/checkpoint/assets


INFO:tensorflow:Assets written to: ./saved_models/checkpoint/assets


Epoch 2/100000
INFO:tensorflow:Assets written to: ./saved_models/checkpoint/assets


INFO:tensorflow:Assets written to: ./saved_models/checkpoint/assets


Epoch 3/100000
Epoch 4/100000
Epoch 5/100000
Epoch 6/100000
Epoch 7/100000
Epoch 8/100000
INFO:tensorflow:Assets written to: ./saved_models/checkpoint/assets


INFO:tensorflow:Assets written to: ./saved_models/checkpoint/assets


Epoch 9/100000
Epoch 10/100000
Epoch 11/100000
Epoch 12/100000
Epoch 13/100000
Epoch 14/100000
Epoch 15/100000
Epoch 16/100000
Epoch 17/100000
Epoch 18/100000
Epoch 19/100000
Epoch 20/100000
Epoch 21/100000
Epoch 22/100000
Epoch 23/100000
Epoch 24/100000
Epoch 25/100000
Epoch 26/100000
Epoch 27/100000
Epoch 28/100000
Epoch 29/100000
Epoch 30/100000
Epoch 31/100000
Epoch 32/100000




INFO:tensorflow:Assets written to: ./saved_models/TEST9_global_v2/assets


INFO:tensorflow:Assets written to: ./saved_models/TEST9_global_v2/assets


In [3]:
model = tf.keras.models.load_model('./saved_models/TEST9_global_v2')

2023-05-01 14:33:08.594909: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-01 14:33:09.492643: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 6673 MB memory:  -> device: 0, name: Quadro RTX 4000, pci bus id: 0000:17:00.0, compute capability: 7.5
2023-05-01 14:33:09.493547: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 6653 MB memory:  -> device: 1, name: Quadro RTX 4000, pci bus id: 0000:65:00.0, compute capability: 7.5


In [5]:
print(model.metrics_names)

[]


In [5]:
delta_loss_perm = []
loss_no_shuffle = fi_perm(model, test, input_vars, [])
for key in learning_features:
    loss_shuffle = fi_perm(model, test, input_vars, key)
    delta_loss_perm.append(loss_shuffle-loss_no_shuffle)
delta_loss_perm = dict(zip(learning_features, delta_loss_perm))
print(delta_loss_perm)

x_test defined
y_test defined
Loss with shuffle evaluated
x_test defined
y_test defined
0    0.619110
1    0.510193
2    1.376953
3    0.476545
4    2.452375
5    2.357056
6    0.287598
7    0.482910
8    2.813477
9    0.016235
Name: deltaphi_12, dtype: float64
0    2.343262
1    0.343262
2    1.180542
3    1.886701
4    1.627441
5    1.762433
6    1.044800
7    0.361206
8    2.951660
9    2.946777
Name: deltaphi_12, dtype: float64
permutated x_test defined
Loss with shuffle evaluated
x_test defined
y_test defined
0    0.387207
1    2.619812
2    1.838745
3    2.605133
4    0.611328
5    2.549561
6    2.984357
7    2.852399
8    2.425031
9    1.274292
Name: deltaphi_13, dtype: float64
0    2.636213
1    0.272949
2    0.893677
3    1.065918
4    1.003174
5    2.407575
6    1.745832
7    2.668213
8    0.846174
9    2.796906
Name: deltaphi_13, dtype: float64
permutated x_test defined
Loss with shuffle evaluated
x_test defined
y_test defined
0    1.006317
1    2.109619
2    3.067487
3    3

In [7]:
filename = "./saved_results/TEST9_global_v1_loss_shuffle.pkl"
with open(filename, "wb") as file:
    pickle.dump(delta_loss_perm, file)

In [8]:
filename = "./saved_results/TEST9_global_v1_loss_shuffle.pkl"
with open(filename, "rb") as f:
    delta_loss_perm = pickle.load(f)

In [20]:
for key in delta_loss_perm:
    print(key, "  \t: {:.5f}".format(delta_loss_perm[key]))

deltaphi_12   	: 0.00105
deltaphi_13   	: 0.00078
deltaphi_23   	: 0.00094
deltaeta_12   	: 0.01099
deltaeta_13   	: 0.01067
deltaeta_23   	: 0.00377
deltaR_12   	: 0.00212
deltaR_13   	: 0.00176
deltaR_23   	: 0.00179
pt_123   	: 0.07949
mt_12   	: 0.07728
mt_13   	: 0.03374
mt_23   	: 0.16063
Mt_tot   	: 0.87094
channel   	: 0.63062
mass_hyp   	: 4.22326


In [21]:
from metrics_features import fi_mutual_info
mis = fi_mutual_info(model, test, input_vars)

  y = column_or_1d(y, warn=True)
