In [None]:
import numpy as np
import awkward0 as awkward

In [None]:
# Ensemble-member index. It seeds the random number generators and is also
# interpolated into the data file names and the output folder name further down.
SEED = 0 # random seed corresponds to the member of the ensemble

In [None]:
import os
import tensorflow as tf
# Pass an empty device list so that TensorFlow sees no GPU and runs on CPU.
tf.config.set_visible_devices([], 'GPU') # disabling GPU, comment out if you want to use GPU

# uncomment and adjust for GPU calculations
# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# os.environ["CUDA_VISIBLE_DEVICES"]="1"

# # dynamic memory growth
# physical_devices = tf.config.list_physical_devices('GPU')
# tf.config.experimental.set_memory_growth(physical_devices[0], True)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import sys
from os.path import join
#from ROOT import TLorentzVector
from tensorflow import keras
from keras.optimizers import *
from keras.layers import *
from keras import regularizers
#import seaborn as sns
from keras.utils import *
import shutil
from dataset import *
import datetime


In [None]:
# Seed the global TensorFlow and NumPy random generators with the ensemble seed.
tf.random.set_seed(SEED)
np.random.seed(SEED)

In [None]:
# Run configuration: padding length handed to Dataset, threshold used by
# apply_pt_cut, and the folder that receives logs, checkpoints and plots.
NO_OF_PARTICLES = 250 # how many particles in an event to use, default 250
PT_CUT = 1.0 # Lower cut on PT particles, default is 1.0
OUTPUT = f'model-{SEED}' # name of the output folder

In [None]:
# Create the output folder if it is missing. exist_ok=True replaces the
# separate existence check, so there is no window between check and creation.
os.makedirs(OUTPUT, exist_ok=True)

In [None]:
import logging
# Route root-logger records of level INFO and above to training.log inside the
# output folder; the file is opened in append mode.
logging.basicConfig(
    level=logging.INFO,
    filename=join(OUTPUT, 'training.log'),
    filemode='a',
    datefmt='%H:%M:%S',
    format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s',
)

In [None]:
# Keep a snapshot of this notebook alongside the results.
# NOTE(review): the source path is relative and hard-coded, so this fails if the
# notebook is renamed or the kernel's working directory is elsewhere.
shutil.copy('train-ensemble.ipynb', join(OUTPUT, 'train-ensemble.ipynb')) # copy notebook to the output dir

## Load validation set. Adjust the data path if needed.

In [None]:
# Validation sample of this ensemble member, padded to NO_OF_PARTICLES.
val_dataset = Dataset(
    f'data/ensemble_val_{SEED}.awkd',
    {},
    data_format='channel_last',
    simple_mode=False,
    pad_len=NO_OF_PARTICLES,
)

## Load training set. Adjust the data path if needed.

In [None]:
# Training sample of this ensemble member, padded to NO_OF_PARTICLES.
train_dataset = Dataset(
    f'data/ensemble_train_{SEED}.awkd',
    {},
    data_format='channel_last',
    simple_mode=False,
    pad_len=NO_OF_PARTICLES,
)

## Calculate means and standard deviations, and normalise the data.

In [None]:
# Normalise the training set and keep the returned scalers for reuse.
# NOTE(review): scalers=None presumably means "fit new scalers on this data";
# normalize_all is defined in dataset.py -- confirm there.
scalers = train_dataset.normalize_all(scalers = None)

In [None]:
# Normalise the validation set with the scalers obtained from the training set.
val_dataset.normalize_all(scalers=scalers)

## Write scalers to file

In [None]:
# Build a plain-text report with one section per scaler (its name followed by
# the first mean, variance and scale entry), print it and save it to scaler.txt.
report_lines = []
for name, scaler in scalers.items():
    report_lines.extend([
        name,
        f'mean: {scaler.mean_[0]}',
        f'var: {scaler.var_[0]}',
        f'scale: {scaler.scale_[0]}',
    ])
string = ''.join(line + '\n' for line in report_lines)
print(string)
with open(join(OUTPUT, 'scaler.txt'), 'w') as f:
    f.write(string)

## Apply PT cut

In [None]:
def apply_pt_cut(data, cut=PT_CUT):
    """Zero out every particle whose mask value lies below ``np.log(cut)``.

    ``data.X['mask']`` and ``data.X['features']`` are modified in place.
    The threshold is compared in log space, so the mask array presumably
    stores log(pT) per particle -- TODO confirm against dataset.py.

    Args:
        data: object with a dict attribute ``X`` holding the arrays ``'mask'``
            and ``'features'``. The mask is repeated along ``axis=2`` to the
            number of feature channels, so it is expected to be 3-D with a
            trailing axis of length 1 and the same leading axes as the features.
        cut: threshold; entries with ``mask < np.log(cut)`` are removed.

    Returns:
        ``(data, n_kept)`` where ``data`` is the same (mutated) object and
        ``n_kept`` is the flattened count, summed over axis 1, of mask entries
        that are at or above the threshold.
    """
    log_cut = np.log(cut)
    mask = data.X['mask']
    features = data.X['features']

    # Both conditions are evaluated before the mask is overwritten below.
    cond = mask < log_cut
    cond2 = mask >= log_cut
    n_below = np.sum(cond)
    n_above = np.sum(cond2)
    print(cond.shape)
    print('below the cut: ', n_below)
    print('above the cut: ', n_above)
    print('sum: ', n_below + n_above)

    mask[cond] = 0.0
    # Expand the per-particle condition to one flag per feature channel. The
    # channel count is read from the feature array instead of being hard-coded.
    new_cond = np.repeat(cond, features.shape[-1], axis=2)
    print(new_cond.shape)
    features[new_cond] = 0.0

    return data, np.sum(cond2, axis=1).flatten()

In [None]:
# Apply the cut to both samples; the per-event counts returned as the second
# element are discarded.
train_dataset, _ = apply_pt_cut(train_dataset, PT_CUT)
val_dataset, _ = apply_pt_cut(val_dataset, PT_CUT)

## Load the ParticleNet model

In [None]:
import tf_keras_model

# Architecture switch: any value containing 'lite' selects the lite variant.
model_type = 'particle_net_lite' # choose between 'particle_net' and 'particle_net_lite'

# Number of output classes = width of the label array; per-input shapes drop
# the leading (event) axis.
num_classes = train_dataset.y.shape[1]
input_shapes = {}
for k in train_dataset.X:
    input_shapes[k] = train_dataset[k].shape[1:]

get_model = (
    tf_keras_model.get_particle_net_lite
    if 'lite' in model_type
    else tf_keras_model.get_particle_net
)
model = get_model(num_classes, input_shapes)

## Create a dense network for high-level variables

In [None]:
# Dense network on the 21 high-level inputs: 5 event-level variables plus
# 4 variables for each of 4 jets (see the "Handle high-level data" cells).
model2_inputs = keras.Input(shape=(5+4*4,))
xx = model2_inputs
# One 256-unit block followed by five 128-unit blocks. Every block is
# Dense (L2-regularised, linear) -> BatchNormalization -> ReLU.
for units in [256] + [128] * 5:
    xx = keras.layers.Dense(units, activation=None, kernel_regularizer=keras.regularizers.L2(1e-3))(xx)
    xx = keras.layers.BatchNormalization()(xx)
    xx = keras.layers.Activation(tf.nn.relu)(xx)
# Classification head; the merged model later taps the layer in front of the
# dropout (model2.layers[-3]).
xx = keras.layers.Dropout(0.5)(xx)
xx = keras.layers.Dense(2, activation="softmax",)(xx)

model2 = keras.Model(model2_inputs, xx)

In [None]:
# Compile the stand-alone dense model with a fixed learning rate.
# In the code shown here model2 is never fitted on its own; only its layers are
# reused inside the merged model.
model2.compile(loss='binary_crossentropy',
              optimizer=keras.optimizers.Adam(learning_rate=5e-4), # constant learning rate, no schedule
              metrics = [tf.keras.metrics.BinaryAccuracy()],
              weighted_metrics=[tf.keras.metrics.AUC()])


## Modify the GNN part

In [None]:
# Improved particle net: replace the tail of the ParticleNet graph with a new
# Dense(256) -> BN -> ReLU -> Dropout -> Dense(128) -> BN -> ReLU stack.
# NOTE(review): layers[-4] presumably is the last layer before ParticleNet's own
# classification head -- the index depends on tf_keras_model, confirm there.
yy = model.layers[-4].output
yy = keras.layers.Dense(256, activation=None, kernel_regularizer=keras.regularizers.L2(1e-3))(yy)
yy = keras.layers.BatchNormalization()(yy)
yy = keras.layers.Activation(tf.nn.relu)(yy)
yy = keras.layers.Dropout(0.5)(yy)
yy = keras.layers.Dense(128, activation=None, kernel_regularizer=keras.regularizers.L2(1e-3))(yy)
yy = keras.layers.BatchNormalization()(yy)
yy = keras.layers.Activation(tf.nn.relu)(yy)

# Feature extractor sharing ParticleNet's inputs; its last layer is the ReLU above.
model3 = keras.Model(model.input, yy)

## Merge two models

In [None]:
# Concatenate the dense branch (model2.layers[-3]: the last ReLU, i.e. the
# layer before Dropout and the softmax head) with the output of the GNN branch.
merged_layer = keras.layers.Concatenate()([model2.layers[-3].output, model3.layers[-1].output])
z = merged_layer
# Joint head: Dense(128) -> BN -> ReLU -> Dropout -> 2-way softmax.
z = keras.layers.Dense(128, activation=None, kernel_regularizer=keras.regularizers.L2(1e-3))(z)
z = keras.layers.BatchNormalization()(z)
z = keras.layers.Activation(tf.nn.relu)(z)
z = keras.layers.Dropout(0.5)(z)
z = keras.layers.Dense(2, activation="softmax",)(z)
# Inputs are given as [dense-branch input, GNN-branch input(s)]; the data passed
# to fit() must follow the same order.
merged_model = keras.Model([model2.input, model3.input], z)

## Set the batch size

In [None]:
# Mini-batch size passed to merged_model.fit().
batch_size = 1024

# Warmup cosine decay

In [None]:
from cosine_decay import *
# Learning-rate schedule used by the optimizer of the merged model. Going by the
# argument names it ramps from 0 to 5e-4 during the warm-up steps and ends at
# 1e-5; the exact curve is defined in cosine_decay.py.
# NOTE(review): the factor 800 is presumably the number of optimizer steps per
# epoch (10 warm-up epochs, 310 epochs in total) -- confirm against
# number of training events / batch_size.
lr_schedule = WarmUpCosineDecay(start_lr=0, target_lr=5e-4, warmup_steps=10*800, total_steps=310*800, hold=0, final_lr=1e-5)

## Compile

In [None]:
# Compile the merged model with Adam driven by the warm-up/cosine schedule,
# then print the layer summary.
# NOTE(review): clipnorm=5e-4 is a very tight gradient-norm limit -- confirm
# that this value is intended.
merged_model.compile(loss='binary_crossentropy',
                     optimizer=keras.optimizers.Adam(learning_rate=lr_schedule, clipnorm=5e-4),
                     metrics = [tf.keras.metrics.BinaryAccuracy()],
                     weighted_metrics=[tf.keras.metrics.AUC()])
merged_model.summary()

In [None]:
# Prepare model checkpoint directory.
import os
save_dir = join(OUTPUT, 'model_checkpoints')
# '{epoch:03d}' is filled in by ModelCheckpoint with the epoch number.
model_name = 'model.{epoch:03d}.h5'
# exist_ok=True makes the call a no-op when the folder is already there.
os.makedirs(save_dir, exist_ok=True)
filepath = os.path.join(save_dir, model_name)

# Prepare callbacks for model saving and for learning rate adjustment.
# With save_best_only=True a file is written only when val_loss improves.
checkpoint = keras.callbacks.ModelCheckpoint(filepath=filepath,
                             monitor='val_loss',
                             verbose=1,
                             save_best_only=True)

progress_bar = keras.callbacks.ProgbarLogger()


In [None]:
# Early stopping: give up after 50 epochs without any improvement of the
# validation loss and roll the model back to the best weights seen.
earlystop = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss", mode="auto",
    min_delta=0, baseline=None,
    patience=50,
    restore_best_weights=True,
    verbose=1,
)

In [None]:
# Callbacks handed to fit(): checkpointing, progress logging, early stopping.
callbacks = [checkpoint, progress_bar, earlystop,]

# Training

In [None]:
# Class weights: each class is weighted by total / (2 * class count), so both
# classes contribute equally to the loss.
# Label column 0 is treated as signal, column 1 as background.
sig_n = np.sum(train_dataset.y[:,0] == 1.0)
bkg_n = np.sum(train_dataset.y[:,1] == 1.0)
total = bkg_n + sig_n
print(bkg_n, sig_n, total)
weight_for_sig, weight_for_bkg = [(1 / count) * (total / 2.0) for count in (sig_n, bkg_n)]
class_weights = {0: weight_for_sig, 1: weight_for_bkg}
print(class_weights) # dataset is very well balanced but we use weights anyway

In [None]:
# Shuffle the training set once before the high-level inputs are extracted
# (shuffle() is defined in dataset.py).
train_dataset.shuffle()

## Handle high-level data

In [None]:
curr_dataset = train_dataset
dnn_vars = [curr_dataset['event_met'], curr_dataset['event_ht'], curr_dataset['event_eta'], curr_dataset['event_m'], curr_dataset['event_MT2'], ]
for ii in range(0, 4):
    dnn_vars.append(curr_dataset.X['jet'][:, ii, 0])
    dnn_vars.append(curr_dataset.X['jet'][:, ii, 1])
    dnn_vars.append(curr_dataset.X['jet'][:, ii, 3])
    dnn_vars.append(curr_dataset['Dphi'][:, ii])
model2_train_X = list(zip(*dnn_vars))
model2_train_X = np.array(model2_train_X)
model2_train_X.shape


In [None]:
curr_dataset = val_dataset
dnn_vars = [curr_dataset['event_met'], curr_dataset['event_ht'], curr_dataset['event_eta'], curr_dataset['event_m'], curr_dataset['event_MT2'], ]
for ii in range(4):
    dnn_vars.append(curr_dataset.X['jet'][:, ii, 0])
    dnn_vars.append(curr_dataset.X['jet'][:, ii, 1])
    dnn_vars.append(curr_dataset.X['jet'][:, ii, 3])
    dnn_vars.append(curr_dataset['Dphi'][:, ii])

model2_val_X = list(zip(*dnn_vars))
model2_val_X = np.array(model2_val_X)
model2_val_X.shape

In [None]:
# Sanity check: display the shape of the particle-feature array.
train_dataset.X['features'].shape

# Train the network

In [None]:
# Inputs are ordered as the merged model expects them: the high-level vector
# first, followed by the points / features / mask arrays of the GNN branch.
train_inputs = [
    model2_train_X,
    train_dataset.X['points'],
    train_dataset.X['features'],
    train_dataset.X['mask'],
]
val_inputs = [
    model2_val_X,
    val_dataset.X['points'],
    val_dataset.X['features'],
    val_dataset.X['mask'],
]

# epochs is only an upper bound; the early-stopping callback ends training.
history = merged_model.fit(
    train_inputs,
    train_dataset.y,
    validation_data=(val_inputs, val_dataset.y),
    epochs=3000,
    batch_size=batch_size,
    shuffle=True,
    class_weight=class_weights,
    callbacks=callbacks,
    verbose=2,
)

In [None]:
# Persist the trained merged model into the output folder.
merged_model.save(OUTPUT) # save it to disk

In [None]:
import matplotlib.pyplot as plt

# Training vs. validation loss per epoch, saved as loss.pdf in the output folder.
fig, ax = plt.subplots()
for key, label in (('loss', 'train'), ('val_loss', 'val')):
    ax.plot(history.history[key], label=label)
ax.set_title('Loss function of the model')
ax.set_xlabel('epoch')
ax.set_ylabel('loss')
ax.legend()
fig.savefig(f'{OUTPUT}/loss.pdf')
plt.show()

In [None]:
# accuracy
fig = plt.figure()
train_acc = history.history['binary_accuracy']
val_acc = history.history['val_binary_accuracy']
n_epochs = len(train_acc)

plt.plot(train_acc, label='train')
plt.plot(val_acc, label='val')
plt.title('Accuracy of the model')
plt.xlabel('epoch')
plt.ylabel('binary accuracy')
plt.legend()
# Reference lines at 80% and 90% accuracy.
plt.hlines(xmin=1, xmax=n_epochs, y=0.8, color='green', linestyle='--')
plt.hlines(xmin=1, xmax=n_epochs, y=0.9, color='green', linestyle=':')
# Mark the epoch with the lowest validation loss, i.e. the one whose weights
# early stopping restores. Taking it from the history (instead of hard-coding
# "last epoch minus patience") keeps the marker right when training was not
# stopped early or when the patience setting changes.
best_epoch = int(np.argmin(history.history['val_loss']))
plt.vlines(ymin=np.min(val_acc), ymax=0.9, x=best_epoch, color='red', linestyle=':')
plt.savefig('{}/accuracy.pdf'.format(OUTPUT))
# plt.show()
# plt.savefig('{}/score.pdf'.format(OUTPUT))
# history.history['accuracy']