In [2]:
import tensorflow as tf
import sklearn
import numpy as np
import pandas as pd
from tensorflow import keras
from tensorflow.keras.regularizers import l1_l2
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping

In [8]:
from utilities import Data, Consts
import data_pipeline as dp

In [None]:
# Build the modelling table: fetch real + simulated candidates, label them,
# engineer momentum/angle features and split into train / validation / test.

real = Data(*Consts().get_real_tuple())
siml = Data(*Consts().get_simulated_tuple())

# Single source of truth for the ProbNN suffixes: they are requested from the
# data and later used again to clean and then drop the ProbNN columns.
probnn_suffixes = ['ProbNNe', 'ProbNNk', 'ProbNNpi', 'ProbNNp', 'ProbNNmu', 'ProbNNd', 'ProbNNghost']
particle_features = ['Theta', 'MINIP', 'PX', 'PY', 'PZ', 'PT'] + probnn_suffixes
particles = ['L1', 'L2', 'p', 'K']

# Column names are "<particle>_<feature>", grouped feature-by-feature.
request_features = [
    f"{particle}_{feature}"
    for feature in particle_features
    for particle in particles
] + ['Lb_DTF_PV_chi2']

rf = real.fetch_features(request_features)
sf = siml.fetch_features(request_features + ['Lb_BKGCAT'])

# Add the category tags before combining these data:
#   0 = Real background
#   1 = Simulated signal      (Lb_BKGCAT is 10 or 50)
#   2 = Simulated background  (any other Lb_BKGCAT)
# `assign`/`drop`/`reset_index` return new frames, so the objects handed back
# by `fetch_features` are not modified and re-running the cell is safe.
rf = rf.assign(category=0).reset_index()
sf = (
    sf.assign(category=np.where(sf['Lb_BKGCAT'].isin([10, 50]), 1, 2))
      .drop(columns='Lb_BKGCAT')
      .reset_index()
)

# Negative ProbNN values are replaced by 0 (NaNs, if any, are left untouched).
probnn_features = [f"{particle}_{ft}" for ft in probnn_suffixes for particle in particles]
sf[probnn_features] = sf[probnn_features].mask(sf[probnn_features] < 0, 0)
rf[probnn_features] = rf[probnn_features].mask(rf[probnn_features] < 0, 0)

# Stack real and simulated rows. `reset_index` above turned the index into an
# ordinary column; this assumes that column is called 'eventNumber'.
df = pd.concat([rf, sf], ignore_index=True, sort=False).drop(columns='eventNumber')

# P is built from PT and PZ only; alpha = arcsin(PT / P).
# Alpha is the angle between the particle and the beam axis, not using Theta as
# this has already been used to describe another feature.
for particle in particles:
    df[f"{particle}_P"] = np.sqrt(df[f"{particle}_PT"]**2 + df[f"{particle}_PZ"]**2)
    df[f"{particle}_alpha"] = np.arcsin(df[f"{particle}_PT"] / df[f"{particle}_P"])

# Normalised transverse momentum PT / P for each particle. Kept as a separate
# loop so the resulting column order is the same as before.
for particle in particles:
    df[f"{particle}_normPT"] = df[f"{particle}_PT"] / df[f"{particle}_P"]

# Every unordered pair of final-state particles.
combinations = np.array([
    ['L1', 'L2'],
    ['L1', 'p'],
    ['L1', 'K'],
    ['L2', 'p'],
    ['L2', 'K'],
    ['p', 'K']
])

# Signed difference (first minus second) of the normalised PT for each pair.
# NOTE(review): the original comment described these as *absolute* differences,
# but the computation never took an absolute value. The behaviour is left
# unchanged here - wrap in np.abs if the magnitude was what was intended.
for first, second in combinations:
    df[f"{first}_minus_{second}"] = df[f"{first}_normPT"] - df[f"{second}_normPT"]

# The ProbNN columns are excluded from the table handed to the splitter.
xf = df.drop(columns=probnn_features)
(X_train, y_train), (X_val, y_val), (X_test, y_test) = dp.prepare_data(
    xf,
    train_frac=0.6,
    val_frac=0.2,
    test_frac=0.2,
    random_state=1,
)

In [4]:
# Stop once the monitored quantity (Keras default: validation loss) has not
# improved by at least `min_delta` for `patience` epochs, and roll back to the
# best weights seen.
early_stopping = keras.callbacks.EarlyStopping(patience=30, min_delta=0.0005, restore_best_weights=True)

# The network must be as wide as the matrix it is trained on. X_train comes
# from `xf` (ProbNN columns removed), so sizing the input from `df.columns`
# would not match the data passed to `fit`.
n_inputs = X_train.shape[1]

# NOTE(review): `category` takes three values (0, 1, 2) upstream, while this
# head is a single sigmoid trained with binary cross-entropy. Presumably
# `dp.prepare_data` returns binary labels - confirm.
m = keras.Sequential([
    layers.Dense(64, activation='relu', input_shape=(n_inputs,), kernel_regularizer=l1_l2(0.001, 0.001)),
    layers.Dropout(0.2),
    layers.BatchNormalization(),
    layers.Dense(128, activation='relu', kernel_regularizer=l1_l2(0.001, 0.001)),
    layers.Dropout(0.2),
    layers.Dense(128, activation='relu', kernel_regularizer=l1_l2(0.001, 0.001)),
    layers.BatchNormalization(),
    layers.Dense(256, activation='relu', kernel_regularizer=l1_l2(0.001, 0.001)),
    layers.Dense(1, activation='sigmoid'),
])

# `metrics` is given as a list, the form accepted by every Keras version.
m.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['binary_accuracy'])

# `summary()` prints the table itself and returns None, so it is not wrapped in
# print() (that would emit a stray "None" line).
m.summary()

2022-02-04 14:46:46.721341: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-02-04 14:46:46.737562: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


NameError: name 'xf' is not defined

In [None]:
# Train for at most 500 epochs; the early-stopping callback may end the run
# sooner. The validation split is evaluated after every epoch.
m_history = m.fit(
    X_train,
    y_train,
    epochs=500,
    batch_size=1024,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
)