In [1]:
import time
import numpy as np
import pandas as pd
from tensorflow import keras
from sklearn.model_selection import train_test_split

# Importar dados

As labels dos dados estão definidas da seguinte forma:
- Background: 0
- Sinal: 1

In [2]:
# Load the processed dataset and separate the target column from the features.
# NOTE(review): assumes the 'data' key holds a DataFrame with a "Label" column
# (0 = background, 1 = signal, per the notebook text) — confirm against the file.
X_train = pd.read_hdf("Data/Processed_Data.h5", key='data')
# pop() removes "Label" from the frame in place and hands it back as a Series
y_train = X_train.pop("Label").values

In [3]:
# Standardize data
# Rescale every column to zero mean and unit standard deviation, in place.
# "gen_weights" is skipped so that it keeps its original values.
# NOTE(review): the statistics are computed on the full dataset, before the
# train/test split done later in the notebook, so held-out rows influence the
# scaling — consider computing them on the training portion only.
for column in X_train.columns:
    if column == "gen_weights":
        continue
    col_mean = X_train[column].mean()
    col_std = X_train[column].std()
    # A constant column has std == 0 (and a single-row column gives NaN); dividing
    # by that would turn the whole column into NaN/inf and poison training, so
    # such columns are only centered.
    if not col_std > 0:
        col_std = 1.0
    X_train[column] = (X_train[column] - col_mean) / col_std

In [5]:
# Sanity check: report the mean and standard deviation of every column
for column in X_train.columns:
    col_mean = X_train[column].mean()
    col_std = X_train[column].std()
    print(f"{column} -> Mean: {col_mean} | Std: {col_std}")

# From here on the data is a plain NumPy array, so column names are no longer
# available and this cell cannot be re-run without reloading the data.
X_train = X_train.values

Electron1_Eta -> Mean: 8.244256103075231e-16 | Std: 0.9999999999999488
Electron1_PT -> Mean: 5.266764586509037e-16 | Std: 0.9999999999999273
Electron1_Phi -> Mean: -1.1803463218112491e-15 | Std: 1.00000000000046
Electron2_Eta -> Mean: 2.9209884110550466e-16 | Std: 1.0000000000008142
Electron2_PT -> Mean: 1.9314990560111155e-15 | Std: 1.0000000000001907
Electron2_Phi -> Mean: 2.0355066321821372e-15 | Std: 0.9999999999975362
Electron_Multi -> Mean: -3.721393837839373e-15 | Std: 1.0000000000000093
FatJet1_Eta -> Mean: 6.584636470709267e-16 | Std: 1.000000000000354
FatJet1_Mass -> Mean: -1.27604544776005e-15 | Std: 0.9999999999996145
FatJet1_PT -> Mean: -5.496819275422567e-16 | Std: 1.0000000000003848
FatJet1_Phi -> Mean: -2.0402636166451658e-15 | Std: 1.000000000001522
FatJet1_Tau1 -> Mean: -5.424593091886727e-17 | Std: 0.9999999999980551
FatJet1_Tau2 -> Mean: 3.0999559655592917e-15 | Std: 0.9999999999991862
FatJet1_Tau3 -> Mean: 2.731869597770016e-15 | Std: 0.9999999999989034
FatJet1_Tau

In [6]:
# Hold out 20% of the samples for testing; random_state is fixed so the same
# partition is produced on every run.
split_parts = train_test_split(X_train, y_train, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# Report the resulting shapes
print(f"X_train shape: {X_train.shape} | X_test shape: {X_test.shape}")
print(f"y_train shape: {y_train.shape} | y_test shape: {y_test.shape}")

X_train shape: (156580, 70) | X_test shape: (39145, 70)
y_train shape: (156580,) | y_test shape: (39145,)


In [7]:
# Separate the per-sample weights from the feature matrices.
# NOTE(review): assumes the last column is "gen_weights" (the column left
# unscaled during standardization) — confirm the column order of the input file.
train_weights = X_train[:, -1]
test_weights = X_test[:, -1]

# Keep every column except the last one as model inputs
X_train = X_train[:, :-1]
X_test = X_test[:, :-1]

print(f"X_train shape: {X_train.shape} | X_test shape: {X_test.shape}")
print(f"y_train shape: {y_train.shape} | y_test shape: {y_test.shape}")
print(f"train_weights shape: {train_weights.shape} | test_weights shape: {test_weights.shape}")

X_train shape: (156580, 69) | X_test shape: (39145, 69)
y_train shape: (156580,) | y_test shape: (39145,)
train_weights shape: (156580,) | test_weights shape: (39145,)


# Build the model

In [8]:
def _relu_dense(units):
    """Return a new fully connected layer with `units` outputs and ReLU activation."""
    return keras.layers.Dense(units, activation='relu')


# Fully connected classifier: 69 inputs -> 90 -> 120 -> 80 -> 40 -> 2 (softmax).
# The input width is hard-coded and must match the number of feature columns
# left in X_train after the weight column has been removed.
inputs = keras.Input(shape=(69,))
fc1 = _relu_dense(90)(inputs)
fc2 = _relu_dense(120)(fc1)
fc3 = _relu_dense(80)(fc2)
fc4 = _relu_dense(40)(fc3)
# Two output units, one per class; paired with the sparse categorical loss below
outputs = keras.layers.Dense(2, activation='softmax')(fc4)

model = keras.Model(inputs=inputs, outputs=outputs)
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer='adam',
    metrics=["accuracy"],
)
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 69)]              0         
_________________________________________________________________
dense (Dense)                (None, 90)                6300      
_________________________________________________________________
dense_1 (Dense)              (None, 120)               10920     
_________________________________________________________________
dense_2 (Dense)              (None, 80)                9680      
_________________________________________________________________
dense_3 (Dense)              (None, 40)                3240      
_________________________________________________________________
dense_4 (Dense)              (None, 2)                 82        
Total params: 30,222
Trainable params: 30,222
Non-trainable params: 0
_________________________________________________________

In [9]:
# Run identifier: the current Unix timestamp rendered as text. It is used to
# name this run's log directory and checkpoint file.
name = f"{time.time()}"

# Callbacks 

In [10]:
# TensorBoard: write training logs to a directory specific to this run
TB = keras.callbacks.TensorBoard(f"logs/{name}")

# Early stopping: watch the validation loss (lower is better) with a patience of 6
ES = keras.callbacks.EarlyStopping(monitor="val_loss", mode="min", patience=6, verbose=2)

# Checkpoint: keep only the model with the best (lowest) validation loss so far
checkpoint_path = f"models/{name}.h5"
MC = keras.callbacks.ModelCheckpoint(
    checkpoint_path,
    monitor="val_loss",
    mode="min",
    save_best_only=True,
)

# Treinar modelo

In [11]:
# Train for up to 20 epochs, weighting each sample by its entry in train_weights.
# 15% of the training data is set aside by Keras as the validation set that the
# early-stopping and checkpoint callbacks monitor.
training_callbacks = [TB, ES, MC]
model.fit(
    X_train,
    y_train,
    sample_weight=train_weights,
    batch_size=128,
    epochs=20,
    validation_split=0.15,
    shuffle=True,
    callbacks=training_callbacks,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7fc6ae41a450>