In [72]:
# imports
import numpy as np
import keras_tuner as kt
from keras.layers import Dense
from keras.optimizers import Adam
from keras.models import Sequential
from keras.callbacks import EarlyStopping
from nnTrain import getSplitData
from selectionPlots import findAllFilesInPath

In [78]:
def model_builder(hp):
  model = Sequential()
  ac = hp.Choice("activation", ["relu", "tanh", "elu"])

  hp_layer_1 = hp.Int("layer_1", min_value = 20, max_value = 26, step = 2)
  model.add(Dense(units = hp_layer_1, input_dim = 13, activation = ac)) # 1st Hidden layer

  for i in range(hp.Int("num_layers", 2, 6)):
    model.add(Dense(units = hp.Int(f"units_{i}", min_value = 14, max_value = 30, step = 2),
                    activation = ac)) # Hidden layer

  model.add(Dense(1, activation = "sigmoid"))

  learning_rate = hp.Float("lr", min_value = 1e-4, max_value = 1e-2, sampling = "log")

  # Compile the model
  model.compile(loss = "binary_crossentropy", optimizer = Adam(learning_rate = learning_rate),
                metrics = ["accuracy"])

  return model

In [79]:
sampleNames = findAllFilesInPath("*.root", "nTupleGroups/")
nTupleSamples = dict.fromkeys(sampleNames, 0)
nTupleSamples["nTupleGroups/signalGroup.root"] = 1

# Tuple of variables to get from each file
variables = ["tauPtSum", "zMassSum", "metPt", "deltaRll", "deltaRtt", "deltaRttll", "deltaEtall", "deltaEtatt",
             "nJets", "deltaPhill", "deltaPhitt", "deltaPhilltt", "mmc"]

cut = "2lep"
X_train, X_test, y_train, y_test = getSplitData(nTupleSamples, variables, cut, 0)

In [80]:
# Create the tuner
tuner = kt.Hyperband(model_builder,
                      objective = "val_accuracy",
                      max_epochs = 30,
                      factor = 3,
                      directory = "hyperTuning",
                      project_name = "hyperTuning" + cut,
                      overwrite = True)

stop_early = EarlyStopping(monitor = "val_loss", patience = 5)

In [81]:
# 0.25*0.8 = 0.2 to get 20% of the data for validation
tuner.search(X_train, y_train, epochs = 10, validation_split = 0.25, callbacks = [stop_early])

Trial 90 Complete [00h 00m 06s]
val_accuracy: 0.8279535174369812

Best val_accuracy So Far: 0.8337637186050415
Total elapsed time: 00h 05m 00s
INFO:tensorflow:Oracle triggered exit


In [82]:
# Get the optimal hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials = 1)[0]

print("The hyperparameter search is complete. The optimal number of layers was:", best_hps.get("num_layers"))

for i in range(best_hps.get("num_layers")):
  print("layer", i, ":", best_hps.get(f"units_{i}"), best_hps.get("activation"))

print("Learning rate:", best_hps.get("lr"))

The hyperparameter search is complete. The optimal number of layers was: 3
layer 0 : 22 elu
layer 1 : 18 elu
layer 2 : 18 elu
Learning rate: 0.004537750100817641
