In [34]:
# imports
import numpy as np
import keras_tuner as kt
from keras.layers import Dense
from keras.optimizers import Adam
from keras.models import Sequential
from keras.callbacks import EarlyStopping
from nnTrain import getSplitData
from selectionPlots import findAllFilesInPath

In [35]:
def model_builder(hp):
  """Build and compile the binary classifier for one hyperparameter trial.

  The network takes 13 input features, passes them through three ReLU hidden
  layers whose widths are tunable, and ends in a single sigmoid unit.

  Tunable hyperparameters registered on ``hp``:
    layer_1: units in the first hidden layer, 22 to 26 in steps of 2
    layer_2: units in the second hidden layer, 12 to 16 in steps of 2
    layer_3: units in the third hidden layer, 10 to 16 in steps of 2
    lr:      Adam learning rate, 1e-4 to 1e-2, sampled on a log scale

  Args:
    hp: hyperparameter object providing ``Int`` and ``Float``.

  Returns:
    The compiled Sequential model (binary cross-entropy loss, accuracy metric).
  """
  model = Sequential()

  # (hyperparameter name, smallest width, largest width) for each hidden layer
  hidden_ranges = (("layer_1", 22, 26), ("layer_2", 12, 16), ("layer_3", 10, 16))
  hidden_units = [hp.Int(name, min_value = low, max_value = high, step = 2)
                  for name, low, high in hidden_ranges]

  # Only the first hidden layer declares the input size
  first_units, *later_units = hidden_units
  model.add(Dense(units = first_units, input_dim = 13, activation = "relu"))
  for units in later_units:
    model.add(Dense(units = units, activation = "relu"))

  # One sigmoid output node: a single score for the two-class problem
  model.add(Dense(1, activation = "sigmoid"))

  learning_rate = hp.Float("lr", min_value = 1e-4, max_value = 1e-2, sampling = "log")

  model.compile(optimizer = Adam(learning_rate = learning_rate), loss = "binary_crossentropy",
                metrics = ["accuracy"])
  return model

In [36]:
# Map every ROOT file found under nTupleGroups/ to label 0, then give the
# signal group file label 1
sampleNames = findAllFilesInPath("*.root", "nTupleGroups/")
nTupleSamples = {sampleName: 0 for sampleName in sampleNames}
nTupleSamples["nTupleGroups/signalGroup.root"] = 1

# List of variables to get from each file; its 13 entries match the
# input_dim = 13 used by model_builder
variables = [
    "tauPtSum", "zMassSum", "metPt",
    "deltaRll", "deltaRtt", "deltaRttll",
    "deltaEtall", "deltaEtatt",
    "nJets",
    "deltaPhill", "deltaPhitt", "deltaPhilltt",
    "mmc",
]

cut = "2lep"
# NOTE(review): the meaning of the trailing 0 is defined by
# nnTrain.getSplitData and is not visible from this notebook
X_train, X_test, y_train, y_test = getSplitData(nTupleSamples, variables, cut, 0)

In [37]:
# Hyperband tuner that builds candidate models with model_builder and ranks
# them by validation accuracy
tuner = kt.Hyperband(
    hypermodel = model_builder,
    objective = "val_accuracy",
    max_epochs = 10,                     # upper bound on epochs for a single model
    factor = 3,                          # reduction factor between Hyperband rounds
    directory = "hyperTuning",
    project_name = "hyperTuning" + cut,  # one project folder per selection cut
    overwrite = True,                    # start fresh instead of reloading an existing project
)

# Stop training a candidate once its validation loss has gone 5 epochs
# without improving
stop_early = EarlyStopping(patience = 5, monitor = "val_loss")

In [38]:
# Hold back 25% of the training sample for validation. If getSplitData keeps
# 80% of the data for training, that is 0.25*0.8 = 0.2, i.e. 20% of all data
# (TODO confirm the split fraction used by getSplitData).
# NOTE(review): Hyperband appears to choose the per-trial epoch count itself,
# so epochs = 10 may have no effect here; verify against the Keras Tuner docs.
tuner.search(
    X_train,
    y_train,
    validation_split = 0.25,
    epochs = 10,
    callbacks = [stop_early],
)

Trial 30 Complete [00h 00m 06s]
val_accuracy: 0.827307939529419

Best val_accuracy So Far: 0.8395739197731018
Total elapsed time: 00h 01m 20s
INFO:tensorflow:Oracle triggered exit


In [42]:
# Pull the single best hyperparameter set found by the search and report it.
# For a fuller report of the best trial, call tuner.results_summary(num_trials = 1).
best_hps = tuner.get_best_hyperparameters(num_trials = 1)[0]

best_units_1 = best_hps.get("layer_1")
best_units_2 = best_hps.get("layer_2")
best_units_3 = best_hps.get("layer_3")
best_lr = best_hps.get("lr")

print("The hyperparameter search is complete. The optimal number of units were\nlayer 1:", best_units_1,
      "\nlayer 2:", best_units_2, "\nlayer 3:", best_units_3, "\nLearning rate:",
      best_lr)

The hyperparameter search is complete. The optimal number of units were
layer 1: 22 
layer 2: 14 
layer 3: 14 
Learning rate: 0.008107639036535629
