In [43]:
# imports
import uproot
import numpy as np
from selectionPlots import findAllFilesInPath
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import EarlyStopping
import keras_tuner as kt

In [50]:
def model_builder(hp):
  model = Sequential()
  hp_layer_1 = hp.Int("layer_1", min_value = 12, max_value = 30, step = 2)
  hp_layer_2 = hp.Int("layer_2", min_value = 10, max_value = 18, step = 2)
  hp_layer_3 = hp.Int("layer_3", min_value = 10, max_value = 16, step = 2)
  model.add(Dense(units = hp_layer_1, input_dim = 13, activation = "relu")) # Hidden layer
  model.add(Dense(units = hp_layer_2, activation = "relu")) # Hidden layer
  model.add(Dense(units = hp_layer_3, activation = "relu")) # Hidden layer
  model.add(Dense(2, activation = "sigmoid")) # 2 output nodes for 2 classes

  model.compile(loss = "binary_crossentropy", optimizer = "adam", metrics = ["accuracy"]) # Compile the model

  return model

In [51]:
sampleNames = findAllFilesInPath("*.root", "nTupleGroups/")
nTupleSamples = dict.fromkeys(sampleNames, 0)
nTupleSamples["nTupleGroups/signalGroup.root"] = 1

# Tuple of variables to get from each file
variables = ["tauPtSum", "zMassSum", "metPt", "deltaRll", "deltaRtt", "deltaRttll", "deltaEtall", "deltaEtatt",
             "nJets", "deltaPhill", "deltaPhitt", "deltaPhilltt", "mmc"]

X = np.array([])
y = np.array([])

In [52]:
cut = "2lep"
for sample in nTupleSamples: # Loop over the samples

  with uproot.open(sample + ":nominal" + cut) as tree:
    XTemp = tree.arrays(variables, library = "pd")
    weight = tree["weight"].array(library = "np")

  XTemp = XTemp.iloc[:, :].values

  # 1 for signal, 0 for background
  yTemp = np.zeros(len(XTemp)) if nTupleSamples[sample] == 0 else np.ones(len(XTemp))

  # Concatenate the arrays
  X = np.concatenate((X, XTemp)) if X.size else XTemp
  y = np.concatenate((y, yTemp)) if y.size else yTemp

In [53]:
# Scale the data
sc = StandardScaler()
X = sc.fit_transform(X)

# One-hot-encode the labels
ohe = OneHotEncoder()
y = ohe.fit_transform(y.reshape(-1,1)).toarray()

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

In [69]:
# Create the tuner
tuner = kt.Hyperband(model_builder,
                      objective = "val_accuracy",
                      max_epochs = 10,
                      factor = 3,
                      directory = "hyperTuning",
                      project_name = "hyperTuning" + cut,
                      overwrite = True)

stop_early = EarlyStopping(monitor = "val_loss", patience = 5)

INFO:tensorflow:Reloading Tuner from ./untitled_project/tuner0.json


In [71]:
# 0.25*0.8 = 0.2 to get 20% of the data for validation
tuner.search(X_train, y_train, epochs = 10, validation_split = 0.25, callbacks = [stop_early])

Trial 5 Complete [00h 00m 02s]
val_accuracy: 0.8056551218032837

Best val_accuracy So Far: 0.8116288185119629
Total elapsed time: 00h 00m 02s

Search: Running Trial #6

Value             |Best Value So Far |Hyperparameter
16                |24                |layer_1
18                |18                |layer_2
16                |14                |layer_3
2                 |2                 |tuner/epochs
0                 |0                 |tuner/initial_epoch
2                 |2                 |tuner/bracket
0                 |0                 |tuner/round

Epoch 1/2


KeyboardInterrupt: 

In [1]:
# Get the optimal hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials = 1)[0]
print(tuner.results_summary(num_trials = 1))
print("---")
print("The hyperparameter search is complete. The optimal number of units were\nlayer 1:", best_hps.get("layer_1"),
      "\nlayer 2:", best_hps.get("layer_2"), "\nlayer 3:", best_hps.get("layer_3"))

NameError: name 'tuner' is not defined