### Bayesian optimization

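Optimize our autoencoder (AE) using Bayesian optimization, with the AUC as the 'score' to regress upon. Because the optimizer minimizes its objective, the quantity actually minimized is `1 - AUC`.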

##### imports

Import `skopt`, along with the `autoencodeSVJ` evaluation/utility modules and `numpy`.

In [1]:
import autoencodeSVJ.evaluate as ev
import autoencodeSVJ.utils as utils
from skopt import gp_minimize
from skopt.space import Real, Integer, Categorical
from skopt.utils import use_named_args
import numpy as np

Using TensorFlow backend.


In [2]:
# Hyperparameter search space handed to the optimizer.
# The order of the entries matters: optimizer result vectors (e.g. `results.x`)
# are indexed positionally against this list.
space = [
    # forwarded to ev.ae_train as `target_dim`
    Integer(low=2, high=20, name='target_dim'),
    # 0/1 switch forwarded to ev.ae_train as `hlf`
    Integer(low=0, high=1, name='hlf'),
    # sampled on a log scale between 1e-4 and 1e-1
    Real(low=1e-4, high=1e-1, prior='log-uniform', name='learning_rate'),
    # selects the data directory suffix (base_3 or base_4)
    Integer(low=3, high=4, name='eflow_base'),
    # scaler name forwarded to ev.ae_train through `norm_args`
    Categorical(name='norm_type', categories=['StandardScaler', 'MinMaxScaler']),
    # training loss forwarded to ev.ae_train
    Categorical(name='loss', categories=['mae', 'mse']),
]

In [3]:
# Names of the tunable hyperparameters, in search-space order.
param_names = [elt.name for elt in space]

# Previously recorded runs, restricted to the hyperparameter columns plus every
# column matched by the "*auc" pattern.
filtered = utils.summary().cfilter(param_names + ["*auc"])

# Keep only the runs whose recorded hyperparameters lie inside the current space.
keep = filtered.copy()
for elt in space:
    if isinstance(elt, Categorical):
        keep = keep[keep[elt.name].isin(elt.categories)]
    else:
        keep = keep[keep[elt.name].between(*elt.bounds)]
        if isinstance(elt, Integer):
            # Integer dimensions: drop rows whose stored value is not integral.
            keep = keep[keep[elt.name].apply(lambda x: float(x).is_integer())]

# Every column that is not a hyperparameter, i.e. the AUC columns.
aucs = keep.loc[:, ~keep.columns.isin(param_names)]

# One list of hyperparameter values per retained run, in search-space order.
x0 = np.asarray([keep[name].values for name in param_names]).T.tolist()

# Matching objective values: 1 - (largest AUC of each run). Built as a real
# list (not `map`) so it supports len()/indexing under Python 2 and Python 3.
y0 = [1 - best_auc for best_auc in aucs.max(axis=1).tolist()]

# NOTE(review): x0/y0 are not passed to gp_minimize in the visible cells --
# presumably intended as warm-start points; confirm before relying on them.

In [4]:
# Seed shared by the training runs and the optimizer.
random_state = 42

@use_named_args(space)
def objective(**X):
    """Train one autoencoder configuration and return the value to minimize.

    Parameters
    ----------
    **X : dict
        One value per dimension of `space`, keyed by dimension name
        ('target_dim', 'hlf', 'learning_rate', 'eflow_base', 'norm_type',
        'loss'); supplied by the `use_named_args` decorator.

    Returns
    -------
    float
        ``1 - score`` where ``score`` is the value returned by
        ``ev.ae_train`` (reported below as the "Max AUC"), so that minimizing
        this function maximizes the score.
    """
    print("computing configuration {}".format(X))

    # 'eflow_base' picks which pre-built dataset directory is used.
    max_auc = ev.ae_train(
        qcd_path='data/background/base_{}/*.h5'.format(X['eflow_base']),
        signal_path="data/signal/base_{}/*.h5".format(X['eflow_base']),
        epochs=100,
        hlf=X['hlf'],
        eflow=1,
        target_dim=X['target_dim'],
        learning_rate=X['learning_rate'],
        optimizer='adam',
        loss=X['loss'],
        seed=random_state,
        norm_args={
            'norm_type': X['norm_type'],
        },
        verbose=0,
    )

    # Report the score itself rather than reconstructing it from 1 - (1 - score).
    print("Max AUC: {}".format(max_auc))

    # minimize 1 - auc
    return 1. - max_auc

# Gaussian-process search over `space`: 100 objective evaluations in total,
# the first 5 at random points, seeded with the shared `random_state`.
results = gp_minimize(
    func=objective,
    dimensions=space,
    n_calls=100,
    n_random_starts=5,
    random_state=random_state,
    verbose=1
)

Iteration No: 1 started. Evaluating function at random point.
computing configuration {'loss': 'mae', 'learning_rate': 0.021830968390524622, 'norm_type': 'StandardScaler', 'target_dim': 16, 'eflow_base': 4, 'hlf': 0}
Max AUC: 0.778291600181
Iteration No: 1 ended. Evaluation done at random point.
Time taken: 140.1837
Function value obtained: 0.2217
Current minimum: 0.2217
Iteration No: 2 started. Evaluating function at random point.
computing configuration {'loss': 'mse', 'learning_rate': 0.00026828750938254388, 'norm_type': 'StandardScaler', 'target_dim': 10, 'eflow_base': 4, 'hlf': 0}
Max AUC: 0.798170635561
Iteration No: 2 ended. Evaluation done at random point.
Time taken: 114.6735
Function value obtained: 0.2018
Current minimum: 0.2018
Iteration No: 3 started. Evaluating function at random point.
computing configuration {'loss': 'mae', 'learning_rate': 0.094762100310105235, 'norm_type': 'MinMaxScaler', 'target_dim': 19, 'eflow_base': 4, 'hlf': 0}
Max AUC: 0.848249196569
Iteration N



Max AUC: 0.849878652906
Iteration No: 29 ended. Search finished for the next optimal point.
Time taken: 106.1520
Function value obtained: 0.1501
Current minimum: 0.1106
Iteration No: 30 started. Searching for the next optimal point.
computing configuration {'loss': 'mae', 'learning_rate': 0.0001, 'norm_type': 'MinMaxScaler', 'target_dim': 2, 'eflow_base': 3, 'hlf': 0}
Max AUC: 0.831268239843
Iteration No: 30 ended. Search finished for the next optimal point.
Time taken: 133.3263
Function value obtained: 0.1687
Current minimum: 0.1106
Iteration No: 31 started. Searching for the next optimal point.
computing configuration {'loss': 'mse', 'learning_rate': 0.0001, 'norm_type': 'MinMaxScaler', 'target_dim': 20, 'eflow_base': 4, 'hlf': 1}
Max AUC: 0.81514706408
Iteration No: 31 ended. Search finished for the next optimal point.
Time taken: 207.5915
Function value obtained: 0.1849
Current minimum: 0.1106
Iteration No: 32 started. Searching for the next optimal point.
computing configuration {

In [None]:
# Second optimization run.
# `objective` is reused from the earlier cell instead of being re-declared here:
# the copy that used to live in this cell was line-for-line identical and
# already depended on that cell for `random_state`.
results2 = gp_minimize(
    objective, space, verbose=1,
    n_calls=100,
    n_random_starts=5,
    random_state=random_state  # shared seed variable (was a hard-coded 42)
)

In [9]:
from skopt.plots import plot_convergence
import matplotlib.pyplot as plt

# Convergence curve of the second run. `results2` only exists after the
# gp_minimize cell that assigns it has been executed; running this cell
# before that one raises NameError.
convergence_axes = plot_convergence(results2)
plt.show()

NameError: name 'results2' is not defined

In [6]:
# List the best point found by the first run, pairing each search-space
# dimension with the value at the same position in `results.x`.
print("Best parameters:")
for dimension, best_value in zip(space, results.x):
    print("{} = {}".format(dimension.name, best_value))

Best parameters:
target_dim = 20
hlf = 1
learning_rate = 0.0001
eflow_base = 3
norm_type = MinMaxScaler
loss = mse


In [14]:
# Load the evaluation helper for a single recorded run, identified by name.
run_name = "hlf_eflow3_8_v19"
e = ev.ae_evaluation(run_name)

found 1 matches with search '/afs/cern.ch/work/l/llepotti/private/CMS/CMSSW_8_0_20/src/autoencodeSVJ/autoencode/data/summary/hlf_eflow3_8_v19.summary'


KeyboardInterrupt: 

In [13]:
# Rank the recorded runs by their 'mae_auc' column, best first.
summary_columns = ['*auc', 'filename', 'target_dim', 'learning_rate', 'input_dim', 'VID']
ranked_ascending = utils.summary().cfilter(summary_columns).sort_values('mae_auc')
# Reverse the ascending sort so the highest 'mae_auc' is shown at the top.
ranked_ascending[::-1]

Unnamed: 0,VID,filename,input_dim,learning_rate,mae_auc,mse_auc,target_dim
101,5,hlf_eflow3_8_v19,19,0.0005,0.895509,0.893366,8
51,5,hlf_eflow3_20_v14,19,0.0001,0.889205,0.889392,20
62,5,hlf_eflow3_20_v22,19,0.0001,0.889205,0.889392,20
73,5,hlf_eflow3_20_v33,19,0.0001,0.889205,0.889392,20
72,5,hlf_eflow3_20_v32,19,0.0001,0.889205,0.889392,20
71,5,hlf_eflow3_20_v31,19,0.0001,0.889205,0.889392,20
70,5,hlf_eflow3_20_v30,19,0.0001,0.889205,0.889392,20
69,5,hlf_eflow3_20_v29,19,0.0001,0.889205,0.889392,20
68,5,hlf_eflow3_20_v28,19,0.0001,0.889205,0.889392,20
67,5,hlf_eflow3_20_v27,19,0.0001,0.889205,0.889392,20
