In [1]:
import tensorflow as tf
from tensorflow import keras
# GPU on
gpus = tf.config.list_physical_devices('GPU')
for gpu in gpus:
    print(gpu)
    tf.config.experimental.set_memory_growth(gpu, True)
import tensorflow.keras.layers as tfl
import keras_tuner as kt

import matplotlib.pyplot as plt
import os
import sys
sys.path.append('../')
sys.path.append('../nn_builds')

from nn_constructor import *
from nn_to_tune import *
from ds_making import *
import losses

from compare_auc_delong_xu import *

2023-05-30 10:39:29.009940: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-30 10:39:34.920089: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/home/albert/miniconda3/envs/Baikal2/lib/:/home/albert/miniconda3/envs/Baikal2/lib/
2023-05-30 10:39:34.920296: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/home/albert/miniconda3/envs/Baikal2/lib/:/h

PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')


In [2]:
def separate_preds(preds, labels):
    with h5.File(path_to_h5, 'r') as hf:
        idxs_mu = np.where(labels == 0)[0]
        idxs_nu = np.where(labels == 1)[0]
        preds_mu = preds[idxs_mu]
        preds_nu = preds[idxs_nu]
    return preds_mu, preds_nu

def plot_hists(preds, labels, bins = 100, regime = 'test'):  
    preds_mu, preds_nu = separate_preds(preds, labels)
    fig = plt.figure(figsize=(15, 10))
    plt.title("Density of events vs predictions", fontsize=14)
    plt.hist(preds_mu, bins = bins, histtype = 'step', density = True, log = True, label = 'only Mu')
    plt.hist(preds_nu, bins = bins, histtype = 'step', density = True, log = True, label = 'only Nu')
    plt.legend(fontsize=14, loc=9)
    plt.xlabel("predicted confidence", fontsize=14)
    plt.ylabel("density of events", fontsize=14)
    #plt.savefig('Figures/'+model_name+'/Hist_of_events_'+regime+'.png')
    #plt.close(fig)
    return

In [3]:
def expos(tr, preds_nu):
    num = np.sum(np.where(preds_nu>tr, 1, 0))
    return num/len(preds_nu)

def array_expos(preds_nu, arr_tr = np.linspace(0,1,1000)):
    arr = np.zeros(len(arr_tr))
    for n, tr in enumerate(arr_tr):
        arr[n] = expos(tr,preds_nu)
    return arr

In [4]:
def my_metric(labels,preds):
    preds_mu, preds_nu = separate_preds(preds, labels)
    index_mu = np.where(array_expos(preds_mu) < 5e-5)[0][0]
    tr_best = index_mu/10**int(np.log10(index_mu)+1)
    return expos(tr_best, preds_nu)

In [5]:
def train_model_to_tune(model, path_to_h5, batch_size, lr_initial, 
                model_name,
                num_of_epochs = 10, steps_per_epoch = 1e4, verbose = 1, is_mask = True):
    with h5.File(path_to_h5, 'r') as hf:
        total_num = hf['train/data/data'].shape[0]
        total_steps = total_num//batch_size
    optimizer = tf.keras.optimizers.Adam(learning_rate = lr_initial, beta_1=0.9, beta_2=0.999, epsilon=1e-07, amsgrad=False, name='Adam')
    model.compile(optimizer=optimizer, loss=losses.focal_loss(2., 2., 10., 1.))
    callbacks = [tf.keras.callbacks.EarlyStopping(monitor='loss', patience=2, min_delta = 2e-3)]

    train_dataset = make_train_dataset(path_to_h5, batch_size, Shape, is_mask = is_mask)

    history = model.fit(train_dataset, 
                        steps_per_epoch=steps_per_epoch, epochs=num_of_epochs, 
                        verbose=verbose, callbacks=callbacks)

    return history

In [6]:
file_name = "baikal_mu-nu_h5-s2_tres08_old_norm.h5"
path_to_h5 = '../../../../../../ivkhar/Baikal/data/' + file_name
mn = 'nn_mask_1lstm_3resblocks_1lstm'

In [7]:
model_name = mn + '_noise_' + file_name[0:-3]
is_mask = True
try:
    os.makedirs('logs/'+model_name)
    print('directory for the model is created')
except:
    print('directory for the model already exists')

directory for the model already exists


In [8]:
# getting the shape of data
with h5.File(path_to_h5, 'r') as hf:
    Shape = hf['train/data/data'].shape[1:]
    if is_mask:
        Shape = tuple([None,Shape[1]+1])

In [23]:
#space of hyperparams

k_id_list = [0]*3
f_id_list = [0]*3
k_cd_list = [0]*3
f_cd_list = [0]*3
s_cd_list = [0]*3

u1_list = [4,8,16]

k_id_list[0] = [4,8,16]
k_id_list[1] = [4,8,16]
k_id_list[2] = [4,8,16]

k_cd_list = k_id_list

f_id_list[0] = [64] #[32,64,128]
f_id_list[1] = [128] #[32,64,128]
f_id_list[2] = [64] #[32,64,128]

f_cd_list = f_id_list

s_cd_list = [[2],[2],[2]]

u2_list = [4,8,16]

lr_list = [0.005]
batch_size = 256

keys = ['u1','u2',
        'f_id_list_0','f_id_list_1','f_id_list_2',
        'k_id_list_0','k_id_list_1','k_id_list_2',
        'f_cd_list_0','f_cd_list_1','f_cd_list_2',
        'k_cd_list_0','k_cd_list_1','k_cd_list_2',
        's_cd_list_0','s_cd_list_1','s_cd_list_2']
space = {'u1':u1_list,'u2':u2_list,
        'f_id_list_0':f_id_list[0],'f_id_list_1':f_id_list[1],'f_id_list_2':f_id_list[2],
        'k_id_list_0':k_id_list[0],'k_id_list_1':k_id_list[1],'k_id_list_2':k_id_list[2],
        'f_cd_list_0':f_cd_list[0],'f_cd_list_1':f_cd_list[1],'f_cd_list_2':f_cd_list[2],
        'k_cd_list_0':k_cd_list[0],'k_cd_list_1':k_cd_list[1],'k_cd_list_2':k_cd_list[2],
        's_cd_list_0':s_cd_list[0],'s_cd_list_1':s_cd_list[1],'s_cd_list_2':s_cd_list[2]}

def generate_idxs(space = space):
    keys = space.keys()
    lengths = [len(space[k]) for k in keys]
    arrays = [np.arange(l) for l in lengths]
    t = np.meshgrid(*arrays)
    for i in range(len(t)):
        t[i] = t[i].flatten()
    for i in zip(*t):
        idxs = list(i)
        yield idxs 

def make_config(idxs, space = space):
    keys = list(space.keys())
    assert len(idxs) == len(keys)
    config = dict.fromkeys(keys)
    for i,k in enumerate(keys):
        config[k] = space[k][idxs[i]]
    f_id_list = [config[keys[m]] for m in range(2,5)]
    #k_id_list = [config[keys[m]] for m in range(5,8)]
    f_id_list = [config[keys[m]] for m in range(8,11)]
    k_cd_list = [config[keys[m]] for m in range(11,14)]
    k_id_list = k_cd_list #temporary
    s_cd_list = [config[keys[m]] for m in range(14,17)]
    keys_new = ['u1','u2',
                'f_id_list', 
                'k_id_list',
                'f_cd_list',
                'k_cd_list',
                's_cd_list']
    config_new = {'u1':config['u1'],'u2':config['u2'],
                'f_id_list':f_id_list, 
                'k_id_list':k_id_list,
                'f_cd_list':f_id_list,
                'k_cd_list':k_cd_list,
                's_cd_list':s_cd_list}
    return config_new 

In [24]:
test_dataset = make_test_dataset(path_to_h5, batch_size, (None,None,6), is_mask = is_mask)
with h5.File(path_to_h5, 'r') as hf:
    labels = np.zeros((339968, 2))
    ids = hf['test/ev_ids/data'][:]  # id of event - starting with 'nu' or 'mu'
    ids = np.array([i[0] for i in ids]).reshape(ids.shape[0],1)
    labels = np.where(ids[0:339968] == 110, [0,1], [1,0])  # 110 - byte code for 'n'
labels = labels[:,1]

In [None]:
lr = 5e-3
f_list = [64,128,64]


steps_per_epoch = 1e3
num_of_epochs = 20
print('Max steps in trial:', int(steps_per_epoch*num_of_epochs))

trial = 0
logs = []
config_list = []
auc_list, metric_list = [], []

I = generate_idxs()
for j in range(27):
    config = make_config(next(I))
    trial +=1
    print('Trial # is ', trial)
    metrics = []
    aucs = []
    print('Learning rate:', lr,'.\nConfig:', config)
    for i in range(4):
        model = globals()[mn]((None,6), **config)
        history = train_model_to_tune(model, path_to_h5, batch_size, lr, 
                                      model_name, steps_per_epoch = steps_per_epoch, num_of_epochs = num_of_epochs)
        print('Predictions on test dataset:')
        preds = model.predict(test_dataset)
        preds = preds[:,1]
        metric = my_metric(labels,preds)
        auc, std_auc = delong_roc_variance(labels,preds)
        metrics.append(metric)
        aucs.append([auc,std_auc])
        print('AUC:', str(np.round(auc, 6))+'+-'+str(np.round(std_auc, 14)),'. Exposition:', np.round(metric,4), '. \n')
    config_list.append(config)
    auc_list.append(np.array(aucs)), metric_list.append(np.array(metrics))
res = {'auc':auc_list, 'my_metric': metric_list}
logs.append([config_list, res])

Max steps in trial: 20000
Trial # is  1
Learning rate: 0.005 .
Config: {'u1': 4, 'u2': 4, 'f_id_list': [64, 128, 64], 'k_id_list': [4, 4, 4], 'f_cd_list': [64, 128, 64], 'k_cd_list': [4, 4, 4], 's_cd_list': [2, 2, 2]}
Epoch 1/20
Epoch 2/20
Epoch 3/20

In [22]:
next(I)
config = make_config(next(I))
#config

{'u1': 4, 'u2': 4, 'f_id_list': [64, 128, 64], 'k_id_list': [4, 8, 8], 'f_cd_list': [64, 128, 64], 'k_cd_list': [4, 8, 8], 's_cd_list': [2, 2, 2]}


In [39]:
config

{'u1': 4,
 'u2': 4,
 'f_id_list': [64, 128, 64],
 'k_id_list': [16, 16, 16],
 'f_cd_list': [64, 128, 64],
 'k_cd_list': [16, 16, 16],
 's_cd_list': [2, 2, 2]}

In [53]:
def get_trial_info(i, logs = logs):
    info = dict.fromkeys(logs[0][0][0].keys())
    for k in logs[0][0][0].keys():
        info[k] = logs[0][0][i][k]
    for k in logs[0][1].keys():
        info[k] = logs[0][1][k][i]
    info['auc'] = min([j[0] for j in info['auc']])
    info['my_metric_min'] = min([j for j in info['my_metric']])
    info['my_metric_max'] = max([j for j in info['my_metric']])
    info['my_metric_mean'] = np.mean([j for j in info['my_metric']])
    return info

In [54]:
mm = 0
for n, l in enumerate(logs[0][0]):
    info = get_trial_info(n)
    i = info['my_metric_mean']
    if i>mm:
        mm = i
        t = n
print('Best trial is:')
get_trial_info(t)

Best trial is:


{'u1': 4,
 'u2': 4,
 'f_id_list': [64, 128, 64],
 'k_id_list': [4, 4, 8],
 'f_cd_list': [64, 128, 64],
 'k_cd_list': [4, 4, 8],
 's_cd_list': [2, 2, 2],
 'auc': 0.999749776849993,
 'my_metric': array([0.92109144, 0.91459788, 0.91880199, 0.92195507]),
 'my_metric_min': 0.91459788001988,
 'my_metric_max': 0.9219550746718593,
 'my_metric_mean': 0.919111596340142}

In [68]:
get_trial_info(4)

{'u1': 4,
 'u2': 4,
 'f_id_list': [64, 128, 64],
 'k_id_list': [4, 8, 8],
 'f_cd_list': [64, 128, 64],
 'k_cd_list': [4, 8, 8],
 's_cd_list': [2, 2, 2],
 'auc': 0.9998100738097733,
 'my_metric': array([0.88609792, 0.91376683, 0.91588519, 0.86954219]),
 'my_metric_min': 0.8695421918410912,
 'my_metric_max': 0.9158851853964167,
 'my_metric_mean': 0.8963230321744869}