In [4]:

import numpy as np
import os


#
# reference configuration
#

from tensorflow.keras.callbacks import EarlyStopping
import tensorflow as tf

# Early stopping on the 'val_sp' quantity, keeping the best weights seen.
# NOTE(review): 'val_sp' only exists in the Keras logs if some callback or
# metric publishes it; nothing in this cell does (metrics is just accuracy).
# Confirm that a provider of 'val_sp' is attached before training, otherwise
# Keras cannot evaluate this stopping condition.
stop = EarlyStopping(monitor='val_sp', mode='max', verbose=1, patience=25, restore_best_weights=True)

import datetime, os
from tensorflow.keras.callbacks import TensorBoard

# One TensorBoard log directory per run, named after the current timestamp.
logdir = os.path.join('.', 'logs/%s' %(datetime.datetime.now().strftime("%Y%m%d-%H%M%S")))
tensorboard = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)


# --- training hyper-parameters (each name is defined exactly once) ---
loss              = 'binary_crossentropy'
metrics           = ['accuracy']   # list form is accepted by every Keras version
batch_size        = 500
callbacks         = [stop, tensorboard]
epochs            = 50
class_weight      = True
optimizer         = 'adam'

# --- job layout ---
job_auto_config   = 'jobConfig/job_config.ID_0000.ml0.mu5_sl0.su0_il0.iu0.29-Aug-2021-17.28.16.pic.gz'
# Both are iterated with enumerate() by the training loop, so they must be
# iterables of indices rather than plain counts.
sorts             = range(1)
inits             = range(1)
__verbose         = True
__model_generator = None

# --- data-set sizing, consumed by get_data() ---
total              = 100000   # number of background events kept after sub-sampling
background_percent = 0.99     # background fraction of the test stream
test_size          = 0.3      # fraction of the reduced background held out for testing

In [5]:
from Gaugi.messenger import Logger
from Gaugi.messenger.macros import *
from Gaugi import StatusCode, checkForUnusedVars, retrieve_kw
from six import print_

from tensorflow.keras.models import clone_model
from tensorflow.keras import backend as K
from sklearn.utils.class_weight import compute_class_weight

from datetime import datetime
from copy import deepcopy
import numpy as np
import time
import os

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import normalize
import pandas as pd

def lock_as_completed_job(output):
    """Create the marker file ``<output>/.complete`` containing the text ``complete``.

    Parameters
    ----------
    output : str
        Directory in which the marker file is written; it must already exist.
    """
    marker_path = output + '/.complete'
    with open(marker_path, 'w') as marker:
        marker.write('complete')


def lock_as_failed_job(output):
    """Create the marker file ``<output>/.failed`` containing the text ``failed``.

    Parameters
    ----------
    output : str
        Directory in which the marker file is written; it must already exist.
    """
    marker_path = output + '/.failed'
    with open(marker_path, 'w') as marker:
        marker.write('failed')


def get_data( total, background_percent, test_size,
              background_path='/home/thiago/Documents/Data_Sets/LPC-anomaly-detection/Input_Background_1.csv',
              signal_path='/home/thiago/Documents/Data_Sets/LPC-anomaly-detection/Input_Signal_1.csv',
              random_state=None ):
    """Load the background/signal CSV files and build the train and test sets.

    The training set contains background events only. The test set stacks a
    background sub-sample on top of a signal sub-sample, so the first
    ``len(background_test)`` rows are background (label 0) and the remaining
    rows are signal (label 1).

    Parameters
    ----------
    total : int
        Number of background events kept before the train/test split.
        Must satisfy ``0 < total < number of background rows``.
    background_percent : float
        Fraction of the held-out background kept in the test set; the number
        of signal events is ``int(n_held_out * (1 - background_percent))``.
    test_size : float or int
        Forwarded to ``train_test_split`` when dividing the reduced
        background into training and held-out parts.
    background_path, signal_path : str, optional
        CSV files to read. The defaults are the locations originally
        hard-coded in this function.
    random_state : int, RandomState instance or None, optional
        Forwarded to every ``train_test_split`` call. ``None`` (default)
        keeps the original unseeded behaviour.

    Returns
    -------
    train_data : numpy.ndarray
        Normalised background-only training samples.
    test_data : numpy.ndarray
        Normalised test samples (background rows first, then signal rows).
    test_df : pandas.DataFrame
        ``test_data`` with the 21 attribute names as columns.
    test_labels : numpy.ndarray
        0 for background rows and 1 for signal rows of ``test_data``.

    Raises
    ------
    ValueError
        If ``total`` is not a valid sub-sample size of the background, or if
        the requested number of signal events is zero or not smaller than
        the number of available signal rows.
    """

    ##########################################################
    # ------------------------------------------------------ #
    # ----------------------- LOADING ---------------------- #
    # ------------------------------------------------------ #
    ##########################################################
    # Load the background and signal files. The first line of each file
    # holds the column names, which genfromtxt parses as a row of NaN;
    # that row is dropped right after loading.

    print('==== Commencing Initiation ====\n')

    ### Background
    background = np.genfromtxt(background_path, delimiter=',')
    background = background[1:,:]
    print(".Background Loaded..." )
    print(".Background shape: {}".format(background.shape))

    ### Signal
    signal = np.genfromtxt(signal_path, delimiter=',')
    signal = signal[1:,:]
    print(".Signal Loaded...")
    print(".Signal shape: {}\n".format(signal.shape))

    ##########################################################
    # ------------------------------------------------------ #
    # -------------------- PRE-PROCESSING ------------------ #
    # ------------------------------------------------------ #
    ##########################################################

    print('\n          ==== Initiation Complete ====\n')
    print('=*='*17 )
    print('\n      ==== Commencing Pre-processing ====\n')

    # Fail early with a readable message instead of a generic sklearn error.
    if not 0 < total < len(background):
        raise ValueError(
            "total must be in (0, %d) to sub-sample the background, got %r"
            % (len(background), total))

    # Reducing background samples. An integer test_size asks for exactly
    # that many rows, avoiding the ceil() rounding applied to float fractions.
    _,reduced_background = train_test_split(background,
                                            test_size=int(total),
                                            random_state=random_state)

    # Dividing train and test background
    train_data, background_test = train_test_split(reduced_background,
                                                   test_size=test_size,
                                                   random_state=random_state)

    # Number of signal events to insert in the test stream. It is computed
    # from the held-out background size BEFORE that set is reduced below.
    n_signal_samples = int(len(background_test)*(1-background_percent))

    if not 0 < n_signal_samples < len(signal):
        raise ValueError(
            "cannot draw %d signal events out of %d available; adjust total, "
            "test_size or background_percent"
            % (n_signal_samples, len(signal)))

    _,background_test = train_test_split(background_test,
                                         test_size=background_percent,
                                         random_state=random_state)

    _,signal_test = train_test_split(signal,
                                     test_size=n_signal_samples,
                                     random_state=random_state)

    # Concatenating Signal and the Background sub-sets (background first)
    test_data = np.vstack((background_test,signal_test))

    # Normalize Data
    # NOTE(review): train and test are scaled independently, each by its own
    # column-wise max norm, so the two sets do not share a scale. Confirm
    # this is intended; reusing the training scale on the test set is the
    # usual practice.

    print('.Normalizing Data')

    test_data = normalize(test_data,
                            norm='max',
                            axis=0
                        )

    train_data = normalize(train_data,
                            norm='max',
                            axis=0
                        )

    # Creates test data frame

    attributes = np.array(["px1",
                            "py1",
                            "pz1",
                            "E1",
                            "eta1",
                            "phi1",
                            "pt1",
                            "px2",
                            "py2",
                            "pz2",
                            "E2",
                            "eta2",
                            "phi2",
                            "pt2",
                            "Delta_R",
                            "M12",
                            "MET",
                            "S",
                            "C",
                            "HT",
                            "A"]
                        )

    test_df = pd.DataFrame(test_data,columns = attributes)

    # Creating Labels: everything starts as signal (1), then the leading
    # background rows are reset to 0.
    print('.Creating Labels')

    test_labels =np.ones((len(test_data)))
    test_labels[:len(background_test)] = 0

    print('\n      ==== Pre-processing Complete ====\n')
    print(".Train data shape: {}".format(train_data.shape))
    print(".Test data shape: {}".format(test_data.shape))
    print(".Test Background shape: {}".format(background_test.shape))
    print(".Test Signal shape: {}".format(signal_test.shape))

    print('=*='*17 )

    return train_data,test_data,test_df,test_labels



In [None]:
# Training loop over every (sort, model, init) combination.
#
# This cell was adapted from a class method; it now reads the module-level
# configuration (optimizer, loss, metrics, callbacks, epochs, batch_size,
# __verbose, __model_generator) instead of attributes of `self`.
#
# NOTE(review): `__models` is not defined in any visible cell; it must be an
# iterable of Keras models defined before this cell is run.

# `sorts` / `inits` may be given either as an iterable of indices or as a
# plain count; normalise both to iterables so enumerate() works.
sort_indices = range(sorts) if isinstance(sorts, int) else sorts
init_indices = range(inits) if isinstance(inits, int) else inits

histories = []               # one Keras history dict per trained (sort, model, init)
local_test_detected = False  # set when LOCAL_TEST asks to skip the fitting

for isort, sort in enumerate( sort_indices ):

    # get the train and test sets for this sort
    train_data,test_data,test_df,test_labels = get_data(total, 
                                                        background_percent, 
                                                        test_size)
    for imodel, model in enumerate( __models ):

        for iinit, init in enumerate( init_indices ):

            print(model)
            # get the model "ptr" for this sort, init and model index
            if __model_generator:
                print(  "Apply model generator..." )
                model_for_this_init = __model_generator( sort )
            else: 
                model_for_this_init = clone_model(model) # get only the model


            try:
                model_for_this_init.compile( optimizer,
                            loss = loss,
                            # protection for functions or classes with internal variables
                            # this copy avoids the current training affecting the next one.
                            metrics = deepcopy(metrics),
                            )
                model_for_this_init.summary()
            except RuntimeError as e:
                print("Compilation model error: %s" % e)
                # Fitting a model that failed to compile cannot succeed.
                raise


            print("Training model id (%d) using sort (%d) and init (%d)" % (imodel, sort, init) )

            # Work on a private copy so the module-level `callbacks` list is
            # never rebound or mutated by a training run.
            callbacks_for_this_init = deepcopy(callbacks)

            start = datetime.now()
            
            # Hack: used by orchestra to set this job as local test
            if os.getenv('LOCAL_TEST'): 
                print(  "The LOCAL_TEST environ was detected." )
                print(  "This is a short local test, lets skip the fitting for now. ")
                local_test_detected = True
                break

            # NOTE(review): the model is fed two inputs (the samples and a
            # vector of ones) and is trained to reproduce `train_data`;
            # confirm this matches the input signature of the models in use.
            Train = [train_data, np.ones(len(train_data))]

            # Training
            history = model_for_this_init.fit(x = Train, 
                                y = train_data,
                                epochs          = epochs,
                                batch_size      = batch_size,
                                verbose         = __verbose,
                                validation_split= 0.1,
                                callbacks       = callbacks_for_this_init,
                                shuffle         = True).history

            end = datetime.now()

            # Wall-clock duration of this fit.
            training_time = end - start

            histories.append(history)

            # Clear everything for the next init
            K.clear_session()

        if local_test_detected:
            break

    if local_test_detected:
        break

    # Clear the keras once again just to be sure
    K.clear_session()

# Saves the models in .json 

In [16]:
# Build one job-configuration file per (model window, sort window, init window).
# NOTE(review): time_stamp_func, mkdir_p, create_iter, product, Job_v1 and the
# outputFolder / models / n*PerJob / sortBounds / nInits values are not defined
# in the visible cells; they must already exist in the kernel.
time_stamp = time_stamp_func()

# The destination folder has to exist before any job file is saved into it.
mkdir_p(outputFolder)

# A single model is accepted as well: wrap it into a one-element list.
if type(models) is not list:
    models = [models]

# Shared window builder: the `width` consecutive indices starting at `start`.
_index_window = lambda start, width: list(range(start, start+width))

modelJobsWindowList = create_iter(_index_window, nModelsPerJob, len(models))
sortJobsWindowList  = create_iter(_index_window, nSortsPerJob, sortBounds)
initJobsWindowList  = create_iter(_index_window, nInitsPerJob, nInits)

# nJobs doubles as the id of the next job and, once the loop is over, as the
# total number of jobs created.
nJobs = 0
_job_windows = product(modelJobsWindowList, sortJobsWindowList, initJobsWindowList)
for (model_idx_list, sort_list, init_list) in _job_windows:
    job = Job_v1()
    # to be used by the database table
    job.setId( nJobs )
    job.setSorts(sort_list)
    job.setInits(init_list)
    job.setModels([models[idx] for idx in model_idx_list],  model_idx_list )

    # The file name encodes the lower/upper bound of each index window.
    model_str = 'ml%i.mu%i' %(model_idx_list[0], model_idx_list[-1])
    sort_str  = 'sl%i.su%i' %(sort_list[0], sort_list[-1])
    init_str  = 'il%i.iu%i' %(init_list[0], init_list[-1])
    _job_file = 'job_config.ID_%s.%s_%s_%s.%s' % (
        str(nJobs).zfill(4), model_str, sort_str, init_str, time_stamp)

    # save config file
    job.save( outputFolder + '/' + _job_file )
    nJobs += 1

# Train the models

In [19]:
# Best-effort GPU setup: open a TF1-compat interactive session whose GPU
# options have `allow_growth` enabled. Any failure is reported and ignored
# so the rest of the notebook can still run.
try:
    from tensorflow.compat.v1 import ConfigProto, InteractiveSession

    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
except Exception as gpu_error:
    print(gpu_error)
    print("Not possible to set gpu allow growth")



In [24]:
def getPileup( path ):
    """Return column 0 of the ``'data'`` entry of the object loaded from ``path``.

    NOTE(review): ``load`` is not defined in the visible cells; it is assumed
    to return a mapping whose ``'data'`` value is a 2-D array — confirm.
    """
    payload = load(path)
    data = payload['data']
    return data[:,0]


def getJobConfigId( path ):
    """Return the ``'id'`` entry of the job configuration stored at ``path``.

    The loaded object is converted to a plain ``dict`` before the lookup.
    NOTE(review): ``load`` is not defined in the visible cells — confirm its
    origin.
    """
    job_config = dict(load(path))
    return job_config['id']

In [None]:
# Run the tuning job described by `args.configFile` and leave a completion
# or failure marker for orchestra.
# NOTE(review): `args`, `targets` and `getPatterns` are not defined in the
# visible cells; they must already exist in the kernel.

# `sys` is not imported by any visible cell. Without this import the
# sys.exit(0) call raised NameError, which the handler below caught and
# turned into a "failed" marker for a job that had actually completed.
import sys

try:

    job_id = getJobConfigId( args.configFile )
    
    mkdir_p('results')

    outputFile = './results/tunedDiscr.jobID_%s'%str(job_id).zfill(4)

    from saphyra.decorators import Summary, Reference
    decorators = [Summary(), Reference(args.refFile, targets)]

    from saphyra.callbacks import sp


    from saphyra import PatternGenerator
    from sklearn.model_selection import StratifiedKFold
    from saphyra.applications import BinaryClassificationJob


    job = BinaryClassificationJob(  PatternGenerator( args.dataFile, getPatterns ),
                                  StratifiedKFold(n_splits=10, random_state=512, shuffle=True),
                                  job               = args.configFile,
                                  loss              = 'mean_squared_error',
                                  metrics           = ['accuracy'],
                                  callbacks         = [sp(patience=25, verbose=True, save_the_best=True)],
                                  epochs            = 5000,
                                  class_weight      = False,
                                  outputFile        = outputFile )
    # TODO: this is where the optimizer should be added (optimizer = 'adam').
    # Plan: modify ADAM with SetMembership and pass it as a parameter of
    # BinaryClassificationJob.

    job.decorators += decorators


    # Run it!
    job.run()


    # necessary to work on orchestra
    from saphyra import lock_as_completed_job
    lock_as_completed_job(args.volume if args.volume else '.')

except  Exception as e:
    print(e)

    # necessary to work on orchestra
    from saphyra import lock_as_failed_job
    lock_as_failed_job(args.volume if args.volume else '.')

    sys.exit(1)

else:
    # Reached only when everything above succeeded.
    sys.exit(0)