In [25]:
import tensorflow as tf
from Gaugi import retrieve_kw, mkdir_p
from Gaugi.messenger import Logger
from Gaugi.messenger.macros import *
from Gaugi import load
from itertools import product
from keras.layers import Dense, Input, Concatenate, Flatten, BatchNormalization, Dropout, LeakyReLU
from keras.models import Sequential, Model
import pandas as pd
import numpy as np

In [26]:
# ======================================================== #
#                        INITIATION                        #
# ======================================================== #
# Global knobs for the study. Everything here is a plain literal.

# --- Data set ---
total = 100000              # Number of events
background_percent = 0.99   # Share of background samples in the testing phase (stored as a fraction)
# NOTE(review): the name says "test", while the original note described this
# as the percentage of samples on the training phase -- confirm which one is meant.
test_size = 0.3

# --- Repetitions ---
n_it = 33                   # Number of iterations

# --- Hyper-parameter ranges (lower bound, upper bound) ---
min_batch_size, max_batch_size = 200, 500
min_hidden_dim, max_hidden_dim = 8, 21

In [27]:
# Parameters in study

batch_size_list = list(np.linspace(min_batch_size,max_batch_size,num=3,dtype=int))
encoding_dim_list = list(np.linspace(max_hidden_dim,min_hidden_dim,num=3,dtype=int))
lambda_disco_list = list(np.linspace(0,600,num=3,dtype=int))

# Activations currently enabled. Other candidates that were considered but are
# switched off: softmax, softplus, softsign, selu, elu, exponential.
act_func_list_1 = ['relu', 'sigmoid', 'tanh']
act_func_list_2 = act_func_list_1
act_func_list_3 = act_func_list_1

# Size of the full grid; each factor uses the list that the matching loop
# dimension iterates over.
n_combinations = (len(batch_size_list) *
                  len(encoding_dim_list) *
                  len(lambda_disco_list) *
                  len(act_func_list_1) *
                  len(act_func_list_2) *
                  len(act_func_list_3))

# Fixed parameters (identical for every grid point, so set once).
nb_epoch = 100
input_dim = 21
# NOTE(review): this value is also used as the L2 activity-regularization
# factor of the first encoder layer -- confirm that sharing is intended.
learning_rate = 0.001


def _build_autoencoder(encoding_dim, act_1, act_2, act_3,
                       input_dim=21, l2_factor=0.001, dropout_rate=0.2):
    """Build one (uncompiled) symmetric autoencoder.

    Layer widths are ``encoding_dim -> encoding_dim // 2 -> encoding_dim // 4``
    on the way in and mirrored on the way out, ending at ``input_dim``.

    Parameters
    ----------
    encoding_dim : int
        Width of the first encoder layer (and of the second decoder layer).
    act_1, act_2, act_3 : str
        Activations of the outer, middle and inner layer pairs, respectively.
    input_dim : int
        Number of input features; also the width of the reconstruction.
    l2_factor : float
        L2 activity-regularization factor applied to the first encoder layer.
    dropout_rate : float
        Dropout rate used after the first encoder and first decoder layer.

    Returns
    -------
    tf.keras.Model
        Model taking ``[features, sample_weights]`` and returning the
        reconstruction. The ``sample_weights`` input is declared but not
        wired into the network.
    """
    # Use tf.keras for every layer so that inputs, layers and Model all come
    # from the same Keras implementation.
    layers = tf.keras.layers

    encoding_dim = int(encoding_dim)  # grid values arrive as numpy integers
    hidden_dim_1 = int(encoding_dim / 2)
    hidden_dim_2 = int(hidden_dim_1 / 2)

    # Input layers
    input_layer = layers.Input(shape=(input_dim, ))
    sample_weights = layers.Input(shape=(1, ))

    # Encoder
    encoder = layers.Dense(encoding_dim,
                           activation=act_1,
                           activity_regularizer=tf.keras.regularizers.l2(l2_factor)
                           )(input_layer)
    encoder = layers.Dropout(dropout_rate)(encoder)
    encoder = layers.Dense(hidden_dim_1, activation=act_2)(encoder)
    encoder = layers.Dense(hidden_dim_2, activation=act_3)(encoder)

    # Decoder (mirror of the encoder)
    decoder = layers.Dense(hidden_dim_1, activation=act_3)(encoder)
    decoder = layers.Dropout(dropout_rate)(decoder)
    decoder = layers.Dense(encoding_dim, activation=act_2)(decoder)
    decoder = layers.Dense(input_dim, activation=act_1)(decoder)

    # Autoencoder
    return tf.keras.Model(inputs=[input_layer, sample_weights],
                          outputs=decoder)


modelCol = []

# product() walks the grid with the right-most list varying fastest, which is
# the same order the equivalent nested for-loops would produce.
# batch_size and lambda_disco do not influence the architecture; a model is
# still built for each of their values so that the position in modelCol
# enumerates the full grid -- NOTE(review): presumably intended, confirm.
for batch_size, encoding_dim, lambda_disco, act_1, act_2, act_3 in product(
        batch_size_list, encoding_dim_list, lambda_disco_list,
        act_func_list_1, act_func_list_2, act_func_list_3):
    autoencoder = _build_autoencoder(encoding_dim, act_1, act_2, act_3,
                                     input_dim=input_dim,
                                     l2_factor=learning_rate)
    modelCol.append(autoencoder)

In [28]:
def get_model():
    """Build five small networks and return them as a list.

    One model is created for each hidden-layer width in ``range(10, 15)``.
    Every model expects 100 input features and consists of a tanh hidden
    layer, a single linear output unit and a final tanh activation.

    Returns
    -------
    list
        The five ``Sequential`` models, ordered by increasing hidden width.
    """
    # Activation is not part of the notebook's top-level keras imports, so it
    # is imported here; without it the function fails with a NameError.
    from keras.layers import Activation

    modelCol = []
    for n in range(10, 15):
        model = Sequential()
        model.add(Dense(n, input_shape=(100,), activation='tanh', name='dense_layer'))
        model.add(Dense(1, activation='linear', name='output_for_inference'))
        model.add(Activation('tanh', name='output_for_training'))
        modelCol.append(model)
    # Hand the collection back to the caller; previously it was built and dropped.
    return modelCol

In [9]:
def time_stamp_func():
    """Return the current local time as a ``DD-Mon-YYYY-HH.MM.SS`` string."""
    from datetime import datetime
    return datetime.now().strftime("%d-%b-%Y-%H.%M.%S")

In [10]:
def create_iter(fun, n_items_per_job, items_lim):
    """Split ``items_lim`` items into consecutive windows and map ``fun`` over them.

    ``fun`` is called as ``fun(start, size)`` once per window, where ``start``
    advances in steps of ``n_items_per_job``. Every window has size
    ``n_items_per_job`` except possibly the last one, which receives the
    remainder ``items_lim % n_items_per_job`` when the items do not divide
    evenly.

    Returns the list of ``fun`` results, in window order.
    """
    results = []
    for start in range(0, items_lim, n_items_per_job):
        if start + n_items_per_job > items_lim:
            # Trailing partial window: only the leftover items remain.
            size = items_lim % n_items_per_job
        else:
            size = n_items_per_job
        results.append(fun(start, size))
    return results

In [11]:
# Names exported by a wildcard import if this cell's code is used as a module.
__all__ = ['Job_v1']


# NOTE(review): wildcard import; none of the names it brings in appear to be
# referenced by Job_v1 in this cell -- confirm whether it is still needed.
from sklearn.model_selection import *
from Gaugi import LoggerStreamable, LoggerRawDictStreamer, RawDictCnv
# Just to remove the keras dependence
import tensorflow as tf
# Module-level alias used by Job_v1.getModels to rebuild models from JSON.
model_from_json = tf.keras.models.model_from_json

import json

class Job_v1( LoggerStreamable ):
    """Container describing one tuning job.

    Holds the sort indices, the initialization indices, the models (stored as
    their JSON architecture plus weights and an id), a job id and free-form
    metadata, and can write all of that to disk through ``save``.
    """

    # Attribute sets handed to the Gaugi streamer/converter; they name the
    # fields that take part in (de)serialization.
    _streamerObj = LoggerRawDictStreamer(toPublicAttrs = {'_metadata','_id' , '_sorts', '_inits', '_models'})
    _cnvObj = RawDictCnv(toProtectedAttrs = {'_metadata','_id', '_sorts', '_inits', '_models'})

    # Format version written next to the payload by save().
    __version =  1

    def __init__( self, **kw ):
        """Create an empty job; keyword arguments are forwarded to LoggerStreamable."""
        LoggerStreamable.__init__(self, kw)
        self._sorts  = []
        self._inits  = []
        self._models = []
        self._id     = None
        self._metadata = None

    def setSorts(self, v):
        """Store the sorts; a bare int is wrapped into a one-element list."""
        if type(v) is int:
            self._sorts = [v]
        else:
            self._sorts = v


    def setInits(self, v):
        """Store the inits; a bare int ``n`` is expanded to ``range(n)``."""
        if type(v) is int:
            self._inits = range(v)
        else:
            self._inits = v


    def getSorts(self):
        """Return the stored sorts."""
        return self._sorts


    def getInits(self):
        """Return the stored inits."""
        return self._inits


    def setMetadata( self, d):
        """Attach an arbitrary metadata object to the job."""
        self._metadata = d


    def getMetadata(self):
        """Return the metadata object (``None`` if never set)."""
        return self._metadata


    def setModels(self, models, id_models):
        """Store models in a serializable form.

        Parameters
        ----------
        models : keras model or list/tuple of keras models
            A single model is treated as a one-element list.
        id_models : sequence (or single id when a single model is given)
            ``id_models[i]`` is recorded as the id of ``models[i]``.
        """
        if not isinstance(models, (list, tuple)):
            models = [models]
            # A lone model may come with a lone (non-indexable) id.
            if not hasattr(id_models, '__getitem__'):
                id_models = [id_models]
        self._models = list()
        for idx, model in enumerate(models):
            # Architecture is kept as a parsed JSON dict, weights as returned
            # by the model itself.
            self._models.append({'model':  json.loads(model.to_json()),
                                'weights': model.get_weights() ,
                                'id_model': id_models[idx]})


    def getModels(self, custom_objects=None):
        """Rebuild the stored keras models.

        Parameters
        ----------
        custom_objects : dict, optional
            Mapping passed to ``model_from_json`` for custom layers. When
            omitted, ``{'RpLayer': RpLayer}`` is used if a global named
            ``RpLayer`` exists; otherwise no custom objects are passed.

        Returns
        -------
        (list, list)
            The rebuilt models and their ids, in stored order.
        """
        if custom_objects is None:
            # RpLayer is not defined or imported by this code, so referencing
            # it directly would raise NameError on every call. Look it up
            # lazily and only use it when it is actually available.
            rp_layer = globals().get('RpLayer')
            custom_objects = {'RpLayer': rp_layer} if rp_layer is not None else None

        # Loop over all keras model
        models = []; id_models = []
        for d in self._models:
            model = model_from_json( json.dumps(d['model'],
                                    separators=(',', ':')) ,
                                    custom_objects=custom_objects)
            model.set_weights( d['weights'] )
            models.append( model )
            id_models.append( d['id_model'] )
        return models, id_models


    def setId( self, id ):
        """Set the job id."""
        self._id = id


    def id(self):
        """Return the job id (``None`` if never set)."""
        return self._id


    def save(self, fname):
        """Write the job to ``fname`` (compressed) together with its format version."""
        d = self.toRawObj()
        d['__version'] = self.__version
        from Gaugi import save
        save( d, fname, compress=True)

# Creates the model architecture

In [14]:
# Folder that receives the generated job-configuration files.
outputFolder = 'job-config'

# Models to distribute, and how many of them go into a single job.
models = modelCol
nModelsPerJob = 5

# Sorts: total number and how many per job.
sortBounds, nSortsPerJob = 1, 1

# Initializations: total number and how many per job.
nInits, nInitsPerJob = 1, 1

# Saves the models in .json 

In [16]:
# One timestamp shared by every file written in this run.
time_stamp = time_stamp_func()
# creating the job mechanism file first
mkdir_p(outputFolder)

if not (type(models) is list):
    models = [models]

# Split models, sorts and inits into per-job index windows. Each window is
# the list of consecutive indices [start, start + count).
modelJobsWindowList = create_iter(
    lambda start, count: list(range(start, start + count)),
    nModelsPerJob, len(models))
sortJobsWindowList = create_iter(
    lambda start, count: list(range(start, start + count)),
    nSortsPerJob, sortBounds)
initJobsWindowList = create_iter(
    lambda start, count: list(range(start, start + count)),
    nInitsPerJob, nInits)

# One job per (model window, sort window, init window) combination.
nJobs = 0
for (model_idx_list, sort_list, init_list) in product(modelJobsWindowList,
                                                      sortJobsWindowList,
                                                      initJobsWindowList):
    # File-name fragments: lower/upper index of every window.
    model_str = 'ml%i.mu%i' %(model_idx_list[0], model_idx_list[-1])
    sort_str  = 'sl%i.su%i' %(sort_list[0], sort_list[-1])
    init_str  = 'il%i.iu%i' %(init_list[0], init_list[-1])

    job = Job_v1()
    # to be used by the database table
    job.setId( nJobs )
    job.setSorts(sort_list)
    job.setInits(init_list)
    job.setModels([models[idx] for idx in model_idx_list],  model_idx_list )

    # save config file
    job.save( outputFolder+'/' + ('job_config.ID_%s.%s_%s_%s.%s') %
          ( str(nJobs).zfill(4), model_str, sort_str, init_str, time_stamp) )
    nJobs+=1

# Train the models

In [19]:
# Best-effort GPU setup: ask TensorFlow to grow GPU memory on demand.
# Any failure is reported and otherwise ignored so the notebook keeps running.
try:
    from tensorflow.compat.v1 import ConfigProto, InteractiveSession

    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
except Exception as e:
    print(e)
    print("Not possible to set gpu allow growth")



In [24]:
def getPileup( path ):
    """Load the file at ``path`` and return ``[:, 0]`` of its ``'data'`` entry.

    NOTE(review): assumes ``'data'`` is a 2-D array whose first column holds
    the pileup values -- confirm against the file format.
    """
    contents = load(path)
    data = contents['data']
    return data[:, 0]


def getJobConfigId( path ):
    """Load the job-configuration file at ``path`` and return its ``'id'`` entry."""
    job_config = dict(load(path))
    return job_config['id']

In [None]:
# sys provides the exit codes used below. Without this import, sys.exit(0)
# raised NameError inside the try block, which the generic handler caught,
# so a successfully completed job was subsequently locked as failed.
import sys

# NOTE(review): `args`, `targets` and `getPatterns` are not defined in any
# cell visible here -- presumably they come from a command-line parser and
# helper code that were not carried over to the notebook; confirm before
# running this cell.
try:

    job_id = getJobConfigId( args.configFile )

    mkdir_p('results')

    outputFile = './results/tunedDiscr.jobID_%s'%str(job_id).zfill(4)

    from saphyra.decorators import Summary, Reference
    decorators = [Summary(), Reference(args.refFile, targets)]

    from saphyra.callbacks import sp


    from saphyra import PatternGenerator
    from sklearn.model_selection import StratifiedKFold
    from saphyra.applications import BinaryClassificationJob


    job = BinaryClassificationJob(  PatternGenerator( args.dataFile, getPatterns ),
                                  StratifiedKFold(n_splits=10, random_state=512, shuffle=True),
                                  job               = args.configFile,
                                  loss              = 'mean_squared_error',
                                  metrics           = ['accuracy'],
                                  callbacks         = [sp(patience=25, verbose=True, save_the_best=True)],
                                  epochs            = 5000,
                                  class_weight      = False,
                                  outputFile        = outputFile )
    # TODO: this is where the optimizer should be added (optimizer = 'adam'):
    # modify ADAM with SetMembership and pass it as a parameter of
    # BinaryClassificationJob.

    job.decorators += decorators


    # Run it!
    job.run()


    # necessary to work on orchestra
    from saphyra import lock_as_completed_job
    lock_as_completed_job(args.volume if args.volume else '.')

    # SystemExit is not an Exception subclass, so the handler below does not
    # intercept this successful exit.
    sys.exit(0)

except  Exception as e:
    print(e)

    # necessary to work on orchestra
    from saphyra import lock_as_failed_job
    lock_as_failed_job(args.volume if args.volume else '.')

    sys.exit(1)