In [1]:
#We can go into our root file and see what Trees are available
%matplotlib inline
import sys, os
if __package__ is None:
    import sys, os
    sys.path.append(os.path.realpath("/data/shared/Software/"))
    sys.path.append(os.path.realpath("../../"))
import numpy as np
import pandas as pd
import ntpath
import glob
import deepconfig

#from keras.utils.visualize_util import plot
#from IPython.display import Image, display

from CMS_SURF_2016.utils.preprocessing import *
from CMS_SURF_2016.utils.callbacks import OverfitStopping, SmartCheckpoint
from CMS_SURF_2016.utils.batch import batchAssertArchived, batchExecuteAndTestTrials
from CMS_SURF_2016.utils.archiving import *
from CMS_SURF_2016.utils.analysistools import findsubsets
from CMS_SURF_2016.layers.lorentz import Lorentz, _lorentz
from CMS_SURF_2016.layers.slice import Slice

from keras.models import Sequential, Model, model_from_json
from keras.layers import Dense, Flatten, Reshape, Activation, Dropout, Convolution2D, merge, Input, Flatten, Lambda, LSTM, Masking
from keras.engine.topology import Layer
from keras.callbacks import EarlyStopping
from keras.utils.visualize_util import plot
from keras.layers.advanced_activations import LeakyReLU


# Select the GPU and the Keras backend through the project's deepconfig helper.
# NOTE(review): this call sits after the keras imports above, and the captured
# output shows "Using Theano backend." before deepconfig's own "using gpu0" /
# "using theano" messages. If deepconfig works by setting environment variables,
# it would have to run before keras/theano are imported to have any effect -- confirm.
dc = deepconfig.deepconfig(gpu='gpu0', backend='theano')




Using Theano backend.
Using gpu device 0: GeForce GTX TITAN X (CNMeM is disabled, cuDNN not available)

Couldn't import dot_parser, loading of dot files will not be possible.
using gpu0
using theano





In [2]:
# Names of the per-object observables, in the order they appear in the table.
observ_types = [
    'Entry', 'E/c',
    'Px', 'Py', 'Pz', 'PT_ET',
    'Eta', 'Phi', 'Charge',
    'X', 'Y', 'Z', 'Dxy',
    'Ehad', 'Eem',
    'MuIso', 'EleIso', 'ChHadIso', 'NeuHadIso', 'GammaIso',
]
# Number of observables per object; used as the feature width of every input.
vecsize = len(observ_types)

# Training settings shared by the cells below.
epochs, batch_size = 60, 100

# (label, directory) pairs, one per sample.
label_dir_pairs = [
    ("ttbar", "/data/shared/Delphes/ttbar_lepFilter_13TeV/pandas_h5/"),
    ("wjet", "/data/shared/Delphes/wjets_lepFilter_13TeV/pandas_h5/"),
    ("qcd", "/data/shared/Delphes/qcd_lepFilter_13TeV/pandas_h5/"),
]


In [3]:
# Turn every subset produced by findsubsets into a sorted list, then add the
# complete label_dir_pairs list (same object, original order) so that the
# 3-way classification is covered as well.
ldpsubsets = list(map(sorted, findsubsets(label_dir_pairs)))
ldpsubsets += [label_dir_pairs]


In [4]:
def genModel(name, out_dim, depth, lstm_activation="relu", lstm_dropout=0.0, dropout=0.0,
             profiles=None):
    """Build a (stacked) LSTM classifier over the concatenated object collections.

    One ``Input`` of shape ``(profile.max_size, vecsize)`` is created per object
    profile. The inputs are concatenated along axis 1, zero rows are masked, and
    the sequence is fed through ``depth`` LSTM layers (each optionally followed by
    Dropout) and a final sigmoid Dense layer.

    Args:
        name: Name given to the resulting Keras ``Model``.
        out_dim: Number of units of the final Dense layer.
        depth: Number of stacked LSTM layers.
        lstm_activation: Activation passed to every LSTM layer.
        lstm_dropout: Value used for both ``dropout_W`` and ``dropout_U`` of every LSTM.
        dropout: Rate of the Dropout layer added after each LSTM; no Dropout
            layer is added when it is not greater than 0.
        profiles: Object profiles (each exposing ``max_size``) that define the
            inputs. Defaults to the module-level ``object_profiles`` so existing
            callers keep working.

    Returns:
        The uncompiled Keras ``Model``.
    """
    if profiles is None:
        # Backward-compatible fallback to the notebook-level variable.
        profiles = object_profiles

    inputs = [Input(shape=(profile.max_size, vecsize), name="input_" + str(i))
              for i, profile in enumerate(profiles)]
    a = merge(inputs, mode='concat', concat_axis=1, name="merge")

    if depth > 0:
        # Mask the zero rows once, on the raw input. Recurrent layers that return
        # sequences pass the mask on, whereas re-applying Masking to LSTM outputs
        # would recompute it from the output values instead of the padding.
        a = Masking(mask_value=0.0)(a)

    for i in range(depth):
        a = LSTM(vecsize,
                 input_shape=(None, vecsize),
                 dropout_W=lstm_dropout,
                 dropout_U=lstm_dropout,
                 activation=lstm_activation,
                 # Every LSTM except the last must emit the full sequence,
                 # otherwise the next LSTM receives a 2D tensor and the model
                 # cannot be built for depth > 1.
                 return_sequences=(i < depth - 1),
                 name="lstm_" + str(i))(a)
        if dropout > 0.0:
            a = Dropout(dropout, name="dropout_" + str(i))(a)

    dense_out = Dense(out_dim, activation='sigmoid', name='main_output')(a)
    return Model(input=inputs, output=dense_out, name=name)

In [5]:
# Build one KerasTrial per (label subset, sort column, hyperparameter combination),
# write it to the archive, queue it, and finally run the whole queue in one batch.
archive_dir = "/data/shared/Delphes/keras_archive/"
patience = 10
earlyStopping = EarlyStopping(verbose=1, patience=patience)
# Single definition of the selection applied to the EFlow collections: it is used
# to build the profiles and is stored in each trial's record, so the recorded
# value cannot drift from the one actually applied.
pt_query = "PT_ET > 1.0"
trial_tups = []
#Loop over all subsets
for ldp in ldpsubsets:
    labels = [x[0] for x in ldp]
    for sort_on in ["PT_ET", "Phi", "Eta"]:
        #Use object maxes from Find_Maxes_From Query
        # NOTE: genModel reads the notebook-level name `object_profiles`, so this
        # variable name must not change.
        object_profiles = [
            ObjectProfile("Electron", -1),
            ObjectProfile("MuonTight", -1),
            ObjectProfile("Photon", -1),
            ObjectProfile("MissingET", 1),
            ObjectProfile("EFlowPhoton", 200, query=pt_query,
                          sort_columns=[sort_on], sort_ascending=False),
            ObjectProfile("EFlowNeutralHadron", 370, query=pt_query,
                          sort_columns=[sort_on], sort_ascending=False),
            ObjectProfile("EFlowTrack", 420, query=pt_query,
                          sort_columns=[sort_on], sort_ascending=False),
        ]

        resolveProfileMaxes(object_profiles, ldp)

        # presumably (75000, 20000, 20000) are the train/val/test sample counts
        # and 135000 the total -- TODO confirm against getGensDefaultFormat.
        dps, l = getGensDefaultFormat(archive_dir, (75000,20000,20000), 135000,
                                      object_profiles, ldp, observ_types,
                                      megabytes=100, verbose=0)

        dependencies = batchAssertArchived(dps)
        train, num_train = l[0]
        val,   num_val   = l[1]
        test,  num_test  = l[2]
        max_q_size = l[3]
        print("MAXQ: ",max_q_size)

        # Hyperparameter grid, visited in the same order as the equivalent nested
        # loops (name outermost, dropout innermost). The generator is rebuilt for
        # every (ldp, sort_on) pair because it can only be consumed once.
        grid = ((name, depth, activation, lstm_dropout, dropout)
                for name in ['LSTM']
                for depth in [1]
                for activation in ['tanh']
                for lstm_dropout in [0.0]
                for dropout in [0.0])

        for name, depth, activation, lstm_dropout, dropout in grid:
            # An activation may be given as a string or as a callable; the record
            # stores a string either way.
            if isinstance(activation, str):
                activation_name = activation
            else:
                activation_name = activation.__name__

            model = genModel(name, len(labels), depth, activation, lstm_dropout, dropout)

            trial = KerasTrial(archive_dir, name=name, model=model)

            trial.setTrain(train_procedure=train,
                           samples_per_epoch=num_train)
            trial.setValidation(val_procedure=val,
                                nb_val_samples=num_val)
            trial.setCompilation(loss='binary_crossentropy',
                                 optimizer='rmsprop',
                                 metrics=['accuracy'])
            trial.setFit_Generator(nb_epoch=epochs,
                                   callbacks=[earlyStopping],
                                   max_q_size=max_q_size)
            trial.write()

            # Trials are only written and queued here; they are executed and
            # tested together by batchExecuteAndTestTrials after the loops.
            trial_tups.append((trial, test, num_test, dependencies))

            trial.to_record({"labels": labels,
                             "depth": depth,
                             "sort_on" : sort_on,
                             "activation": activation_name,
                             "dropout":dropout,
                             "lstm_dropout":lstm_dropout,
                             "query": pt_query,
                             "patience" : patience
                            })

# Show a summary of every queued trial, then run the whole batch.
for tup in trial_tups:
    tup[0].summary()
batchExecuteAndTestTrials(trial_tups)
                
        

    

ARCHIVE SUCCESSFUL '41505b25a78e0c25de033f31585a7d3eec19a3a2'
ARCHIVE SUCCESSFUL '7213cac2c6a7e7cbdf245e6596da535c39c14599'


KeyboardInterrupt: 