In [2]:
import numpy as np
import keras
import keras.layers
from keras import backend as K
from keras import regularizers
from keras.utils import to_categorical
import sys
from collections import OrderedDict
import os
sys.path.append(os.path.abspath(".."))
sys.path.append(os.path.join(os.path.abspath(".."),'lib'))
sys.path.append("/home/pablo/Documents/Master/Icecube/DeepIceLearning-master/lib")
import transformations as tr
import numpy as np
import keras.layers

In [45]:
# Add path to own libs
import sys
sys.path.append("../lib/")
sys.path.append("/home/pablo/Documents/Master/Icecube/DeepIceLearning-master/lib")
sys.path.append("/home/pablo/Documents/Master/Icecube/DeepIceLearning-master/lib")


# Keras Imports
import keras 
import keras.layers
from keras.callbacks import CSVLogger, EarlyStopping
import keras.backend as K
from keras.utils import plot_model

# Own Imports
#import block_units as bunit
#from functions import generator, IC_identity, IC_divide_1000, IC_log10, get_indices
import transformations as tr
#from plotting_style import figsize
#from custom import generators
from functions import generator_v2

#Various Imports
import numpy as np
from collections import OrderedDict
import matplotlib.pyplot as plt
import h5py
import time

In [43]:
def generator(batch_size, file_handlers, inds,
              inp_transformations, out_transformations,
              weighting_function=None, use_data=False, equal_len=False,
              mask_func=None):

    """ This function generates the training batches for the neural network.
    It load all input and output data and applies the transformations
    as defined in the network definition file.

    Arguments:
    batch size : the batch size per gpu
    file_handlers: list of files used for the training
                   i.e. ['/path/to/file/A', 'path/to/file/B']
    inds: the index range used for the files
          e.g. [(0,1000), (0,2000)]
    inp_shape_dict: A dictionary with the input shape for each branch
    inp_transformations: Dictionary with input variable name and function
    out_shape_dict: A dictionary with the output shape for each branch
    out_transformations: Dictionary with out variable name and function
    weighting_function: A function that returns the event weights on basis
                        of the information saved in reco_vals, e.g.
                        lambda mc: np.log10(mc['trunc_e'])
    mask_func: a function that returns a mask of values that get a 
                weight of zero, i.e. will not be considered in the loss
                e.g. lambda mc: mc['mu_e_on_entry'] < 1.e2
                        
    Returns:
    batch_input : a batch of input data
    batch_out: a batch of output data
    weights: a weight for each event

    """

#     print('Run with inds {}'.format(inds))

    in_branches = [(branch, inp_transformations[branch]['general'])
                   for branch in inp_transformations]
    out_branches = [(branch, out_transformations[branch]['general'])
                    for branch in out_transformations]
    inp_variables = [[(i, inp_transformations[branch[0]][i][1])
                      for i in inp_transformations[branch[0]] if i != 'general']
                     for branch in in_branches]
    out_variables = [[(i, out_transformations[branch[0]][i][1])
                      for i in out_transformations[branch[0]] if i != 'general']
                     for branch in out_branches]
    cur_file = 0
    ind_lo = inds[0][0]
    ind_hi = inds[0][0] + batch_size
    in_data = h5py.File(file_handlers[0], 'r')
    f_reco_vals = in_data['reco_vals']
    t0 = time.time()
    num_batches = 0
 
    while True:
        inp_data = []
        out_data = []
        weights = []
        arr_size = np.min([batch_size, ind_hi - ind_lo])
        reco_vals = f_reco_vals[ind_lo:ind_hi]

        #print('Generate Input Data')
        for k, b in enumerate(out_branches):
            for j, f in enumerate(out_variables[k]):
                if weighting_function != None:
                    tweights=weighting_function(reco_vals)
                else:
                    tweights=np.ones(arr_size)
                if mask_func != None:
                    mask = mask_func(reco_vals)
                    tweights[mask] = 0
            weights.append(tweights)
            
        for k, b in enumerate(in_branches):
            batch_input = np.zeros((arr_size,)+in_branches[k][1])
            for j, f in enumerate(inp_variables[k]):
                if f[0] in in_data.keys():
                    pre_data = np.array(np.squeeze(in_data[f[0]][ind_lo:ind_hi]), ndmin=4)
                    batch_input[:,:,:,:,j] = np.atleast_1d(f[1](pre_data))
                else:
                    pre_data = np.squeeze(reco_vals[f[0]])
                    batch_input[:,j]=f[1](pre_data)
            inp_data.append(batch_input)
            
        # Generate Output Data
        for k, b in enumerate(out_branches):
            if use_data:
                continue
            shape = (arr_size,)+out_branches[k][1]
            batch_output = np.zeros(shape)
            for j, f in enumerate(out_variables[k]):
                pre_data = np.squeeze(reco_vals[f[0]])
                if len(out_variables[k]) == 1:
                    batch_output[:]=np.reshape(f[1](pre_data), shape)
                else:
                    batch_output[:,j] = f[1](pre_data)
            out_data.append(batch_output)

        #Prepare Next Loop
        ind_lo += batch_size
        ind_hi += batch_size
        if (ind_lo >= inds[cur_file][1]) | (equal_len & (ind_hi > inds[cur_file][1])):
            cur_file += 1
            if cur_file == len(file_handlers):
                cur_file=0
            t1 = time.time()
#             print('\n Open File: {} \n'.format(file_handlers[cur_file]))
#             print('\n Average Time per Batch: {}s \n'.format((t1-t0)/num_batches))
            t0 = time.time()
            num_batches = 0
            in_data.close()
            in_data = h5py.File(file_handlers[cur_file], 'r')
            f_reco_vals = in_data['reco_vals']
            ind_lo = inds[cur_file][0]
            ind_hi = ind_lo + batch_size
        elif ind_hi > inds[cur_file][1]:
            ind_hi = inds[cur_file][1]
       
        # Yield Result
        num_batches += 1
        if use_data:
            yield inp_data
        else:
            yield (inp_data, out_data, weights)
            

In [47]:
#### You don't necessarily have to change anything here

# Files for training

dnn_files={'files_training' : ['h5_final/File_19.h5',
                               'h5_final/File_10.h5',
                               'h5_final/File_11.h5',],
           'files_validation' : ['h5_final/File_14.h5'],
           'files_test' : ['h5_final/File_15.h5'], }
           
# Size of the mini-batches
batch_size = 68

# For imbalanced data an event weighting can be useful
# if you want to use an event weight define a function that uses the reco vals
# to calculate a weight, e.g.: lambda mc: np.log10(mc['trunc_e'])
sample_weights = None #lambda mc: np.log10(mc['trunc_e'])**(1.)

In [52]:
# Input

### Add here the features you want to use #####

# Each input branch is a dictionary containing information about the input features.
# Single features should have the syntax: 'feature': (shape, transformation), where shape is
# in our case always (10,10,60) so just copy and paste. Also add a key 'general' that gives the
# overall shape of the branch, i.e. (10,10,60, number of features)


grid_shape = (10, 10, 60)
input_branch1 = {'IC_charge': (grid_shape,  tr.IC_divide_100) ,
                 'IC_mean': (grid_shape, tr.IC_divide_100),
                 'IC_mult': (grid_shape, tr.IC_divide_1000),
                 'IC_num_pulses': (grid_shape, tr.IC_divide_1000),
                 'general': grid_shape+ (4,)}


inp_shapes = OrderedDict([('Branch_IC_time', input_branch1)])

# Output

### Define here your output variable(s) ####

# Syntax is the same as for the input case, but make sure that shape is always a tuple,
# so for one float use (1,), for two (2,) and so on
output_branch1 = {'ClassificationLabel': ((2, ), tr.oneHotEncode_01), # Predefined Classification Label, transformed to the classes that should get predicted
                  'general': (2,)}
out_shapes = OrderedDict([('Out1', output_branch1)]) 

In [53]:
output_branch1

{'ClassificationLabel': ((2,),
  <function transformations.oneHotEncode_01(x, r_vals=None)>),
 'general': (2,)}

In [69]:
### Most important: Define your model using the functional API of Keras
# https://keras.io/getting-started/functional-api-guide/

# The Input
input_b1 = keras.layers.Input(
    shape=inp_shapes['Branch_IC_time']['general'],
    name = "Input-Branch1")

# Convolutional Layers
z1 = keras.layers.Conv3D(36, (1, 1, 1), activation='relu', padding="same", name='conv1x1x1')(input_b1)
#z1 = keras.layers.BatchNormalization()(z1)
#z1 = keras.layers.Conv3D(16, (3, 3, 4), activation='relu',padding="same", name='conv3x3x4')(input_b1)
#z1 = keras.layers.BatchNormalization()(z1)
#z1 = keras.layers.MaxPooling3D(pool_size=(2, 2, 3))(z1)

#z1 = keras.layers.Conv3D(16, (3, 3, 3), activation='relu',padding="same", name='conv3x3x3_1')(input_b1)
#z1 = keras.layers.BatchNormalization()(z1)
#z1 = keras.layers.Conv3D(16, (3, 3, 3), activation='relu', padding="same", name='conv3x3x3_2')(input_b1)
#z1 = keras.layers.MaxPooling3D(pool_size=(1, 1, 2))(z1)
#z1 = keras.layers.Conv3D(8, (2, 2, 2), activation='relu',padding="same", name='conv2x2x2')(input_b1)
#z1 = keras.layers.BatchNormalization()(z1)
#z1 = keras.layers.Flatten()(z1)

# Fully Connected Layers
#z1 = keras.layers.Dense(32, activation='relu',name='Dense1')(z1)
#z1 = keras.layers.BatchNormalization()(z1)
#z1 = keras.layers.Dropout(0.3)(z1)
#z1 = keras.layers.Dense(64, activation='relu', name='Dense2')(z1)
#z1 = keras.layers.BatchNormalization()(z1)
#z1 = keras.layers.Dropout(0.3)(z1)

# The Output
output_b1 = keras.layers.Dense(4, activation='softmax', name='Target1')(z1)
model= keras.models.Model(inputs=[input_b1], outputs=[output_b1])

# Print a summary of the model
# print(model.summary())

#choose an optimizer an compile the model
# plot_model(model, to_file='./model.png')
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['acc'])

In [70]:
model.summary()

Model: "model_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Input-Branch1 (InputLayer)   (None, 10, 10, 60, 4)     0         
_________________________________________________________________
conv1x1x1 (Conv3D)           (None, 10, 10, 60, 36)    180       
_________________________________________________________________
Target1 (Dense)              (None, 10, 10, 60, 4)     148       
Total params: 328
Trainable params: 328
Non-trainable params: 0
_________________________________________________________________


In [71]:
# Fit the Model
# The generator prepares mini-batches that are fed into the network
# For more options and callbacks see https://keras.io/models/sequential/
# as well as the docstrings of the generator in functions.py

### There are a few things that could be changed here, e.g. the 'early stopping'


train_steps, valid_steps, test_steps, train_inds, valid_inds, test_inds = get_indices(dnn_files, batch_size) 

model.fit_generator(
    generator(
        batch_size,
        dnn_files['files_training'],
        train_inds,
        inp_shapes, out_shapes,
        weighting_function=sample_weights,
        mask_func=None
    ),
    steps_per_epoch=train_steps,
    validation_data=generator(
        batch_size,
        dnn_files['files_validation'],
        valid_inds,
        inp_shapes, out_shapes,
        weighting_function=sample_weights,
        mask_func=None
    ),
    validation_steps=valid_steps,
    epochs=10, # Maximum number of epoch for the training
    callbacks = [
        EarlyStopping(patience=1) # Stop once the validation loss did not decrease for one epoch
    ], 
    verbose=1,
    max_queue_size=2,
    use_multiprocessing=True)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Epoch 1/10




KeyboardInterrupt: 

In [18]:
data = h5py.File('h5_final/File_19.h5')

  """Entry point for launching an IPython kernel.


In [None]:
data.keys()

In [17]:
x = h5py.File('h5_final/File_19.h5')

  """Entry point for launching an IPython kernel.
