In [1]:
import numpy as np
import tensorflow as tf
import tensorflow.keras as tk

from keras.layers import Dense, Dropout, Flatten, Conv2D, Input, Add, \
                         Activation, ZeroPadding2D, BatchNormalization, \
                         AveragePooling2D, MaxPooling2D, GlobalMaxPooling2D
from keras.models import Model
from keras.initializers import glorot_uniform

Primo blocco conv, NON è un residual block!\
Con input shape = (230,124,2) ==> output del layer shape = (51, 57, 32)

In [2]:
def conv1(X, filters = 32 , block="conv1", stage=2):
            
        # defining name basis
        conv_name_base = 'res' + str(stage) + block + '_branch'
        bn_name_base = 'bn' + str(stage) + block + '_branch'
        
        X_input = X
        # First component of main path
        X = Conv2D(filters, kernel_size = (6,3), strides = (2,1),
                name = conv_name_base + '2a',
                #nchannels??
                # data_format="channels_first", 
                # kernel_initializer = glorot_uniform(seed=0)
                )(X)
        X = BatchNormalization(axis = 1, name = bn_name_base + '2a')(X)
        X = Activation('relu')(X)
        
        # Second component of main path
        X = Conv2D(filters, kernel_size = (3,3), strides = (2,2),
                name = conv_name_base + '2b',
                #nchannels?? 
                # kernel_initializer = glorot_uniform(seed=0)
                )(X)
        X = BatchNormalization(axis=1, name = bn_name_base + '2b')(X)
        X = Activation('relu')(X)

        X = MaxPooling2D((3,3), strides=(1, 1))(X)
        
        return X

In [3]:
# X_input = Input(shape=(219,122,2)) 

# res1 = conv1(X_input)
# res1.summary()

Residual block Model: function that takes in input the numbers of filters and the stride of the convolutional layer.\
In this way this block can be used for all the residual blocks of the ResNet

In [12]:
def conv2x(X, stride, filters, block,stage=2):
        # defining name basis
        conv_name_base = 'res' + str(stage) + block + '_branch'
        bn_name_base = 'bn' + str(stage) + block + '_branch'

        #filters for each ==> there are two filters because 4 layer 
        #                     --> the first 2 and the last 2 have the same f 
        f1,f2 = filters
        s = stride
        #skip path for the residual part
        X_shortcut = Conv2D(f2, kernel_size = (3,3), strides = (s,s),
                name = conv_name_base + '-shortcut',
                )(X)

        X_shortcut = BatchNormalization(axis=1, name = bn_name_base + '-shortcut')(X_shortcut)

        # First component of main path
        X = Conv2D(f1, kernel_size = (1,1), strides = (1,1),
                name = conv_name_base + '2a',
                )(X)
        X = BatchNormalization(axis = 1, name = bn_name_base + '2a')(X)
        X = Activation('relu')(X)

        # Second component of main path
        X = Conv2D(f1, kernel_size = (3,3), strides = (s,s),
                name = conv_name_base + '2b',
                )(X)
        X = BatchNormalization(axis=1, name = bn_name_base + '2b')(X)
        X = Activation('relu')(X)
        
        # Second component of main path
        X = Conv2D(f2, kernel_size = (1,1), strides = (1,1),
                name = conv_name_base + '2c',
                )(X)
        X = BatchNormalization(axis=1, name = bn_name_base + '2c')(X)
        X = Activation('relu')(X)

        X = Add()([X, X_shortcut])
        out = Activation('relu')(X)

        return out


Learning rate scheduler

In [None]:
class MyLRSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):

  def __init__(self, initial_learning_rate):
    self.initial_learning_rate = initial_learning_rate

  def __call__(self, step):
     return self.initial_learning_rate / (step + 1)

In [13]:
# X_input = Input(shape=( 24, 26, 256)) 

# res2 = conv2x(X_input, stride=2, filters=[128,512], block="a")
# res2.summary()

In [14]:
def ResNetJ(feature, lr_power=-3.0, lr_decay=0.0):
    """
    Implementation of the popular ResNet50 the following architecture:
    Arguments:
        feature is a string which allow two input values:
        --> 'vertex' or 'energy'
    """
    
    # Define the input as a tensor with shape input_shape
    # X_input = Input(shape=(230,124,2))
    X_input = Input(shape=(219,122,2))
    
    # Stage 1
    X = conv1(X_input, block="conv1")
    # Stage2
    X = conv2x(X, stride=1, filters=[32,128], block="conv2x_1")
    X = conv2x(X, stride=1, filters=[32,128], block="conv2x_2")
    X = conv2x(X, stride=1, filters=[32,128], block="conv2x_3")

    # Stage 3
    # X = conv2x(X, stride=2, filters=[64,256], block="conv3x_1")
    X = conv2x(X, stride=1, filters=[64,256], block="conv3x_2")
    X = conv2x(X, stride=1, filters=[64,256], block="conv3x_3")
    X = conv2x(X, stride=1, filters=[64,256], block="conv3x_4")

    # Stage 4 
    # X = conv2x(X, stride=2, filters=[128,512], block="conv4x_1")
    X = conv2x(X, stride=1, filters=[128,512], block="conv4x_2")
    X = conv2x(X, stride=1, filters=[128,512], block="conv4x_3")
    X = conv2x(X, stride=1, filters=[128,512], block="conv4x_4")
    X = conv2x(X, stride=1, filters=[128,512], block="conv4x_5")
    X = conv2x(X, stride=1, filters=[128,512], block="conv4x_6")

    # Stage 5
    X = conv2x(X, stride=2, filters=[256,1024], block="conv5x_1")
    X = conv2x(X, stride=1, filters=[256,1024], block="conv5x_2")
    X = conv2x(X, stride=1, filters=[256,1024], block="conv5x_3")

    """
    prova con i filters originali del papaer ma senza i blocchi rossi
    """
    # Stage 1
    # X = conv1(X_input, block="conv1")
    # # Stage2
    # X = conv2x(X, stride=1, filters=[64,256], block="conv2x_1")
    # # X = conv2x(X, stride=1, filters=[64,256], block="conv2x_2")
    # # X = conv2x(X, stride=1, filters=[64,256], block="conv2x_3")

    # # Stage 3
    # X = conv2x(X, stride=2, filters=[64,256], block="conv3x_1")
    # X = conv2x(X, stride=1, filters=[128,512], block="conv3x_2")
    # # X = conv2x(X, stride=1, filters=[128,512], block="conv3x_3")
    # # X = conv2x(X, stride=1, filters=[128,512], block="conv3x_4")

    # # Stage 4 
    # X = conv2x(X, stride=2, filters=[128,512], block="conv4x_1")
    # X = conv2x(X, stride=1, filters=[256,1024], block="conv4x_2")
    # # X = conv2x(X, stride=1, filters=[256,1024], block="conv4x_3")
    # # X = conv2x(X, stride=1, filters=[256,1024], block="conv4x_4")
    # # X = conv2x(X, stride=1, filters=[256,1024], block="conv4x_5")
    # # X = conv2x(X, stride=1, filters=[256,1024], block="conv4x_6")

    # # Stage 5
    # X = conv2x(X, stride=2, filters=[512,2048], block="conv5x_1")
    # X = conv2x(X, stride=1, filters=[512,2048], block="conv5x_2")
    # # X = conv2x(X, stride=1, filters=[512,2048], block="conv5x_3")

    # AVGPOOL 
    X = AveragePooling2D((2,2), name='avg_pool')(X)
    # Flatten
    X = Flatten()(X)
    X = Dense(512, name='first_dense',  kernel_initializer=glorot_uniform(seed=0))(X)
    X = Dense(100, name='second_dense', kernel_initializer=glorot_uniform(seed=0))(X)

    # Output 
    if(feature=="energy"):
        X = Dense(1, name='fc_outputs', kernel_initializer=glorot_uniform(seed=0))(X)
    elif(feature=="vertex"):
        X = Dense(3, name='fc_outputs', kernel_initializer=glorot_uniform(seed=0))(X)
    
    # Create model
    model = Model(inputs = X_input, outputs = X, name = 'ResNetJ')
    
    # Compile model
    learning_rate = 10.0**(lr_power)
    opt = tk.optimizers.Adam(learning_rate=learning_rate, beta_1 = 0.9, beta_2 = 0.999 )

    model.compile(loss="mean_squared_error", optimizer=opt, metrics=['accuracy'])
    
    return model    

In [17]:
res = ResNetJ(feature="energy")
res.summary()


Model: "ResNetJ"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_4 (InputLayer)           [(None, 219, 122, 2  0           []                               
                                )]                                                                
                                                                                                  
 res2conv1_branch2a (Conv2D)    (None, 107, 120, 32  1184        ['input_4[0][0]']                
                                )                                                                 
                                                                                                  
 bn2conv1_branch2a (BatchNormal  (None, 107, 120, 32  428        ['res2conv1_branch2a[0][0]']     
 ization)                       )                                                           

Setting a learning rate which increase in the first epoch from $0$ to $10^{-3}$, and then decrease from $10^{-3}$ to $10^{-8}$


In [18]:
from keras.callbacks import LearningRateScheduler

def step_decay_schedule(initial_lr=1e-8, decay_factor=0.75, step_size=10, BS=64, ndat = 5e6):
    '''
    Wrapper function to create a LearningRateScheduler with step decay schedule.
    '''
    def schedule(epoch):
        if epoch==1:
            decay_factor = 10**(-3) * BS / ndat
            lr_sched = initial_lr * decay_factor     #(decay_factor ** np.floor(epoch/step_size))
            return lr_sched
        else:
            decay_factor = 0.1
            lr_sched = tk.optimizers.schedules.ExponentialDecay(initial_learning_rate=10**(-3) )
            return lr_sched
#        return initial_lr * (decay_factor ** np.floor(epoch/step_size))
    
    return LearningRateScheduler(schedule)

lr_sched = step_decay_schedule(initial_lr=1e-4)

# setting an adaptive learning rate

# def adapt_learning_rate(epoch):
#     if(epoch==1):
#         lr = 0.001
#     elif(epoch!=1):
#         lr = 0.1*epoch
#     return lr

# lr_history = tk.callbacks.Callback.LearningRate()
# lr_rate = tk.callbacks.LearningRateScheduler(adapt_learning_rate)


Fit

In [19]:
BATCH_SIZE = 64
EPOCHS = 15
# history = res.fit(x_train, y_train,
#         batch_size=BATCH_SIZE,
#         epochs=EPOCHS,
#         callbacks=[lr_sched],
#         validation_data=(x_val, y_val),
#         shuffle=True)

In [58]:
def f(n,k,s):
    n_out = (n-k)/s + 1
    return n_out
f(26,25,1)

2.0

In [32]:
def conv1(X, block="conv1", stage=2):
            
        # defining name basis
        conv_name_base = 'res' + str(stage) + block + '_branch'
        bn_name_base = 'bn' + str(stage) + block + '_branch'
        
        X_input = X
        # First component of main path
        X = Conv2D(64, kernel_size = (6,3), strides = (2,1),
                name = conv_name_base + '2a',
                #nchannels??
                # data_format="channels_first", 
                # kernel_initializer = glorot_uniform(seed=0)
                )(X)
        X = BatchNormalization(axis = 1, name = bn_name_base + '2a')(X)
        X = Activation('relu')(X)
        
        # Second component of main path
        X = Conv2D(64, kernel_size = (3,3), strides = (2,2),
                name = conv_name_base + '2b',
                #nchannels?? 
                # kernel_initializer = glorot_uniform(seed=0)
                )(X)
        X = BatchNormalization(axis=1, name = bn_name_base + '2b')(X)
        X = Activation('relu')(X)

        X = MaxPooling2D((2,2), strides=(1,1))(X)
        
        # Create model
        model = Model(inputs = X_input, outputs = X, name = 'ResNetJ')
        
        # Compile model
        learning_rate = 1e-3
        opt = tk.optimizers.Adam(learning_rate=learning_rate, beta_1 = 0.9, beta_2 = 0.999 )

        model.compile(loss="mean_squared_error", optimizer=opt, metrics=['accuracy'])
        
        return model 

In [33]:
X_input = Input(shape=(230,124,2))
res1 = conv1(X_input)
res1.summary()

Model: "ResNetJ"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_10 (InputLayer)       [(None, 230, 124, 2)]     0         
                                                                 
 res2conv1_branch2a (Conv2D)  (None, 113, 122, 64)     2368      
                                                                 
 bn2conv1_branch2a (BatchNor  (None, 113, 122, 64)     452       
 malization)                                                     
                                                                 
 activation_230 (Activation)  (None, 113, 122, 64)     0         
                                                                 
 res2conv1_branch2b (Conv2D)  (None, 56, 60, 64)       36928     
                                                                 
 bn2conv1_branch2b (BatchNor  (None, 56, 60, 64)       224       
 malization)                                               

In [43]:
    # """
    # prova
    # """
    # # Stage 1
    # X = conv1(X_input, block="conv1")
    # # Stage2
    # X = conv2x(X, stride=1, filters=[64,256], block="conv2x_1")
    # X = conv2x(X, stride=1, filters=[64,256], block="conv2x_2")
    # X = conv2x(X, stride=1, filters=[64,256], block="conv2x_3")

    # # Stage 3
    # X = conv2x(X, stride=1, filters=[64,256], block="conv3x_1")
    # X = conv2x(X, stride=1, filters=[128,512], block="conv3x_2")
    # X = conv2x(X, stride=1, filters=[128,512], block="conv3x_3")
    # X = conv2x(X, stride=1, filters=[128,512], block="conv3x_4")

    # # Stage 4 
    # X = conv2x(X, stride=1, filters=[128,512], block="conv4x_1")
    # X = conv2x(X, stride=1, filters=[256,1024], block="conv4x_2")
    # X = conv2x(X, stride=1, filters=[256,1024], block="conv4x_3")
    # X = conv2x(X, stride=1, filters=[256,1024], block="conv4x_4")
    # X = conv2x(X, stride=1, filters=[256,1024], block="conv4x_5")
    # X = conv2x(X, stride=1, filters=[256,1024], block="conv4x_6")

    # # Stage 5
    # X = conv2x(X, stride=1, filters=[512,2048], block="conv5x_1")
    # X = conv2x(X, stride=1, filters=[512,2048], block="conv5x_2")
    # X = conv2x(X, stride=1, filters=[512,2048], block="conv5x_3")

21.0