In [8]:
from keras import backend as K
from keras.models import Model
from keras.layers import (BatchNormalization, Conv1D, Dense, Input, 
    TimeDistributed, Activation, Bidirectional, SimpleRNN, GRU, LSTM, MaxPooling1D, Dropout)

# using l2 regularization
from keras.regularizers import l2

In [15]:
def final_model(input_dim, filters, kernel_size, stride,
    padding, units, recur_layers, output_dim=29, l2_lambda=0.001, dropout=0.2,
    pool_size=2):
    """Build a Conv1D -> MaxPooling1D -> stacked GRU network for speech.

    The network is: input -> Conv1D (ReLU) -> MaxPooling1D -> BatchNormalization
    -> `recur_layers` x (GRU + BatchNormalization) -> TimeDistributed(Dense)
    -> softmax.

    Args:
        input_dim: Size of the last axis of the input; the input layer is
            declared with shape ``(None, input_dim)``.
        filters: Number of filters of the Conv1D layer.
        kernel_size: Kernel size of the Conv1D layer.
        stride: Stride used by BOTH the Conv1D layer and the pooling layer.
        padding: Padding mode forwarded to the Conv1D layer.
        units: Number of units in every GRU layer.
        recur_layers: Number of GRU + BatchNormalization pairs to stack.
        output_dim: Number of units of the per-time-step Dense layer.
        l2_lambda: L2 regularization factor applied to the GRU kernel and
            recurrent weights and to the Dense kernel.
        dropout: Value forwarded to the ``dropout`` argument of every GRU.
        pool_size: Window size of the max-pooling layer. Defaults to 2, which
            is the Keras default the layer was implicitly using before.

    Returns:
        A Keras ``Model`` mapping the input to per-time-step softmax outputs.
        The model carries an extra ``output_length`` attribute: a callable
        that maps an input sequence length to the length of the model output
        along the time axis, accounting for both the convolution and the
        pooling layer.
    """
    # Main acoustic input
    input_data = Input(name='the_input', shape=(None, input_dim))

    # Convolutional front-end
    conv_1d = Conv1D(filters, kernel_size,
                     strides=stride,
                     padding=padding,
                     activation='relu',
                     name='conv1d')(input_data)

    # Max pooling; no padding argument is passed, so Keras uses 'valid'.
    pool = MaxPooling1D(pool_size=pool_size, strides=stride,
                        name='pooling')(conv_1d)

    # Batch normalization on the pooled convolutional features
    bn_cnn = BatchNormalization(name='bn_conv_1d')(pool)

    # Stack of recurrent layers, each followed by batch normalization
    last_layer = bn_cnn
    for i in range(recur_layers):
        rnn_name = 'recure_rnn' + str(i)
        batch_name = 'recure_batch_normal' + str(i)

        rnn_layer = GRU(units, activation="relu", return_sequences=True,
                        name=rnn_name, kernel_regularizer=l2(l2_lambda),
                        dropout=dropout,
                        recurrent_regularizer=l2(l2_lambda))(last_layer)

        last_layer = BatchNormalization(name=batch_name)(rnn_layer)

    # Per-time-step projection to the output classes
    time_dense = TimeDistributed(
        Dense(output_dim, kernel_regularizer=l2(l2_lambda),
              name='dense_layer'))(last_layer)
    # Softmax over the output classes
    y_pred = Activation('softmax')(time_dense)

    # Specify the model
    model = Model(inputs=input_data, outputs=y_pred)

    def output_length(input_length):
        """Map an input sequence length to the model's output length."""
        # Length after the strided convolution.
        # NOTE(review): cnn_output_length is defined outside this cell; it is
        # assumed to return the Conv1D output length (or None) - confirm.
        conv_length = cnn_output_length(input_length, kernel_size,
                                        padding, stride)
        if conv_length is None:
            return None
        # The pooling layer shortens the sequence again. With 'valid'
        # padding: floor((L - pool_size) / stride) + 1. Omitting this step
        # reports a length longer than the actual softmax output.
        return (conv_length - pool_size) // stride + 1

    model.output_length = output_length

    # summary() prints the table itself and returns None, so it must not be
    # wrapped in print().
    model.summary()
    return model

In [16]:
# Build the final architecture: 161-dimensional input features, a strided
# convolutional front-end and two stacked recurrent layers.
model_end = final_model(
    input_dim=161,
    filters=256,
    kernel_size=5,
    stride=2,
    padding='valid',
    units=200,
    recur_layers=2,
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
the_input (InputLayer)       (None, None, 161)         0         
_________________________________________________________________
conv1d (Conv1D)              (None, None, 256)         206336    
_________________________________________________________________
pooling (MaxPooling1D)       (None, None, 256)         0         
_________________________________________________________________
bn_conv_1d (BatchNormalizati (None, None, 256)         1024      
_________________________________________________________________
recure_rnn0 (GRU)            (None, None, 200)         274200    
_________________________________________________________________
recure_batch_normal0 (BatchN (None, None, 200)         800       
_________________________________________________________________
recure_rnn1 (GRU)            (None, None, 200)         240600    
__________