In [1]:
from keras.models import Model
from keras.layers import Input, LSTM, Dense, Bidirectional, Concatenate, Embedding
from keras import optimizers
import numpy as np
import pandas as pd
import re
import _pickle as pickle
from keras.utils import to_categorical
import matplotlib.pyplot as plt 
%matplotlib inline

Using TensorFlow backend.


Load the pickled training and test MFCC feature sets into the notebook

In [0]:
# Load part 1 of the pickled training features. The context manager closes the
# file handle as soon as the load finishes.
# NOTE(review): pickle.load can execute arbitrary code; only load files you created yourself.
with open('drive/My Drive/Colab Notebooks/Data/Capstone/train_MFCC_1', 'rb') as mfcc_file:
    X_train_p1 = pickle.load(mfcc_file)

In [0]:
# Load part 2 of the pickled training features. The context manager closes the
# file handle as soon as the load finishes.
# NOTE(review): pickle.load can execute arbitrary code; only load files you created yourself.
with open('drive/My Drive/Colab Notebooks/Data/Capstone/train_MFCC_2', 'rb') as mfcc_file:
    X_train_p2 = pickle.load(mfcc_file)

In [0]:
# Load part 3 of the pickled training features. The context manager closes the
# file handle as soon as the load finishes.
# NOTE(review): pickle.load can execute arbitrary code; only load files you created yourself.
with open('drive/My Drive/Colab Notebooks/Data/Capstone/train_MFCC_3', 'rb') as mfcc_file:
    X_train_p3 = pickle.load(mfcc_file)

In [0]:
# Combine the three pickled parts into one training collection.
# NOTE(review): `+` concatenates only if the parts are Python lists — confirm the pickled type.
X_train = X_train_p1 +X_train_p2+X_train_p3

In [0]:
# Drop the per-part names now that X_train references the combined data.
del X_train_p1, X_train_p2, X_train_p3

In [9]:
# Sanity check: shape of the combined training data when viewed as a NumPy array.
np.asarray(X_train).shape

(69962,)

In [0]:
# Load the pickled test features. The context manager closes the file handle
# as soon as the load finishes.
# NOTE(review): pickle.load can execute arbitrary code; only load files you created yourself.
with open('drive/My Drive/Colab Notebooks/Data/Capstone/Test_MFCC', 'rb') as mfcc_file:
    X_test = pickle.load(mfcc_file)

Upload CSV files

In [0]:
# Metadata tables for the two splits (the 'filename' and 'text' columns are used below).
# NOTE(review): despite the names, `X` holds the *train* CSV and `y` holds the *test* CSV;
# they are not a features/labels pair.
X = pd.read_csv('drive/My Drive/Colab Notebooks/Data/Capstone/csv_files/cv-valid-train.csv', sep=',', encoding='ascii')
y= pd.read_csv('drive/My Drive/Colab Notebooks/Data/Capstone/csv_files/cv-valid-test.csv', sep=',', encoding='ascii')

In [0]:
# Keep only the path component after the first '/' (e.g. 'dir/file.mp3' -> 'file.mp3').
# The previous re.split call passed re.I as its third positional argument, which is
# `maxsplit`, not `flags`. A plain str.split yields the same component without that trap.
# Not idempotent: re-running this cell on already-stripped names raises IndexError.
X['filename'] = X['filename'].apply(lambda path: path.split('/')[1])
y['filename'] = y['filename'].apply(lambda path: path.split('/')[1])

In [0]:
# Transcripts for the loaded utterances: take as many leading CSV rows as there
# are feature sequences in each split.
# NOTE(review): this relies on CSV row order matching the pickled feature order — confirm.
n_train, n_test = len(X_train), len(X_test)
y_train = X['text'].iloc[:n_train]
y_test = y['text'].iloc[:n_test]

## Reshaping the data to be used in the model

In [0]:
def input_reshape(data, max_len=None):
    '''Zero-pad variable-length feature sequences into one dense 3-D array.

    Parameters
    ----------
    data : sequence of sequences of frames
        ``data[i][t]`` is the feature vector of frame ``t`` of sequence ``i``.
        Must contain at least one non-empty sequence at index 0, because the
        feature width is read from ``data[0][0]``.
    max_len : int, optional
        Number of time steps in the output. When omitted, the length of the
        longest sequence in the module-level ``X_train`` is used (the original
        behaviour), so every split is padded to the same length. Pass it
        explicitly to use this function once ``X_train`` has been deleted.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(data), max_len, len(data[0][0]))``. Sequences
        longer than ``max_len`` and frames wider than the feature width are
        truncated; shorter ones are zero-padded.
    '''
    if max_len is None:
        # Backward-compatible default: pad to the longest *training* sequence.
        max_len = max(len(seq) for seq in X_train)
    n_features = len(data[0][0])
    padded = np.zeros((len(data), max_len, n_features))
    for row, seq in enumerate(data):
        # Explicit truncation replaces the former bare `except: continue`,
        # which silently skipped out-of-range frames and any other error.
        for step, frame in enumerate(seq[:max_len]):
            width = min(len(frame), n_features)
            padded[row, step, :width] = frame[:width]
    return padded

In [15]:
# Reshape the training set into a dense, zero-padded 3-D array and print its shape.
X_train_input = input_reshape(X_train)
print(X_train_input.shape)

(69962, 781, 20)


In [16]:
# Reshape the test set the same way and print its shape.
# input_reshape sizes the time axis from the global X_train, not from its argument,
# so X_train must still exist when this cell runs.
X_test_input = input_reshape(X_test)
print(X_test_input.shape)

(3992, 781, 20)


In [0]:
# Drop the raw feature collections now that the padded arrays exist.
# NOTE(review): input_reshape reads the global X_train, so it cannot be called
# again after this cell without reloading the data.
del X_train, X_test

## Text analysis

In [0]:
def clean_text(text):
    '''Lower-case a transcript, expand common English contractions and strip punctuation.

    Parameters
    ----------
    text : str
        Raw transcript.

    Returns
    -------
    str
        The normalised transcript. Apostrophes that are not part of a handled
        contraction (e.g. possessives such as "john's") are kept.
    '''
    # Applied in order: specific contractions must come before the generic
    # suffix rules ("won't" / "can't" before "n't", "n't" before "n'").
    substitutions = (
        (r"i'm", "i am"),
        (r"he's", "he is"),
        (r"she's", "she is"),
        (r"it's", "it is"),
        (r"that's", "that is"),
        (r"what's", "what is"),      # was mistakenly expanded to "that is"
        (r"where's", "where is"),
        (r"how's", "how is"),
        (r"\'ll", " will"),
        (r"\'ve", " have"),
        (r"\'re", " are"),
        (r"\'d", " would"),
        (r"won't", "will not"),
        (r"can't", "cannot"),
        (r"n't", " not"),
        # Dropped-g forms only ("goin'" -> "going"). The lookahead keeps
        # possessives intact, which the old pattern turned into "johngs".
        (r"n'(?!\w)", "ng"),
        (r"'bout", "about"),
        (r"'til", "until"),
        (r"[-()\"#/@;:<>{}`+=~|.!?,]", ""),
    )
    text = text.lower()
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    return text

In [19]:
# Normalise the training transcripts and wrap each one in a start marker ('\t')
# and an end marker ('\n').
input_texts = y_train.apply(clean_text)
input_texts = list('\t' + input_texts + '\n')

# Character vocabulary: every distinct character in the wrapped transcripts (plus
# the space used as join separator), sorted, with lookups in both directions.
target_characters = sorted(set(' '.join(input_texts)))
indexes = list(zip(target_characters, range(len(target_characters))))
target_char_index = dict(indexes)
reverse_target_char_index = {i: char for char, i in target_char_index.items()}

num_decoder_chars = len(target_characters)
max_decoder_seq_length = max(len(txt) for txt in input_texts)

# One-hot tensors of shape (num_texts, max_decoder_seq_length, num_decoder_chars).
one_hot_shape = (len(input_texts), max_decoder_seq_length, num_decoder_chars)
decoder_input_data = np.zeros(one_hot_shape, dtype='float32')
decoder_target_data = np.zeros(one_hot_shape, dtype='float32')

for row, sentence in enumerate(input_texts):
    for pos, symbol in enumerate(sentence):
        col = target_char_index[symbol]
        decoder_input_data[row, pos, col] = 1.
        # The target is the input shifted one step to the left, so it omits the start marker.
        if pos > 0:
            decoder_target_data[row, pos - 1, col] = 1.

print(target_characters)
print(target_char_index)

['\t', '\n', ' ', "'", 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
{'\t': 0, '\n': 1, ' ': 2, "'": 3, 'a': 4, 'b': 5, 'c': 6, 'd': 7, 'e': 8, 'f': 9, 'g': 10, 'h': 11, 'i': 12, 'j': 13, 'k': 14, 'l': 15, 'm': 16, 'n': 17, 'o': 18, 'p': 19, 'q': 20, 'r': 21, 's': 22, 't': 23, 'u': 24, 'v': 25, 'w': 26, 'x': 27, 'y': 28, 'z': 29}


# Speech Recognition Modeling:


In [0]:
# Training configuration shared by both models below.
# batch_size evaluates to 800 samples per gradient update.
batch_size = 32*25
epochs = 15
# Units of each LSTM; the bidirectional model's decoder uses latent_dim*2.
latent_dim = 250
# Width of one encoder input frame; matches the last dimension of X_train_input printed above.
num_encoder_chars=20
# Two names for the same padded training array: `train_X` is passed to fit(),
# `encoder_input_data` is read by decode_sequence_range.
train_X = X_train_input 
encoder_input_data= X_train_input

## Build Bidirectional LSTM Model


In [29]:
# encoder
# A bidirectional LSTM reads the feature frames; with return_state=True it yields
# its output plus the hidden (h) and cell (c) state of each direction.
bi_lstm_encoder_inputs = Input(shape=(None, num_encoder_chars))
bi_lstm_encoder = Bidirectional(LSTM(latent_dim, return_state=True))
encoder_outputs, forward_h, forward_c, backward_h, backward_c = bi_lstm_encoder(bi_lstm_encoder_inputs)
# Join forward and backward states so one (h, c) pair of width 2*latent_dim seeds the decoder.
state_h = Concatenate()([forward_h, backward_h])
state_c = Concatenate()([forward_c, backward_c])
encoder_states = [state_h, state_c]

# decoder
# The decoder LSTM is sized latent_dim*2 to accept the concatenated encoder states
# as its initial state; a softmax layer scores every character at each step.
# NOTE(review): `encoder_states`, `decoder_lstm` and `decoder_dense` are rebound by the
# plain LSTM section further down; run order decides which model the inference cells use.
bi_lstm_decoder_inputs = Input(shape=(None, num_decoder_chars))    
decoder_lstm = LSTM(latent_dim*2, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(bi_lstm_decoder_inputs, initial_state=encoder_states)
decoder_dense = Dense(num_decoder_chars, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)
# Training model: (encoder frames, decoder input characters) -> per-step character distribution.
bi_lstm_model = Model([bi_lstm_encoder_inputs, bi_lstm_decoder_inputs], decoder_outputs)
bi_lstm_model.summary()

Model: "model_7"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_9 (InputLayer)            (None, None, 20)     0                                            
__________________________________________________________________________________________________
bidirectional_3 (Bidirectional) [(None, 500), (None, 542000      input_9[0][0]                    
__________________________________________________________________________________________________
input_10 (InputLayer)           (None, None, 30)     0                                            
__________________________________________________________________________________________________
concatenate_5 (Concatenate)     (None, 500)          0           bidirectional_3[0][1]            
                                                                 bidirectional_3[0][3]      

In [0]:
# define inference encoder
# Maps input frames to the concatenated [h, c] states, reusing the encoder layers built above.
encoder_model = Model(bi_lstm_encoder_inputs, encoder_states)
# define inference decoder
# The states are fed in as explicit inputs (width latent_dim*2) and returned as outputs,
# so a caller can run the decoder one step at a time; decoder_lstm and decoder_dense
# are the same layer objects used in the training model.
# NOTE(review): `encoder_model` / `decoder_model` are also assigned by the plain LSTM
# section; whichever cell ran last determines what these names refer to.
decoder_state_input_h = Input(shape=(latent_dim*2,))
decoder_state_input_c = Input(shape=(latent_dim*2,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
decoder_outputs, state_h, state_c = decoder_lstm(bi_lstm_decoder_inputs, initial_state=decoder_states_inputs)
decoder_states = [state_h, state_c]
decoder_outputs = decoder_dense(decoder_outputs)
decoder_model = Model([bi_lstm_decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states)

Compile and fit the bidirectional model

In [31]:
# Train the bidirectional model: the decoder receives the transcript as input and
# predicts the same transcript shifted by one character.
bi_lstm_model.compile(optimizer='rmsprop', loss='categorical_crossentropy',
              metrics=['accuracy'])
# Keep the returned History so the loss/accuracy curves can be plotted later.
# If training is interrupted this name is not bound.
bi_lstm_history = bi_lstm_model.fit([train_X, decoder_input_data], decoder_target_data,
          batch_size=batch_size,
          shuffle=True, 
          epochs=epochs,
          validation_split=0.2, verbose=1 )

Train on 55969 samples, validate on 13993 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15

KeyboardInterrupt: ignored

## Build LSTM Model

In [26]:
# Unidirectional baseline: an LSTM encoder whose final (h, c) states seed an
# LSTM decoder of the same width, followed by a per-step softmax over characters.
# NOTE(review): this cell rebinds `encoder_states`, `decoder_lstm` and `decoder_dense`,
# which the bidirectional section also defines; run order decides which layers the
# later inference cells pick up.
encoder_inputs = Input(shape=(None, num_encoder_chars))
encoder = LSTM(latent_dim, return_state=True )
encoder_outputs, state_h, state_c = encoder(encoder_inputs)
encoder_states = [state_h, state_c]
decoder_inputs = Input(shape=(None, num_decoder_chars))
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_inputs,
                                     initial_state=encoder_states)
decoder_dense = Dense(num_decoder_chars, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

# Training model: (encoder frames, decoder input characters) -> per-step character distribution.
lstm_model = Model([encoder_inputs, decoder_inputs], decoder_outputs) 
lstm_model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, None, 20)     0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, None, 30)     0                                            
__________________________________________________________________________________________________
lstm_1 (LSTM)                   [(None, 250), (None, 271000      input_1[0][0]                    
__________________________________________________________________________________________________
lstm_2 (LSTM)                   [(None, None, 250),  281000      input_2[0][0]                    
                                                                 lstm_1[0][1]               

In [0]:
# define inference encoder
# Maps input frames to the encoder's final [h, c] states.
encoder_model = Model(encoder_inputs, encoder_states)
# define inference decoder
# The states are fed in as explicit inputs (width latent_dim) and returned as outputs,
# so a caller can run the decoder one step at a time; decoder_lstm and decoder_dense
# are the same layer objects used in lstm_model.
# NOTE(review): `encoder_model` / `decoder_model` are also assigned by the bidirectional
# section; whichever cell ran last determines what these names refer to.
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
decoder_outputs, state_h, state_c = decoder_lstm(decoder_inputs, initial_state=decoder_states_inputs)
decoder_states = [state_h, state_c]
decoder_outputs = decoder_dense(decoder_outputs)
decoder_model = Model(
    [decoder_inputs] + decoder_states_inputs,
    [decoder_outputs] + decoder_states)

Compile and fit the LSTM model

In [20]:
# Train the unidirectional model: the decoder receives the transcript as input and
# predicts the same transcript shifted by one character.
lstm_model.compile(optimizer='rmsprop', loss='categorical_crossentropy',
              metrics=['accuracy'])
# Keep the returned History so the loss/accuracy curves can be plotted later.
# If training is interrupted this name is not bound.
lstm_history = lstm_model.fit([train_X, decoder_input_data], decoder_target_data,
          batch_size=batch_size,
          shuffle=True, 
          epochs=epochs,
          validation_split=0.2, verbose=1 )
lstm_history

Train on 55969 samples, validate on 13993 samples
Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/1

<keras.callbacks.callbacks.History at 0x7fb30da20c88>

## Save and load the model

In [32]:
%cd /content/drive/My Drive/Colab Notebooks/Data/Capstone/Models/ 
# pickle.dump(lstm_model, open('SR_s2s_E150_B640_L250_2', 'wb'))
pickle.dump(bi_lstm_model, open('SR_s2s_Bi_E50_B640_L200', 'wb'))
# model = pickle.load(open('SR_s2s_E150_B640_L250', 'rb'))
# model = pickle.load(open('SR_s2s_Bi_E50_B640_L200', 'rb'))

/content/drive/My Drive/Colab Notebooks/Data/Capstone/Models


## Decode the sequence 

In [0]:
def decode_sequence_range(seq_index_range, encoder_model, decoder_model):
    '''Greedy-decode the first `seq_index_range` rows of `encoder_input_data`.

    Each row is encoded to state vectors, then the decoder is run one character
    at a time, feeding back its own most probable character until it emits the
    end marker '\\n' or the output exceeds `max_decoder_seq_length` characters.

    Parameters
    ----------
    seq_index_range : int
        Number of leading rows of the module-level `encoder_input_data` to decode.
    encoder_model : keras Model
        Inference encoder; its predict() must return the [h, c] state list.
    decoder_model : keras Model
        Inference decoder; its predict() takes [target_seq, h, c] and returns
        (output_tokens, h, c).

    Returns
    -------
    list of str
        One decoded sentence per input row, in order. The function used to
        return from inside the loop, so only row 0 was ever decoded.

    Relies on the module-level `num_decoder_chars`, `target_char_index`,
    `reverse_target_char_index` and `max_decoder_seq_length`.
    '''
    decoded_sentences = []
    for seq_index in range(seq_index_range):
        input_seq = encoder_input_data[seq_index: seq_index + 1]

        # Encode the input as state vectors.
        states_value = encoder_model.predict(input_seq)

        # Start decoding from the start-of-sequence marker.
        target_seq = np.zeros((1, 1, num_decoder_chars))
        target_seq[0, 0, target_char_index['\t']] = 1.

        # Sampling loop for a batch of size 1.
        decoded_sentence = ''
        while True:
            output_tokens, h, c = decoder_model.predict(
                [target_seq] + states_value)
            # Carry the recurrent state over to the next step.
            states_value = [h, c]

            # Pick the most probable character.
            sampled_token_index = np.argmax(output_tokens[0, -1, :])
            sampled_char = reverse_target_char_index[sampled_token_index]
            decoded_sentence += sampled_char

            # Stop at the end marker, or at the length cap so that a model that
            # never predicts '\n' cannot loop forever.
            if (sampled_char == '\n'
                    or len(decoded_sentence) > max_decoder_seq_length):
                break

            # Feed the sampled character back in as the next decoder input.
            target_seq = np.zeros((1, 1, num_decoder_chars))
            target_seq[0, 0, sampled_token_index] = 1.

        decoded_sentences.append(decoded_sentence)
    return decoded_sentences

In [0]:
# Decode with a range of 10, using whichever encoder_model / decoder_model pair
# was defined most recently.
decoded_sentence = decode_sequence_range(10, encoder_model, decoder_model)

In [0]:
# Show each reference transcript next to its decoded counterpart.
# The previous loop ran over range(len(decoded_sentence)) — the characters of a single
# string — and printed the whole value on every pass. Accept either a single string
# or a list of strings so every decoded sentence is paired with its own input.
if isinstance(decoded_sentence, str):
    decoded_list = [decoded_sentence]
else:
    decoded_list = list(decoded_sentence)

for idx, decoded in enumerate(decoded_list):
    print('-'*100)
    print('Input sentence:', input_texts[idx])
    print('Decoded sentence:', decoded)

## Evaluation 

In [0]:
# evaluate the model
# `model` was never bound in this notebook (only in commented-out load lines), so the
# network to evaluate is chosen explicitly. Use `lstm_model` for the unidirectional one.
model = bi_lstm_model


def _one_hot_transcripts(texts):
    '''One-hot encode marker-wrapped transcripts the same way as the training targets.

    Returns (decoder_input, decoder_target), both of shape
    (len(texts), max_decoder_seq_length, num_decoder_chars); the target is the
    input shifted one step to the left. Texts longer than max_decoder_seq_length
    are truncated, and characters missing from target_char_index are left as zeros.
    '''
    shape = (len(texts), max_decoder_seq_length, num_decoder_chars)
    decoder_in = np.zeros(shape, dtype='float32')
    decoder_out = np.zeros(shape, dtype='float32')
    for row, text in enumerate(texts):
        for pos, char in enumerate(text[:max_decoder_seq_length]):
            col = target_char_index.get(char)
            if col is None:
                continue
            decoder_in[row, pos, col] = 1.
            if pos > 0:
                decoder_out[row, pos - 1, col] = 1.
    return decoder_in, decoder_out


# The seq2seq model takes [encoder frames, decoder input] and is scored against the
# one-hot shifted transcript. Raw text labels cannot be passed to evaluate().
test_texts = list('\t' + y_test.apply(clean_text) + '\n')
test_decoder_input, test_decoder_target = _one_hot_transcripts(test_texts)

train_loss, train_acc = model.evaluate(
    [X_train_input, decoder_input_data], decoder_target_data, verbose=1)
test_loss, test_acc = model.evaluate(
    [X_test_input, test_decoder_input], test_decoder_target, verbose=1)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# Keras attaches the History callback of the most recent fit() call as `model.history`;
# the per-epoch metrics live in its `.history` dict. This is only available in the
# session that trained the model, not after loading it from disk.
training_curves = model.history.history

fig, (loss_ax, acc_ax) = plt.subplots(2, 1)
# plot loss during training
loss_ax.set_title('Loss')
loss_ax.plot(training_curves['loss'], label='train')
loss_ax.plot(training_curves['val_loss'], label='validation')
loss_ax.legend()
# plot accuracy during training
acc_ax.set_title('Accuracy')
acc_ax.plot(training_curves['accuracy'], label='train')
acc_ax.plot(training_curves['val_accuracy'], label='validation')
acc_ax.set_xlabel('epoch')
acc_ax.legend()
fig.tight_layout()
plt.show()