In [1]:
!pip install mlflow --user



In [2]:
import glob
import json
import os

import tensorflow as tf
import scipy.io.wavfile as wav
import numpy as np
from six.moves import xrange as range
from python_speech_features import mfcc
from sklearn.model_selection import train_test_split
import mlflow

In [3]:
# tf.debugging.set_log_device_placement(True)

In [4]:
# Constants
# Placeholder symbol substituted for the gaps between words when labels are encoded
SPACE_TOKEN = '<space>'
# NOTE(review): SPACE_INDEX / FIRST_INDEX are not referenced in the visible cells;
# presumably the vocabulary index of the space symbol and of the first real character.
SPACE_INDEX = 0
FIRST_INDEX = 1
# Padding value for feature frames; the same value is given to the Masking layer
FEAT_MASK_VALUE = 1e+10

# Some configs
# Size of each feature vector fed to the model's `input_feature` input
num_features = 13
# Units per LSTM cell
num_units = 100
num_classes = 285 + 1 # 285 (including space) + blank label = 286

# Hyper-parameters
num_epochs = 100
# Number of stacked LSTM cells
num_layers = 1
batch_size = 2
# Learning rate and momentum passed to the SGD optimizer
initial_learning_rate = 0.005
momentum = 0.9

In [6]:
# Loading the data
# glob.glob() returns paths in arbitrary (filesystem-dependent) order, so sort
# before slicing; otherwise the subset picked below differs between machines/runs.
file_path = sorted(glob.glob('../data/train/wav/*.wav'))
# Small fixed subset of utterances used for this experiment
file_path = file_path[28:32]
audio_list = []
fs_list = []

for file_name in file_path:
    # wav.read returns (sample_rate, samples)
    fs, audio = wav.read(file_name)
    audio_list.append(audio)
    fs_list.append(fs)


In [5]:
# Build one MFCC feature matrix per utterance (utterances differ in length)
inputs_list = []
for samples, sample_rate in zip(audio_list, fs_list):
    features = mfcc(samples, samplerate=sample_rate)
    # Standardise each utterance with its own overall mean and standard deviation
    centered = features - np.mean(features)
    inputs_list.append(centered / np.std(features))

# Ragged tensor -> true sequence lengths -> dense 3D tensor padded with the mask value
ragged_inputs = tf.ragged.constant(list(inputs_list), dtype=np.float32)
train_seq_len = tf.cast(ragged_inputs.row_lengths(), tf.int32)
train_inputs = ragged_inputs.to_tensor(default_value=FEAT_MASK_VALUE)

In [6]:
def _read_json(path):
    """Parse the UTF-8 encoded JSON file at `path` and return the decoded object."""
    with open(path, 'r', encoding='UTF-8') as handle:
        return json.load(handle)


# Transcripts (looked up by audio file name) and the character vocabulary
# (provides the 'char_to_num' / 'num_to_char' mappings used further down)
labels = _read_json('../data/labels.json')
alphabets = _read_json('../data/language_model.json')


In [7]:
# Reading Targets
original_list = []
targets_list = []

for path in file_path:
    # Label key = file name without directory or extension. Using os.path keeps
    # this correct even when the file name itself contains the substring "wav"
    # (splitting the whole path on 'wav' would cut such names apart).
    file_name = os.path.splitext(os.path.basename(path))[0]
    # Read Label
    label = labels[file_name]
    original = label.strip()
    original_list.append(original)
    # Double every space so that splitting on ' ' leaves an empty string
    # wherever a word boundary was
    target = original.replace(' ', '  ')
    target = target.split(' ')
    # Empty strings become the space token; words are exploded into characters
    target = np.hstack([SPACE_TOKEN if x == '' else list(x) for x in target])
    # Transform char into index
    target = np.asarray([alphabets['char_to_num'][x] for x in target])
    targets_list.append(target)

In [8]:
# Creating sparse representation to feed the placeholder
train_targets = tf.ragged.constant([i for i in targets_list], dtype=np.int32)
train_targets_len = tf.cast(train_targets.row_lengths(), tf.int32)
train_targets = train_targets.to_sparse()

In [9]:
# Sanity check: dense shape of the sparse label tensor (one row per utterance)
train_targets.shape

TensorShape([4, 38])

In [10]:
# Split Training and Validation sets
# (Earlier split variants are kept below, commented out, for reference.)
# train_inputs, val_inputs = train_inputs[:800], train_inputs[800:]
# train_seq_len, val_seq_len = train_seq_len[:800], train_seq_len[800:]
# train_targets, val_targets = tf.sparse.slice(train_targets, start=[0, 0], size=[800, 163]), tf.sparse.slice(train_targets, start=[800, 0], size=[200, 163])
# train_targets_len, val_targets_len = train_targets_len[:800], train_targets_len[800:]

# train_inputs, val_inputs = train_inputs[:5], train_inputs[5:]
# train_seq_len, val_seq_len = train_seq_len[:5], train_seq_len[5:]
# train_targets, val_targets = tf.sparse.slice(train_targets, start=[0, 0], size=[
#                                              5, 73]), tf.sparse.slice(train_targets, start=[5, 0], size=[5, 73])
# train_targets_len, val_targets_len = train_targets_len[:5], train_targets_len[5:]

# NOTE: validation currently reuses the training tensors unchanged, so the
# reported val_* numbers are not measured on held-out data.
val_inputs, val_targets, val_seq_len, val_targets_len = train_inputs, train_targets, train_seq_len, train_targets_len


In [11]:
class CTCLossLayer(tf.keras.layers.Layer):
    """Pass-through layer that attaches a CTC loss and a label-error-rate metric.

    `inputs` is a sequence ordered as [labels, logits, label_len, logit_len].
    The logits are returned untouched; the mean CTC loss is registered with
    `add_loss` and the mean edit distance with `add_metric` under the name 'ler'.
    """

    def call(self, inputs):
        labels, logits, label_len, logit_len = inputs[:4]

        # Swap the first two logit axes before handing them to the CTC ops,
        # and flatten both length tensors to 1-D.
        time_major_logits = tf.transpose(logits, (1, 0, 2))
        flat_label_len = tf.reshape(label_len, (-1,))
        flat_logit_len = tf.reshape(logit_len, (-1,))

        # The loss is attached here rather than being passed to compile()
        per_example_loss = tf.nn.ctc_loss(
            labels, time_major_logits, flat_label_len, flat_logit_len, blank_index=-1)
        self.add_loss(tf.reduce_mean(per_example_loss))

        # Greedy decode, then score the best path against the reference labels
        decoded, _ = tf.nn.ctc_greedy_decoder(time_major_logits, flat_logit_len)
        hypothesis = tf.cast(decoded[0], tf.int32)

        # Inaccuracy: label error rate
        ler = tf.reduce_mean(tf.edit_distance(hypothesis, labels))
        self.add_metric(ler, name='ler', aggregation='mean')

        return logits


In [12]:
# Recurrent stack: `num_layers` LSTM cells with `num_units` units each
cells = [tf.keras.layers.LSTMCell(num_units) for _ in range(num_layers)]
stack = tf.keras.layers.StackedRNNCells(cells)

In [13]:
# Defining Input Parameters
input_feature = tf.keras.layers.Input(shape=(None, num_features), name='input_feature')
input_label = tf.keras.layers.Input(shape=(None,), dtype=tf.int32, sparse=True, name='input_label')
input_feature_len = tf.keras.layers.Input(shape=(1,), dtype=tf.int32, name='input_feature_len')
input_label_len = tf.keras.layers.Input(shape=(1,), dtype=tf.int32, name='input_label_len')

# Initializer for the output projection weights
logit_kernel_init = tf.keras.initializers.TruncatedNormal(mean=0.0, stddev=0.1)

# Frames equal to the padding value are masked out before the recurrent stack
layer_masking = tf.keras.layers.Masking(mask_value=FEAT_MASK_VALUE)(input_feature)
layer_rnn = tf.keras.layers.RNN(stack, return_sequences=True)(layer_masking)
# layer_drop = tf.keras.layers.Dropout(0.2, seed=42)(layer_rnn)
layer_output = tf.keras.layers.Dense(
    num_classes,
    kernel_initializer=logit_kernel_init,
    bias_initializer='zeros',
    name='logit',
)(layer_rnn)

# The loss layer returns the logits unchanged while registering loss and metric
layer_loss = CTCLossLayer()(
    [input_label, layer_output, input_label_len, input_feature_len])


In [14]:
# Training model: takes features, labels and both length vectors; its output
# goes through the CTC loss layer so the loss and metric are part of the graph.
training_inputs = [input_feature, input_label, input_feature_len, input_label_len]
model_train = tf.keras.Model(inputs=training_inputs, outputs=layer_loss)

# Prediction model: same layers, features in -> logits out
model_predict = tf.keras.Model(inputs=input_feature, outputs=layer_output)

In [15]:
# Compile Training Model with selected optimizer
optimizer = tf.keras.optimizers.SGD(learning_rate=initial_learning_rate, momentum=momentum)
# No loss is passed here: the CTC loss is attached inside the model by CTCLossLayer
model_train.compile(optimizer=optimizer)

# Make the experiment the ACTIVE one so the autologged run is recorded under it.
# set_experiment() creates the experiment when it does not exist yet; merely
# creating/fetching it (without activating it) leaves runs outside the experiment.
# NOTE(review): the name says "50 Cells" while num_units is 100 — confirm the intended name.
experiment_name = "Stacked RNN(LSTM): 50 Cells"
mlflow.set_experiment(experiment_name)
experiment = mlflow.get_experiment_by_name(experiment_name)
experiment_id = experiment.experiment_id

mlflow.tensorflow.autolog()
# Training, Our y is already defined so no need
model_train.fit(x=[train_inputs, train_targets, train_seq_len, train_targets_len], y=None,
                validation_data=([val_inputs, val_targets, val_seq_len, val_targets_len], None),
                batch_size=batch_size, epochs=num_epochs)


2021/08/11 11:30:05 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '7ec1556c7e64453a94be3426dc3985f7', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current tensorflow workflow


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78



INFO:tensorflow:Assets written to: C:\Users\milky\AppData\Local\Temp\tmpzobwoqy2\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\milky\AppData\Local\Temp\tmpzobwoqy2\model\data\model\assets


<tensorflow.python.keras.callbacks.History at 0x2a9b126b220>

In [7]:
# Decoding
print('Original:')
# Print every reference transcript instead of a hard-coded four, so the cell
# keeps working when the number of loaded utterances changes.
for original in original_list:
    print(original)
print('Decoded:')

# Greedy CTC decoding of the training utterances; the logits' first two axes
# are swapped before being passed to the decoder.
logits = model_predict.predict(train_inputs)
decoded, _ = tf.nn.ctc_greedy_decoder(
    tf.transpose(logits, (1, 0, 2)), train_seq_len)

# Densify the best path; -1 fills the padding and is skipped when mapping to characters
d = tf.sparse.to_dense(decoded[0], default_value=-1).numpy()
str_decoded = [''.join([alphabets['num_to_char'][str(x)]
                        for x in row if x != -1]) for row in d]

# Symbol stored for index 0 is shown as a plain space
space_symbol = alphabets['num_to_char']['0']
for s in str_decoded:
    print(s.replace(space_symbol, ' '))


Original:


NameError: name 'original_list' is not defined

In [8]:
# Inspect the symbol stored for index 0 (the one replaced by ' ' when printing decoded text)
alphabets['num_to_char']['0']

NameError: name 'alphabets' is not defined