In [1]:
import tensorflow as tens
from keras.models import load_model
import numpy as np
from keras.utils.generic_utils import get_custom_objects
import pandas as pd
# import required libraries
import sounddevice as sd
from scipy.io.wavfile import write
import wavio as wv

# Alphabet the transcription may contain: a-z, apostrophe, '?', '!' and space.
characters = list("abcdefghijklmnopqrstuvwxyz'?! ")
# Forward lookup layer: character -> integer id (empty string is the OOV token).
charTnum = tens.keras.layers.StringLookup(
    vocabulary=characters,
    oov_token="",
)
# Inverse lookup layer: integer id -> character, built from the forward vocabulary.
numTchar = tens.keras.layers.StringLookup(
    vocabulary=charTnum.get_vocabulary(),
    oov_token="",
    invert=True,
)

# STFT settings, in order:
#   frame_length - window length in samples
#   frame_step   - number of samples to step between windows
#   fft_length   - size of the FFT to apply
frame_length, frame_step, fft_length = 256, 160, 384

# Batch size used when batching inputs for prediction.
batch_size = 2
# Base name (no extension) of the clip to transcribe; rebound by later cells.
fname = ''
# Single-row [file_name, transcription] table built from the current fname.
data = [[fname, 'SPT']]
# CTC loss function
def CTCLoss(y_true, y_pred):
    """Return the per-sample CTC batch cost between labels and predictions.

    Every sample in the batch is given the same input length (axis 1 of
    ``y_pred``) and the same label length (axis 1 of ``y_true``).

    Args:
        y_true: Label tensor; axis 0 is the batch, axis 1 the label positions.
        y_pred: Prediction tensor; axis 1 is used as the input length.

    Returns:
        The tensor produced by ``ctc_batch_cost``.
    """
    n_samples = tens.cast(tens.shape(y_true)[0], dtype="int64")
    pred_steps = tens.cast(tens.shape(y_pred)[1], dtype="int64")
    label_steps = tens.cast(tens.shape(y_true)[1], dtype="int64")

    # Column of ones, one row per sample, used to broadcast the scalar lengths.
    per_sample = tens.ones(shape=(n_samples, 1), dtype="int64")

    return tens.keras.backend.ctc_batch_cost(
        y_true,
        y_pred,
        pred_steps * per_sample,
        label_steps * per_sample,
    )
# Make the loss resolvable by name while the saved model is deserialised,
# then load the compiled model from disk.
get_custom_objects()['CTCLoss'] = CTCLoss
model = load_model(
    'final_model.hdf5',
    compile=True,
)



In [2]:
######encoding wav file
def Encode(wav_file):
    """Load ``<wav_file>.wav`` and convert it to a normalised spectrogram.

    Args:
        wav_file: Path of the recording WITHOUT the ``.wav`` extension. May be
            a Python string (eager call) or a scalar string tensor (inside a
            ``tf.data`` map); ``'.wav'`` is appended in both cases.

    Returns:
        A float32 tensor holding the square-rooted STFT magnitude of the
        waveform, standardised along axis 1 (mean 0 / std 1 per row).
        Presumably shaped (frames, fft_length // 2 + 1) -- confirm against
        the model's expected input.
    """
    ###########################################
    ##  Process the Audio
    ##########################################
    # 1. Read the raw bytes of the wav file
    file = tens.io.read_file(wav_file + '.wav')
    # 2. Decode the wav file. Exactly one channel is requested so the squeeze
    #    below also works for stereo recordings (extra channels are ignored)
    #    instead of failing on a trailing axis of size > 1.
    audio, _ = tens.audio.decode_wav(file, desired_channels=1)
    audio = tens.squeeze(audio, axis=-1)
    # 3. Change type to float
    audio = tens.cast(audio, tens.float32)
    # 4. Get the spectrogram
    spectrogram = tens.signal.stft(
        audio, frame_length=frame_length, frame_step=frame_step, fft_length=fft_length
    )
    # 5. Keep only the magnitude, then compress its range with a square root
    spectrogram = tens.abs(spectrogram)
    spectrogram = tens.math.pow(spectrogram, 0.5)
    # 6. Normalisation along axis 1; the epsilon guards against division by zero
    means = tens.math.reduce_mean(spectrogram, 1, keepdims=True)
    stddevs = tens.math.reduce_std(spectrogram, 1, keepdims=True)
    spectrogram = (spectrogram - means) / (stddevs + 1e-10)
    return spectrogram

#model prediction
def Predict(spectrogram):
    """Run the loaded model on an encoded recording.

    The previous implementation ignored ``spectrogram`` and re-encoded the
    file named by the module-level ``fname`` through a DataFrame and a
    ``tf.data`` pipeline. The argument is now what gets fed to the model.

    Args:
        spectrogram: Output of ``Encode``. A rank-2 tensor is treated as a
            single recording and given a leading batch axis; a tensor that
            already has a batch axis is passed through unchanged. ``None``
            falls back to encoding the file named by the module-level
            ``fname`` (the old behaviour).

    Returns:
        The value returned by ``model.predict`` for the batch.
    """
    if spectrogram is None:
        # Backward-compatible path for callers that relied on the global.
        spectrogram = Encode(fname)

    batch = tens.convert_to_tensor(spectrogram)
    if batch.shape.rank == 2:
        # Single recording: add the batch axis the model expects.
        batch = tens.expand_dims(batch, axis=0)

    return model.predict(batch)

def Decode(sample):
    """Greedy-decode CTC model output into a list of text strings.

    Args:
        sample: Model predictions; ``shape[0]`` is taken as the number of
            items and ``shape[1]`` as the sequence length of every item.

    Returns:
        A list with one decoded UTF-8 string per item.
    """
    n_items, n_steps = sample.shape[0], sample.shape[1]
    # Every item is decoded over the full sequence length.
    input_len = np.ones(n_items) * n_steps

    # Greedy search; beam search is the alternative for harder tasks.
    decoded = tens.keras.backend.ctc_decode(sample, input_length=input_len, greedy=True)
    best_paths = decoded[0][0]

    # Map each id sequence back to characters and join them into one string.
    return [
        tens.strings.reduce_join(numTchar(path)).numpy().decode("utf-8")
        for path in best_paths
    ]

In [3]:
# Transcribe one clip end to end: encode the wav, run the model, decode the text.
# NOTE(review): the reference sentence on the next line does not match the
# output recorded for this cell; it may belong to a different clip -- confirm.
#could have led anyone to predict the outburst of violence which finally occurred.
# fname is a module-level name (initialised to '' in the first cell); rebinding
# it here selects the clip (base name, no extension) to transcribe.
fname = 'LJ029-0001'
Decode(Predict(Encode(fname)))

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'


["report of the president's commission on the assassination of president kennedy the warren commission report by the president's commission on the assassination of president kennedy"]