<b>Importing the desired libraries </b>

In [1]:
import numpy as np
import json
import cherrypy
import tensorflow as tf
import os 

<b>First we have to add the model and do what we did for HW2 in here as well </b>

In [2]:
#adding the tflite model file

model_path="../Prerequisite/kws_dscnn_True.tflite" 


#--------------------------------------------------------------------------------------------------------------------------


#downloading the dataset 
zip_path = tf.keras.utils.get_file(
    origin="http://storage.googleapis.com/download.tensorflow.org/data/mini_speech_commands.zip",
    fname='mini_speech_commands.zip',
    extract=True,
    cache_dir='.', cache_subdir='data')






In [6]:
#adding the dataset

data_dir = os.path.join('.', 'data', 'mini_speech_commands')
filenames = tf.io.gfile.glob(str(data_dir) + '/*/*')
filenames = tf.random.shuffle(filenames)
num_samples = len(filenames)
LABELS = ['stop', 'up', 'yes', 'right', 'left', 'no', 'down', 'go']
total = 8000
test_files = open('../Prerequisite/kws_test_split.txt', 'r')


#--------------------------------------------------------------------------------------------------------------------------
#getting the labels for the test set (we don't need the training set here)

test_list = []
y_test = []
for i in test_files:
    test_list.append(i)
    tmp = i.replace('./data/mini_speech_commands/', '')
    loc_slash = tmp.find('/')
    y_test.append(LABELS.index(tmp[:loc_slash]))
test_files = tf.convert_to_tensor([s.rstrip() for s in test_list])
y_test = np.array(y_test)


#--------------------------------------------------------------------------------------------------------------------------


#we use this class in order to create our test/train/val set in the desired way
class SignalGenerator:
    def __init__(self, labels, sampling_rate, frame_length, frame_step,
                 num_mel_bins=None, lower_frequency=None, upper_frequency=None,
                 num_coefficients=None, mfcc=False):
        self.labels = labels
        self.sampling_rate = sampling_rate
        self.frame_length = frame_length
        self.frame_step = frame_step
        self.num_mel_bins = num_mel_bins
        self.lower_frequency = lower_frequency
        self.upper_frequency = upper_frequency
        self.num_coefficients = num_coefficients
        num_spectrogram_bins = (frame_length) // 2 + 1

        if mfcc is True:
            self.linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
                self.num_mel_bins, num_spectrogram_bins, self.sampling_rate,
                self.lower_frequency, self.upper_frequency)
            self.preprocess = self.preprocess_with_mfcc
        else:
            self.preprocess = self.preprocess_with_stft

    def read(self, file_path):
        print(file_path)
        parts = tf.strings.split(file_path, os.path.sep)
        label = parts[-2]
        print(parts)
        print(label)
        label_id = tf.argmax(label == self.labels)
        audio_binary = tf.io.read_file(file_path)
        audio, _ = tf.audio.decode_wav(audio_binary)
        audio = tf.squeeze(audio, axis=1)

        return audio, label_id

    def pad(self, audio):
        zero_padding = tf.zeros([self.sampling_rate] - tf.shape(audio), dtype=tf.float32)
        audio = tf.concat([audio, zero_padding], 0)
        audio.set_shape([self.sampling_rate])

        return audio

    def get_spectrogram(self, audio):
        stft = tf.signal.stft(audio, frame_length=self.frame_length,
                              frame_step=self.frame_step, fft_length=self.frame_length)
        spectrogram = tf.abs(stft)

        return spectrogram

    def get_mfccs(self, spectrogram):
        mel_spectrogram = tf.tensordot(spectrogram,
                                       self.linear_to_mel_weight_matrix, 1)
        log_mel_spectrogram = tf.math.log(mel_spectrogram + 1.e-6)
        mfccs = tf.signal.mfccs_from_log_mel_spectrograms(log_mel_spectrogram)
        mfccs = mfccs[..., :self.num_coefficients]

        return mfccs

    def preprocess_with_stft(self, file_path):
        audio, label = self.read(file_path)
        audio = self.pad(audio)
        spectrogram = self.get_spectrogram(audio)
        spectrogram = tf.expand_dims(spectrogram, -1)
        spectrogram = tf.image.resize(spectrogram, [32, 32])

        return spectrogram, label

    def preprocess_with_mfcc(self, file_path):
        audio, label = self.read(file_path)
        audio = self.pad(audio)
        spectrogram = self.get_spectrogram(audio)
        mfccs = self.get_mfccs(spectrogram)
        mfccs = tf.expand_dims(mfccs, -1)

        return mfccs, label

    def make_dataset(self, files):
        ds = tf.data.Dataset.from_tensor_slices(files)
        ds = ds.map(self.preprocess, num_parallel_calls=4)
        ds = ds.batch(32)
        ds = ds.cache()

        return ds


In [7]:
MFCC_OPTIONS = {'frame_length': 40, 'frame_step': 20, 'mfcc': True,
                    'lower_frequency': 20, 'upper_frequency': 4000, 'num_mel_bins': 40,
                    'num_coefficients':10 }

options = MFCC_OPTIONS
strides = [2, 1]

generator = SignalGenerator(LABELS, 16000, **options)
test_ds = generator.make_dataset(test_files)


Tensor("args_0:0", shape=(), dtype=string)
Tensor("StringSplit/RaggedGetItem/strided_slice_5:0", shape=(None,), dtype=string)
Tensor("strided_slice:0", shape=(), dtype=string)


<b>Now we have to do the prediction using the tflite model</b>

In [5]:
#code for using a tflite model for prediction
interpreter = tf.lite.Interpreter(model_path=model_path)
interpreter.resize_tensor_input(0, [-1, 49, 10, 1],True)
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()


# type(input_details[0]['index'])
# input_data = np.array(test_ds, dtype=np.float32)
# input_data = input_data.reshape(1, 6, 2)

interpreter.set_tensor(0, test_ds)
interpreter.invoke()
predict_result = interpreter.get_tensor(output_details[0]['index'])

RuntimeError: tensorflow/lite/core/subgraph.cc BytesRequired number of elements overflowed.


In [None]:
input_details


In [None]:
test_ds
