In [None]:
import keras.models
!sudo apt -y install libportaudio2

# The code below is needed to run the code in Google Colab, which uses python3.10
!wget https://github.com/Gulianrdgd/tflite-support/releases/download/3.10.0/tflite_support-3.10.0-cp310-cp310-linux_x86_64.whl
!pip install ./tflite_support-3.10.0-cp310-cp310-linux_x86_64.whl

!pip install tflite-model-maker==0.4.2 python_speech_features keras==2.11.0
!pip install python_speech_features --upgrade

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following NEW packages will be installed:
  libportaudio2
0 upgraded, 1 newly installed, 0 to remove and 8 not upgraded.
Need to get 65.3 kB of archives.
After this operation, 223 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 libportaudio2 amd64 19.6.0-1.1 [65.3 kB]
Fetched 65.3 kB in 1s (60.4 kB/s)
debconf: unable to initialize frontend: Dialog
debconf: (No usable dialog-like program is installed, so the dialog based frontend cannot be used. at /usr/share/perl5/Debconf/FrontEnd/Dialog.pm line 78, <> line 1.)
debconf: falling back to frontend: Readline
debconf: unable to initialize frontend: Readline
debconf: (This frontend requires a controlling tty.)
debconf: falling back to frontend: Teletype
dpkg-preconfigure: unable to re-open stdin: 
Selecting previously unselected package libportaudio2:amd64.
(Reading database ... 129824 fil

In [72]:
import os
import glob
import random
import shutil

import librosa
import soundfile as sf
from IPython.display import Audio
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.io import wavfile

from tensorflow.keras import Model

from keras.layers import Dense, Flatten, Dropout, Conv2D, MaxPooling2D, Input, Bidirectional, BatchNormalization, Lambda, Dot, Softmax, LSTM
from keras.losses import SparseCategoricalCrossentropy
from keras.optimizers import Adam
from tensorflow.keras import backend

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

import sklearn
from keras.utils import np_utils

import tensorflow as tf

from python_speech_features import mfcc

# Report the TensorFlow build in use so the run can be reproduced later.
print("TensorFlow Version:", tf.__version__)

TensorFlow Version: 2.12.0


## Export the model

The last step is exporting your model into the TensorFlow Lite format for execution on mobile/embedded devices and into the [SavedModel format](https://www.tensorflow.org/guide/saved_model) for execution elsewhere.

When exporting a `.tflite` file from Model Maker, it includes [model metadata](https://www.tensorflow.org/lite/inference_with_metadata/overview) that describes various details that can later help during inference. It even includes a copy of the classification labels file, so you don't need a separate `labels.txt` file. (In the next section, we show how to use this metadata to run an inference.)

In [73]:
# Paths and audio parameters used by the export cells below.

# Despite its name, this is the Keras `.h5` file whose weights are loaded
# into the rebuilt model before conversion.
TFLITE_ORIG_FILENAME = '/content/drive/MyDrive/model_data_0_lstm_2_2kHz_mid_15_dirty-label.h5'
# Intermediate TFLite file written right after conversion (no metadata yet).
TFLITE_FILENAME = 'browserfft-speech.tflite'
# Final TFLite file with metadata: same location and stem as the source file.
# Only the trailing extension is swapped; a plain `.replace('.h5', ...)` would
# also rewrite any other '.h5' inside the path, and would return the input
# path unchanged (so the export would overwrite it) if the extension differed.
TFLITE_METADATA_FILENAME = os.path.splitext(TFLITE_ORIG_FILENAME)[0] + '.tflite'
# Audio properties recorded in the model metadata.
SAMPLE_RATE = 16000
CHANNELS = 1
# Label file packed into the model metadata.
LABEL_FILE = '30.txt'

In [76]:
from tflite_support.metadata_writers import audio_classifier
from tflite_support.metadata_writers import writer_utils

#model = tf.keras.models.load_model(TFLITE_ORIG_FILENAME)

# Only for LSTM: the architecture is rebuilt in code and the stored weights
# are then loaded into it.

learning_rate = 0.0001
loss = "sparse_categorical_crossentropy"
inputs = Input((101, 40, 1), name='input')

# Convolutional front end: two Conv2D + BatchNormalization stages
# (10 filters, then 1), both with a (5, 1) kernel.
conv_features = inputs
for n_filters in (10, 1):
  conv_features = Conv2D(n_filters, (5, 1), activation='relu', padding='same')(conv_features)
  conv_features = BatchNormalization()(conv_features)

# Remove the last axis so the recurrent layers receive a 3-D tensor.
sequence = Lambda(lambda t: backend.squeeze(t, -1), name='squeeze_last_dim')(conv_features)

# Two stacked bidirectional LSTMs, each returning the full sequence.
for _ in range(2):
  sequence = Bidirectional(LSTM(64, return_sequences=True))(sequence)  # [b_s, seq_len, vec_dim]

# The attention query is a projection of the final time step (index -1).
last_step = Lambda(lambda t: t[:, -1])(sequence)  # [b_s, vec_dim]
query = Dense(128)(last_step)

# dot product attention
att_logits = Dot(axes=[1, 2])([query, sequence])
att_weights = Softmax(name='attSoftmax')(att_logits)  # [b_s, seq_len]

# rescale sequence
context = Dot(axes=[1, 1])([att_weights, sequence])  # [b_s, vec_dim]

# Classification head.
hidden = Dense(64, activation='relu')(context)
hidden = Dropout(0.5)(hidden)
hidden = Dense(32)(hidden)

output = Dense(30, activation='softmax', name='output')(hidden)
model = Model(inputs=[inputs], outputs=[output])

# compile model
optimiser = tf.optimizers.Adam(learning_rate=learning_rate)
model.compile(optimizer=optimiser, loss=loss, metrics=["accuracy"])

# Populate the freshly built graph with the stored weights.
model.load_weights(TFLITE_ORIG_FILENAME)

# Convert the in-memory Keras model to a TFLite flatbuffer.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.experimental_new_converter=True
# Permit both TFLite builtin ops and selected TensorFlow ops in the
# converted graph.
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS,
    tf.lite.OpsSet.SELECT_TF_OPS,
]

tflite_model = converter.convert()

# Write through a context manager so the file is flushed and closed
# deterministically; it is read back from disk when the metadata is attached.
with open(TFLITE_FILENAME, "wb") as tflite_file:
  tflite_file.write(tflite_model)

# Attach audio-classifier metadata (sample rate, channel count and label
# file) to the converted model and save the result under a new file name.
# NOTE(review): the generated metadata describes the input as an audio clip,
# while the model built above takes a (101, 40, 1) tensor. Confirm that
# consumers of this file account for that.
AudioClassifierWriter = audio_classifier.MetadataWriter

# Create the metadata writer from the raw bytes of the converted model.
model_buffer = writer_utils.load_file(TFLITE_FILENAME)
writer = AudioClassifierWriter.create_for_inference(
    model_buffer, SAMPLE_RATE, CHANNELS, [LABEL_FILE])

# Verify the metadata generated by metadata writer.
print(writer.get_metadata_json())

# Populate the metadata into the model.
writer_utils.save_file(writer.populate(), TFLITE_METADATA_FILENAME)




{
  "name": "AudioClassifier",
  "description": "Identify the most prominent type in the audio clip from a known set of categories.",
  "subgraph_metadata": [
    {
      "input_tensor_metadata": [
        {
          "name": "audio_clip",
          "description": "Input audio clip to be classified.",
          "content": {
            "content_properties_type": "AudioProperties",
            "content_properties": {
              "sample_rate": 16000,
              "channels": 1
            }
          },
          "stats": {
          }
        }
      ],
      "output_tensor_metadata": [
        {
          "name": "probability",
          "description": "Scores of the labels respectively.",
          "content": {
            "content_properties_type": "FeatureProperties",
            "content_properties": {
            }
          },
          "stats": {
            "max": [
              1.0
            ],
            "min": [
              0.0
            ]
          },
          

In [77]:
from tflite_support import metadata
import json

def get_labels(model):
  """Returns a list of labels, extracted from the model metadata.

  Args:
    model: Path to a TFLite model file with packed associated files.

  Returns:
    One string per line of the first packed associated file. Interior blank
    lines are preserved so list positions still match line positions.
  """
  displayer = metadata.MetadataDisplayer.with_model_file(model)
  labels_file = displayer.get_packed_associated_file_list()[0]
  labels = displayer.get_associated_file_buffer(labels_file).decode()
  # splitlines() avoids the spurious trailing '' that split('\n') produces for
  # a newline-terminated file, and strips '\r' from CRLF line endings.
  return labels.splitlines()

def get_input_sample_rate(model):
  """Reads the expected input sample rate from a model's metadata.

  Args:
    model: Path to a TFLite model file that carries metadata.

  Returns:
    The `sample_rate` value stored in the content properties of the first
    input tensor of the first subgraph.
  """
  metadata_json = json.loads(
      metadata.MetadataDisplayer.with_model_file(model).get_metadata_json())
  first_input = metadata_json['subgraph_metadata'][0]['input_tensor_metadata'][0]
  return first_input['content']['content_properties']['sample_rate']

# Ensure the audio sample fits the model input: read the input/output details and sample rate of the exported model.
# Load the exported model (with metadata) and inspect its signature.
interpreter = tf.lite.Interpreter(TFLITE_METADATA_FILENAME)
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
input_size = input_details[0]['shape'][1]
sample_rate = get_input_sample_rate(TFLITE_METADATA_FILENAME)

print(input_details)
print(output_details)
print(input_size)
print(sample_rate)

# All-zero dummy input, sized and typed from what the interpreter reports for
# its first input. It is deliberately not called `mfcc`, because that name
# would shadow the `mfcc` function imported from python_speech_features.
dummy_input = np.zeros(
    tuple(input_details[0]['shape']), dtype=input_details[0]['dtype'])

# Run inference
interpreter.allocate_tensors()
interpreter.set_tensor(input_details[0]['index'], dummy_input)
interpreter.invoke()
output_data = interpreter.get_tensor(output_details[0]['index'])
print(output_data)

[{'name': 'serving_default_input:0', 'index': 0, 'shape': array([  1, 101,  40,   1], dtype=int32), 'shape_signature': array([ -1, 101,  40,   1], dtype=int32), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]
[{'name': 'StatefulPartitionedCall:0', 'index': 109, 'shape': array([ 1, 30], dtype=int32), 'shape_signature': array([-1, 30], dtype=int32), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]
101
16000
[[0.0326908  0.02021374 0.02794598 0.02265681 0.02229991 0.04373153
  0.03703569 0.02993943 0.02368657 0.02385453 0.07419721 0.01960711
  0.01765985 0.01509392 0.02296214 0.04217013 0.03766866 0.0234642
  0.02849479 0.03220792 0.03180071 0.05055904 0.0

In [78]:
# Offer the exported model as a browser download when running inside Google
# Colab; in any other environment this cell does nothing.
try:
  from google.colab import files
except ImportError:
  _running_in_colab = False
else:
  _running_in_colab = True

if _running_in_colab:
  files.download(TFLITE_METADATA_FILENAME)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>