# Convert the test audio data to C source

In [1]:
import ipywidgets as widgets
from ipywidgets import interact, interact_manual
from IPython.display import Image, clear_output
from pydub import AudioSegment
import os

import math

import matplotlib.pyplot as plt
import numpy as np
from scipy.io.wavfile import read, write
import librosa

## Export the audio file to a C header file

In [2]:
class audioFileP:  # base class
    """Load a slice of an audio file and export it as a C ``#define`` macro.

    The file is decoded with ``librosa.load`` and the selected sample range is
    kept in ``self.data``. ``main()`` writes those samples to ``output`` as
    ``#define WAVE_DATA {v0,v1,...,}``.

    Args:
        folder: Directory containing the audio file.
        filename: Name of the audio file inside ``folder``.
        output: Path of the header file written by ``main()``.
        sample_rate: Value passed to ``librosa.load`` as ``sr``.
        duration: Value passed to ``librosa.load`` as ``duration``.
        start: Index of the first sample to keep.
        end: Index one past the last sample to keep.
    """

    def __init__(self, folder, filename, output,
                 sample_rate=16000, duration=10, start=24000, end=57408):
        self.folder = folder
        self.filename = filename
        # os.path.join instead of a hard-coded '\\' so the path also resolves
        # on non-Windows hosts (it yields the same string on Windows).
        self.filepath = os.path.join(folder, filename)
        self.out = output
        self.data, self.rate = librosa.load(self.filepath, sr=sample_rate, duration=duration)
        # Keep only the requested sample range (defaults match the original
        # hard-coded 24000:57408 slice).
        self.data = self.data[start:end]

        print("Sample rate: {} Hz".format(self.rate))
        print("Data type: {}".format(self.data.dtype))
        print(len(self.data))

    def create_tag_folder(self, tag_name):
        """Create ``tag_name`` under the current working directory (best effort).

        Any ``OSError`` from ``os.mkdir`` is reported on stdout and otherwise
        ignored. The working directory is printed afterwards.
        """
        dir_path = os.path.join(os.getcwd(), tag_name)
        try:
            os.mkdir(dir_path)
        except OSError as error:
            print(error)
            print('skip create')

        print(os.getcwd())

    def main(self):
        """Write ``self.data`` to ``self.out`` as a one-line ``WAVE_DATA`` macro."""
        # Create the destination folder up front so a fresh output tree does
        # not make open() fail with FileNotFoundError.
        out_dir = os.path.dirname(self.out)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        with open(self.out, 'w') as f:
            f.write("#define WAVE_DATA {")
            self._write_tflite_data(f, self.out)

    def _write_tflite_data(self, open_file, out_path):
        """Write every sample followed by a comma, then the closing brace.

        Args:
            open_file: Writable text file object positioned after the macro
                opening brace.
            out_path: Unused; kept so existing callers keep working.
        """
        # The values stay on a single line: a #define ends at the first
        # newline unless each line is continued with a backslash.
        # A single join avoids the quadratic cost of repeated concatenation.
        body = ''.join(str(v) + ',' for v in self.data)
        open_file.write(body + '}\n')
        
        #C:\Users\ML_m460bsp_tflu\SampleCode\tflu_kws_arm\raw
        #raw/left_2.h
        


In [6]:
# Input clip (folder + file name) and the C header that will receive its samples.
# NOTE(review): these are Windows-style relative paths; they resolve against the
# notebook's working directory.
# NOTE(review): the "1.5_3.5" suffix presumably names the exported time window in
# seconds — confirm it matches the sample range used by audioFileP.
src_folder = r"datasets\ESC-50\audio"
src_file = r"1-100032-A-0.wav"
dst_file = r"outputs\2024_02_05_16_28_26\C_header\1-100032-A-0-dog_1.5_3.5.h"

In [7]:
# Load the clip (the constructor prints sample rate, dtype and sample count),
# then write the WAVE_DATA macro to dst_file.
x = audioFileP(src_folder, src_file, dst_file)
x.main()

Sample rate: 16000 Hz
Data type: float32
33408


# Convert tflite to C source file

In [7]:
# Quantized model to convert and the C source file to generate from it.
# NOTE(review): Windows-style relative paths tied to one specific training run folder.
tflite_name = r"outputs\2024_02_05_16_28_26\quantized_models\quantized_model.tflite"
out_file = r"outputs\2024_02_05_16_28_26\C_header\quantized_miniresnetv2.c"

In [8]:
! python tflite_to_tflu.py --tflite_path $tflite_name --output_path $out_file

# Test the preprocessing values

In [2]:
# Add '../utils' to the module search path before importing the two helper
# modules below (presumably they live in that folder — confirm).
# NOTE(review): '../utils' is resolved against the current working directory,
# so this cell depends on where the notebook is launched from.
import sys,os,logging
sys.path.append(os.path.abspath('../utils'))
from preprocess import load_and_reformat
from feature_extraction import get_patches

## A. The training case: preprocessing + feature extraction

In [5]:
# Clip used for the training-style preprocessing check; here src_file holds the
# full path (folder joined with the file name).
src_folder = r"datasets\ESC-50\audio"
src_file = os.path.join(src_folder, r"1-100032-A-0.wav")

In [51]:
# Step 1: load the clip through the project's load_and_reformat helper.
load_kwargs = dict(
    wave_path=src_file,
    min_length=2,
    max_length=10,
    target_rate=16000,
    top_db=60,
    frame_length=3200,
    hop_length=3200,
    trim_last_second=False,
)
wave, sr = load_and_reformat(**load_kwargs)

# Step 2: cut the waveform into patches with the project's get_patches helper.
feature_kwargs = dict(
    wave=wave,
    sr=sr,
    patch_length=50,
    overlap=0.25,
    n_fft=1024,
    hop_length=320,
    include_last_patch=False,
    win_length=1024,
    window='hann',
    center=True,
    pad_mode='constant',
    power=2.0,
    n_mels=64,
    fmin=20,
    fmax=7500,
    power_to_db_ref=np.max,
    norm='slaney',
    htk=False,
    to_db=True,
)
patches = get_patches(**feature_kwargs)

[INFO] Sound wave shorter than min_length, repeating
[INFO] Sound wave repeated 3 times
db_melspec
101 64


In [52]:
# Waveform length in samples, then the length of row 1 of patch 1.
print(len(wave), len(patches[1][1]), sep="\n")

32000
50


## B. The Inference Case

In [63]:
src_folder = r"datasets\ESC-50\audio"
src_file = os.path.join(src_folder, r"1-110389-A-0.wav")

# Inference path: plain librosa load, without the load_and_reformat step used in
# the training case.
wave, sr = librosa.load(src_file, sr=16000, duration=10)

# Optional: restrict to a sample window before extracting patches, e.g.
#wave = wave[24000:57408] #wave[24000:40704] #wave[24000:57408]

# Same feature-extraction settings as the training case.
feature_kwargs = dict(
    wave=wave,
    sr=sr,
    patch_length=50,
    overlap=0.25,
    n_fft=1024,
    hop_length=320,
    include_last_patch=False,
    win_length=1024,
    window='hann',
    center=True,
    pad_mode='constant',
    power=2.0,
    n_mels=64,
    fmin=20,
    fmax=7500,
    power_to_db_ref=np.max,
    norm='slaney',
    htk=False,
    to_db=True,
)
patches = get_patches(**feature_kwargs)

db_melspec
251 64
[[-41.457413 -35.091324 -37.754646 ... -80.       -80.       -80.      ]
 [-47.76841  -42.010834 -44.58092  ... -80.       -80.       -80.      ]
 [-42.961098 -36.48146  -45.14736  ... -80.       -80.       -80.      ]
 ...
 [-77.72628  -67.10232  -55.49411  ... -80.       -80.       -80.      ]
 [-80.       -72.94021  -57.0795   ... -80.       -80.       -80.      ]
 [-80.       -74.87651  -66.781906 ... -80.       -80.       -80.      ]]


In [64]:
# len(wave) is a sample count, not a duration: report the count and the
# corresponding time in seconds (samples / sample rate).
print("Total wav samples: {} ({:.2f} s)".format(len(wave), len(wave) / sr))
print("Num patches: {}".format(len(patches)))
# Size of the first patch along its first axis, then the length of its row 1.
print(len(patches[0]))
print(len(patches[0][1]))

Total wav time: 80000
Num patches: 6
64
50


In [65]:
import numpy as np
import sklearn.metrics
import tensorflow as tf
import tqdm
from hydra.core.hydra_config import HydraConfig
from evaluation import _aggregate_predictions, compute_accuracy_score

In [66]:
num_patches = len(patches)

# Only one test clip: every patch gets clip id 0.
clip_labels = np.array([0] * num_patches)

# Stack the patches into one batch and append a trailing axis of size 1.
X = np.expand_dims(np.stack(list(patches), axis=0), axis=-1)

# Ground truth: every patch is labelled 'dog', one-hot encoded over the
# alphabetically sorted vocabulary.
vocab = ['dog', 'chainsaw', 'crackling_fire', 'helicopter', 'rain',
       'crying_baby', 'clock_tick', 'sneezing', 'rooster', 'sea_waves']
string_lookup_layer = tf.keras.layers.StringLookup(
        vocabulary=sorted(vocab),
        num_oov_indices=0,
        output_mode='one_hot')
y = np.array(string_lookup_layer(['dog'] * num_patches))

  return bool(asarray(a1 == a2).all())


In [67]:
# Batch shape and container type of the model input.
print(X.shape, type(X), sep="\n")

(6, 64, 50, 1)
<class 'numpy.ndarray'>


In [68]:
# Earlier model candidates, kept for reference:
#tflite_name = r"outputs\2024_02_05_16_28_26\quantized_models\quantized_model_audio.tflite"
#tflite_name = r"C:\Users\USER\Desktop\ML\tiny_nu_audio\workspace\2024_03_13_16_13_45_512hop_woOther\quantized_models\quantized_model.tflite"
# NOTE(review): absolute, machine-specific path.
tflite_name = r"C:\Users\USER\Desktop\ML\ML_tf2_image_classfication_nu\vela\generated\quantized_miniresnetv2.tflite"

X_test, y_test = X, y

tf.print('[INFO] Evaluating the quantized model ...')
interpreter_quant = tf.lite.Interpreter(model_path=tflite_name)

# Metadata of the first input and the first output tensor.
input_details = interpreter_quant.get_input_details()[0]
output_details = interpreter_quant.get_output_details()[0]
input_scale = input_details['quantization'][0]
input_zero_point = input_details['quantization'][1]
input_dtype = input_details['dtype']

tf.print("[INFO] Quantization input details : {}".format(input_details["quantization"]))
tf.print("[INFO] Dtype input details : {}".format(input_dtype))

input_index_quant = input_details["index"]
output_index_quant = output_details["index"]

# Resize the input tensor to the batch shape so all patches run in one invoke.
interpreter_quant.resize_tensor_input(input_index_quant, list(X_test.shape))
interpreter_quant.allocate_tensors()

# Quantize the input: q = x / scale + zero_point, rounded and clipped to the
# representable range of the input dtype.
X_processed = (X_test / input_scale) + input_zero_point

dtype_limits = np.iinfo(input_dtype)
print(dtype_limits.min, dtype_limits.max)

X_processed = np.clip(np.round(X_processed), dtype_limits.min, dtype_limits.max)
X_processed = X_processed.astype(input_dtype)

interpreter_quant.set_tensor(input_index_quant, X_processed)
interpreter_quant.invoke()
preds = interpreter_quant.get_tensor(output_index_quant)

# Aggregate predictions and ground truth (only needed for the clip-level score,
# which is disabled below).
aggregated_preds = _aggregate_predictions(
    preds=preds,
    clip_labels=clip_labels,
    is_multilabel=False,
    is_truth=False,
)
aggregated_truth = _aggregate_predictions(
    preds=y_test,
    clip_labels=clip_labels,
    is_multilabel=False,
    is_truth=True,
)

# Patch-level accuracy of the quantized model.
patch_level_accuracy = compute_accuracy_score(y_test, preds, is_multilabel=False)
print("[INFO] : Quantized model patch-level accuracy on test set : {}".format(patch_level_accuracy))

# Clip-level accuracy (disabled):
#clip_level_accuracy = compute_accuracy_score(aggregated_truth, aggregated_preds,
#                                                is_multilabel=False)
#print("[INFO] : Quantized model clip-level accuracy on test set : {}".format(clip_level_accuracy))

[INFO] Evaluating the quantized model ...
[INFO] Quantization input details : (0.3137255012989044, 127)
[INFO] Dtype input details : <class 'numpy.int8'>
-128 127
[INFO] : Quantized model patch-level accuracy on test set : 0.16666666666666666


In [69]:
# Output tensor exactly as returned by the interpreter, before any dequantization.
print(preds)


[[-128 -120 -128 -128  103 -128 -128 -125 -128 -114]
 [-128 -127 -128 -128 -125 -128 -128  -47 -128   42]
 [-128 -127 -128 -128 -125 -128 -128  -47 -128   42]
 [-128 -127 -128 -128 -125 -128 -128  -47 -128   42]
 [-128 -127 -128 -128 -125 -128 -128  -47 -128   42]
 [-128 -127 -128 -128 -125 -128 -128  -47 -128   42]]


In [70]:
# Output tensor quantization parameters: scale first, then zero point.
out_scale = output_details['quantization'][0]
out_zero_point = output_details['quantization'][1]
print(out_scale, out_zero_point, sep="\n")

0.00390625
-128


In [71]:
# Dequantize the model output: real = (q - zero_point) * scale.
out_scale = output_details['quantization'][0]
out_zero_point = output_details['quantization'][1]
preds = preds.astype('float')
preds_q = (preds - out_zero_point) * out_scale
print(preds_q)

[[0.         0.03125    0.         0.         0.90234375 0.
  0.         0.01171875 0.         0.0546875 ]
 [0.         0.00390625 0.         0.         0.01171875 0.
  0.         0.31640625 0.         0.6640625 ]
 [0.         0.00390625 0.         0.         0.01171875 0.
  0.         0.31640625 0.         0.6640625 ]
 [0.         0.00390625 0.         0.         0.01171875 0.
  0.         0.31640625 0.         0.6640625 ]
 [0.         0.00390625 0.         0.         0.01171875 0.
  0.         0.31640625 0.         0.6640625 ]
 [0.         0.00390625 0.         0.         0.01171875 0.
  0.         0.31640625 0.         0.6640625 ]]
