<a href="https://colab.research.google.com/github/MauriVass/MachineLearningInIoT_HWs/blob/mauri/HM2/ML4IoT_HW2_KS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#Keyword Spotting
import argparse
import numpy as np
import os
import tensorflow as tf
from tensorflow import keras
!pip install tensorflow_model_optimization
import tensorflow_model_optimization as tfmot

#Set a seed to get reproducible results
#The same value seeds both the TensorFlow and the NumPy random generators
seed = 42
tf.random.set_seed(seed)
np.random.seed(seed)

class SignalGenerator:
  """Turn WAV file paths into (features, label_id) examples and tf.data datasets.

  Depending on ``mfcc`` the features are either MFCCs or a resized STFT
  magnitude spectrogram; ``self.preprocess`` is bound to the matching
  ``preprocess_with_*`` method at construction time.

  Args:
    labels: collection of label names; the label id of an example is the
      position of its parent folder name inside this collection.
    sampling_rate: number of samples every clip is brought to (also passed as
      the sample rate to the mel filter bank).
    frame_length: STFT window length in samples (also used as fft_length).
    frame_step: STFT hop in samples.
    num_mel_bins, lower_frequency, upper_frequency: mel filter bank settings;
      required when ``mfcc`` is true.
    num_coefficients: number of leading MFCCs kept (None keeps all of them).
    mfcc: select the MFCC pipeline instead of the STFT one.
    image_size: side of the square the STFT spectrogram is resized to.

  Raises:
    ValueError: if ``mfcc`` is true and a mel filter bank setting is missing.
  """

  def __init__(self, labels, sampling_rate, frame_length, frame_step, num_mel_bins=None, lower_frequency=None, upper_frequency=None, num_coefficients=None, mfcc=False, image_size=32):
    self.labels=labels
    self.sampling_rate=sampling_rate
    self.frame_length=frame_length
    self.frame_step=frame_step
    self.num_mel_bins = num_mel_bins
    self.lower_frequency = lower_frequency
    self.upper_frequency = upper_frequency
    self.num_coefficients = num_coefficients
    self.mfccs=mfcc
    self.image_size = image_size

    if mfcc:
      #Fail early with a readable message instead of handing None to TensorFlow
      required = (('num_mel_bins', num_mel_bins),
                  ('lower_frequency', lower_frequency),
                  ('upper_frequency', upper_frequency))
      missing = [name for name, value in required if value is None]
      if missing:
        raise ValueError('mfcc=True requires: ' + ', '.join(missing))

      #The mel filter bank only depends on the constructor arguments: build it once
      num_spectrogram_bins = frame_length // 2 + 1
      self.linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
          self.num_mel_bins,
          num_spectrogram_bins,
          self.sampling_rate,
          self.lower_frequency,
          self.upper_frequency)
      self.preprocess = self.preprocess_with_mfcc
    else:
      self.preprocess = self.preprocess_with_stft

  def read(self, file_path):
    """Decode the WAV at ``file_path`` and return (audio, label_id).

    The label is the name of the folder that contains the file (second to
    last path component); label_id is its index in ``self.labels``.
    """
    parts = tf.strings.split(file_path, os.path.sep)
    label = parts[-2]
    label_id = tf.argmax(label == self.labels)
    audio_binary = tf.io.read_file(file_path)
    audio, _ = tf.audio.decode_wav(audio_binary)
    #Drop axis 1 of the decoded audio (assumes single-channel files - TODO confirm)
    audio = tf.squeeze(audio, axis=1)
    return audio, label_id

  def pad(self, audio):
    """Return ``audio`` as exactly ``sampling_rate`` samples (truncate, then zero-pad at the end)."""
    #Clips longer than the target would make the padding length negative, so cut them first
    audio = audio[:self.sampling_rate]
    zero_padding = tf.zeros(self.sampling_rate - tf.shape(audio), dtype=tf.float32)
    audio = tf.concat([audio,zero_padding],0)
    #Make the static shape known to the following ops
    audio.set_shape([self.sampling_rate])
    return audio

  def get_spectrogram(self, audio):
    """Return the magnitude of the STFT of ``audio``."""
    #Calculate the STFT of the signal given frame_length and frame_step
    stft = tf.signal.stft(audio,
            frame_length=self.frame_length,
            frame_step=self.frame_step,
            fft_length=self.frame_length)
    #Transform the complex numbers into real numbers (magnitude)
    spectrogram = tf.abs(stft)
    return spectrogram

  def get_mfccs(self, spectrogram):
    """Return the first ``num_coefficients`` MFCCs of a magnitude spectrogram."""
    mel_spectrogram = tf.tensordot(spectrogram,
            self.linear_to_mel_weight_matrix, 1)
    #Small offset keeps the logarithm finite where the mel energy is zero
    log_mel_spectrogram = tf.math.log(mel_spectrogram + 1.e-6)
    mfccs = tf.signal.mfccs_from_log_mel_spectrograms(log_mel_spectrogram)
    mfccs = mfccs[:, :self.num_coefficients]
    return mfccs

  def preprocess_with_stft(self, file_path):
    """Return (spectrogram, label_id): STFT magnitude resized to image_size x image_size with a trailing channel axis."""
    audio, label = self.read(file_path)
    audio = self.pad(audio)
    spectrogram = self.get_spectrogram(audio)
    spectrogram = tf.expand_dims(spectrogram, -1)
    spectrogram  = tf.image.resize(spectrogram, [self.image_size,self.image_size])
    return spectrogram, label

  def preprocess_with_mfcc(self, file_path):
    """Return (mfccs, label_id): MFCCs of the padded clip with a trailing channel axis."""
    audio, label = self.read(file_path)
    audio = self.pad(audio)
    spectrogram = self.get_spectrogram(audio)
    mfccs = self.get_mfccs(spectrogram)
    mfccs = tf.expand_dims(mfccs, -1)
    return mfccs, label

  def make_dataset(self, files, train=False):
    """Build a batched, cached dataset of (features, label_id) from a list of file paths.

    Args:
      files: sequence of WAV file paths.
      train: when truthy the dataset is reshuffled at every iteration.
    """
    #This method creates a dataset from a numpy array (our list of file paths)
    ds = tf.data.Dataset.from_tensor_slices(files)
    #Different preprocess step depending on the constructor parameters
    ds = ds.map(self.preprocess, num_parallel_calls=4)
    ds = ds.batch(32)
    ds = ds.cache()

    if train:
      #Applied after batch(): the buffer of 100 holds whole batches, not single examples
      ds = ds.shuffle(100, reshuffle_each_iteration=True)
    return ds

#Sparsity increases latency due to cache misses
class Model:
  """Keras keyword-spotting classifier (MLP, CNN or DSCNN) with train/test/save helpers.

  Args:
    model_type: 'MLP', 'CNN' or 'DSCNN'.
    frame_length, frame_step: STFT settings used by the input pipeline; stored
      for the saved-model name and used to derive the MFCC input shape.
    mfcc: whether the inputs are MFCCs (True) or resized STFT images (False);
      selects the stride of the first convolution.
    num_mel_bins: only printed in the summary line.
    num_coefficients: number of MFCCs per frame (input width when mfcc is true).
    train_ds: training dataset; its len() sizes the pruning schedule
      (assumes it is batched, so len() is the number of steps per epoch).
    image_size: side of the STFT input image.
    alpha: width multiplier applied to the number of units/filters.
    sparsity: final pruning sparsity, or None to train without pruning.

  Raises:
    KeyError: if ``model_type`` is not one of the supported names.
  """

  def __init__(self,model_type,frame_length,frame_step,mfcc,num_mel_bins,num_coefficients,train_ds,image_size=32,alpha=1,sparsity=None):
    self.frame_length = frame_length
    self.frame_step = frame_step
    self.image_size = image_size
    self.num_coefficients = num_coefficients
    print('Summary: ',frame_length,frame_step,mfcc,num_mel_bins,num_coefficients,train_ds,alpha,sparsity)
    self.alpha = alpha
    self.sparsity = sparsity
    #Number of output logits (presumably one per keyword class - confirm against LABELS)
    self.n_output = 8
    #Strides of the first convolution: [2,1] for MFCC inputs, [2,2] for STFT images
    if(mfcc):
      self.strides = [2,1]
    else:
      self.strides = [2,2]

    self.model_type = model_type
    if(model_type=='MLP'):
      self.model = self.MLPmodel()
    elif(model_type=='CNN'):
      self.model = self.CNNmodel()
    elif(model_type=='DSCNN'):
      self.model = self.DSCNNmodel()
    else:
      raise KeyError('SPECIFY A MODEL TYPE [MLP, CNN, DSCNN]')

    self.mfcc = mfcc

    #CALLBACKS
    self.callbacks = []
    self.checkpoint_path = 'KSckp/'
    monitor = 'val_sparse_categorical_accuracy'
    #Keep only the weights of the epoch with the best validation accuracy
    self.model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
      filepath=self.checkpoint_path,
      save_weights_only=True,
      monitor=monitor,
      mode='max',
      save_best_only=True)
    self.callbacks.append(self.model_checkpoint_callback)

    #Created but intentionally not registered (kept available for experiments)
    self.early_stopping = tf.keras.callbacks.EarlyStopping(
      monitor=monitor, min_delta=0, patience=4, verbose=1, mode='auto',
      baseline=None, restore_best_weights=True)
    #self.callbacks.append(self.early_stopping)

    #self.lr_exp = tf.keras.callbacks.LearningRateScheduler(my_schedule, verbose=1)
    #self.callbacks.append(self.lr_exp)
    #Created but intentionally not registered (kept available for experiments)
    self.lr_onplateau = tf.keras.callbacks.ReduceLROnPlateau(monitor=monitor, factor=0.1,
      patience=3, min_lr=0.0001, verbose=1)
    #self.callbacks.append(self.lr_onplateau)

    if(self.sparsity is not None):
      #Sparsity grows from 0.30 to the requested value between 3 and 15 times len(train_ds) steps
      pruning_params = {
        'pruning_schedule':
        tfmot.sparsity.keras.PolynomialDecay(
        initial_sparsity=0.30,
        final_sparsity=sparsity,
        begin_step=len(train_ds)*3,
        end_step=len(train_ds)*15)}

      prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude
      self.model = prune_low_magnitude(self.model, **pruning_params)
      self.model_sparcity_callback = tfmot.sparsity.keras.UpdatePruningStep()
      self.callbacks.append(self.model_sparcity_callback)
      self.callbacks.append(tfmot.sparsity.keras.PruningSummaries(log_dir='PruningSumm/'))

      #Number of STFT frames of a 16000-sample clip (hard-coded clip length:
      #assumes the pipeline pads every clip to 16000 samples - TODO confirm)
      dim1 = ((16000-frame_length)/frame_step)+1
      if(mfcc):
        input_shape = [None, int(dim1) , num_coefficients, 1]
      else:
        input_shape = [None, image_size, image_size, 1]
      print('Input Shape Sparsity: ', input_shape)
      self.model.build(input_shape)

    #The last Dense layer has no activation, hence from_logits=True
    self.model.compile(optimizer='adam', loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                       metrics=['sparse_categorical_accuracy'])

  def MLPmodel(self):
    """Return a Sequential MLP: three ReLU Dense layers of 256*alpha units and a logits layer."""
    model = keras.Sequential([
      keras.layers.Flatten(),
      keras.layers.Dense(int(256*self.alpha), activation='relu'),
      keras.layers.Dense(int(256*self.alpha), activation='relu'),
      keras.layers.Dense(int(256*self.alpha), activation='relu'),
      keras.layers.Dense(self.n_output)
      ])
    return model

  #Strides = [2,2] if STFT, [2,1] if MFCC
  def CNNmodel(self):
    """Return a Sequential CNN: three Conv-BatchNorm-ReLU stages, global pooling and two Dense layers.

    Only the second and third convolutions and the hidden Dense layer are
    scaled by alpha; the first convolution always has 128 filters.
    """
    model = keras.Sequential([
      keras.layers.Conv2D(filters=int(128),kernel_size=[3,3],strides=self.strides,use_bias=False),
      keras.layers.BatchNormalization(momentum=0.1),
      keras.layers.Activation('relu'),
      keras.layers.Conv2D(filters=int(256*self.alpha),kernel_size=[3,3],strides=[1,1],use_bias=False),
      keras.layers.BatchNormalization(momentum=0.1),
      keras.layers.Activation('relu'),
      keras.layers.Conv2D(filters=int(256*self.alpha),kernel_size=[3,3],strides=[1,1],use_bias=False),
      keras.layers.BatchNormalization(momentum=0.1),
      keras.layers.Activation('relu'),
      keras.layers.GlobalAveragePooling2D(),
      keras.layers.Dense(int(256*self.alpha), activation='relu'),
      keras.layers.Dense(self.n_output)
      ])
    return model

  def DSCNNmodel(self):
    """Return a Sequential DSCNN: one Conv stage followed by two depthwise + 1x1 Conv stages."""
    model = keras.Sequential([
      keras.layers.Conv2D(filters=int(256*self.alpha),kernel_size=[3,3],strides=self.strides,use_bias=False), #input_shape=(32,32,1)
      keras.layers.BatchNormalization(momentum=0.1),
      keras.layers.Activation('relu'),
      keras.layers.DepthwiseConv2D(kernel_size=[3, 3], strides=[1, 1], use_bias=False),
      keras.layers.Conv2D(filters=int(256*self.alpha),kernel_size=[1,1],strides=[1,1],use_bias=False),
      keras.layers.BatchNormalization(momentum=0.1),
      keras.layers.Activation('relu'),
      keras.layers.DepthwiseConv2D(kernel_size=[3, 3], strides=[1, 1], use_bias=False),
      keras.layers.Conv2D(filters=int(256*self.alpha),kernel_size=[1,1],strides=[1, 1],use_bias=False),
      keras.layers.BatchNormalization(momentum=0.1),
      keras.layers.Activation('relu'),
      keras.layers.GlobalAveragePooling2D(),
      keras.layers.Dense(self.n_output)
      ])
    return model

  def Train(self,train,validation,epoch):
    """Fit the model for ``epoch`` epochs with the registered callbacks and return the Keras history."""
    #Show which callbacks are active for this run
    for c in self.callbacks:
      print(c)
    print('Training')
    history = self.model.fit(train, batch_size=32, epochs=epoch, verbose=1,
        validation_data=validation, validation_freq=1, callbacks=self.callbacks)
    return history

  def Test(self, test, best=True):
    """Evaluate on ``test`` and return (loss, sparse_categorical_accuracy).

    With ``best`` the weights saved by the checkpoint callback are loaded first.
    """
    print('Evaluation')
    if(best):
      self.model.load_weights(self.checkpoint_path)
    loss, accuracy = self.model.evaluate(test, verbose=1)
    return (loss, accuracy)

  def SaveModel(self,output,best=True):
    """Save the model under a name derived from its configuration and return that name.

    The name is ``output`` followed by: first letter of the model type, the
    mfcc flag, 'A<alpha>' (if alpha != 1), 'S<sparsity>' (if pruned),
    'FL<frame_length>', 'FS<frame_step>', 'IS<image_size>' (if != 32) and
    'NM<num_coefficients>' (if mfcc). Dots are replaced by underscores.
    With ``best`` the checkpointed weights are loaded before saving; pruning
    wrappers are stripped when the model was pruned.
    """
    output += self.model_type[0]
    output += str(self.mfcc)
    if(self.alpha!=1):
      output += f'A{self.alpha}'
    if(self.sparsity is not None):
      output += f'S{self.sparsity}'
    output += f'FL{self.frame_length}'
    output += f'FS{self.frame_step}'
    #Compare by value: an identity check against the literal 32 is wrong for 32.0 or numpy integers
    if(self.image_size != 32):
      output += f'IS{self.image_size}'
    if(self.mfcc):
      output += f'NM{self.num_coefficients}'
    if(best):
      self.model.load_weights(self.checkpoint_path)
    if(self.sparsity is not None):
      self.Strip()
    output = output.replace('.','_')
    print(f'Saving: {output}')
    self.model.save(output)
    return output

  def Strip(self):
    """Replace the model with its copy without the pruning wrappers."""
    self.model = tfmot.sparsity.keras.strip_pruning(self.model)

#Fetch and unpack the mini_speech_commands archive. The download is kept under ./data, so it is not repeated on every run
zip_path = tf.keras.utils.get_file(
    fname='mini_speech_commands.zip',
    origin='http://storage.googleapis.com/download.tensorflow.org/data/mini_speech_commands.zip',
    cache_dir='.',
    cache_subdir='data',
    extract=True)

#Folder the dataset is read from (its sub-folders are used as labels further down)
data_dir = os.path.join('.', 'data', 'mini_speech_commands')

def readFile(file):
  """Return the lines of the text file ``file`` with surrounding whitespace stripped.

  Blank lines are kept as empty strings. The file is closed before returning.
  """
  with open(file,'r') as fp:
    return [line.strip() for line in fp]

#Split the dataset following the .pdf requirements
#The split lists are read from the 'Temp' folder, which is cloned only if it does not exist yet
if(os.path.exists('Temp')==False):
  !git clone https://github.com/MauriVass/Temp.git
train_files = readFile('Temp/kws_train_split.txt')
validation_files = readFile('Temp/kws_val_split.txt')
test_files = readFile('Temp/kws_test_split.txt')

#Extract the labels: the entries (folders) inside the data folder
LABELS = np.array(tf.io.gfile.listdir(str(data_dir)))
#Remove the 'README.md' file, since it is not a label
LABELS = LABELS[LABELS != 'README.md']
print(LABELS)



In [None]:
# for m in ['MLP', 'CNN', 'DSCNN']:
#   for f in [False, True]:

### MAIN PARAMETERS ###
#Model type: one of ['MLP', 'CNN', 'DSCNN']
#NOTE(review): left empty here; Model() raises KeyError unless one of the three names is set
model = ''
#Input features: True -> MFCC, False -> STFT
mfcc = False
#Width multiplier, in (0,1]
alpha = 1
#Sparsity increases latency (may be a problem for KS) due to cache misses
#It can be in (0.3,1), or None (if you don't want to use sparsity)
sparsity = None

#Here you can change:
#STFT(mfcc=False): frame_length, frame_step
#MFCC(mfcc=True): frame_length, frame_step, num_mel_bins, num_coefficients, lower_frequency(?), upper_frequency(?)
#NOTE(review): mfcc is False above, but the values below are the listed MFCC defaults - confirm this is intended
frame_length = 640  #Default 640 (mfcc=True), 256(mfcc=False)
frame_step = 320 #Default 320 (mfcc=True), 128(mfcc=False)
num_mel_bins = 40 #Default 40 (only mfcc=True)
num_coefficients = 10 #Default 10 (only mfcc=True)
image_size = 32 #Default 32 (only mfcc=False)
if(mfcc):
  sg = SignalGenerator(labels=LABELS, sampling_rate=16000, frame_length=int(frame_length), frame_step=int(frame_step),
        num_mel_bins=int(num_mel_bins), lower_frequency=20, upper_frequency=4000, num_coefficients=int(num_coefficients), mfcc=mfcc)
else:
  sg = SignalGenerator(labels=LABELS, sampling_rate=16000, frame_length=frame_length, frame_step=frame_step, image_size=image_size)
### END MAIN PARAMETERS ###

#Only the training dataset is shuffled
train_ds = sg.make_dataset(train_files,True)
val_ds = sg.make_dataset(validation_files)
test_ds = sg.make_dataset(test_files)
#len() of a batched dataset: these are numbers of batches
print(f'Train: {len(train_ds)}, Val: {len(val_ds)}, Test: {len(test_ds)}')

#Print the shape of one batch of features and labels
for x,y in train_ds.take(1):
  print(x.shape,y.shape)

#When True, the checkpointed weights are loaded before testing and saving
save_best = True
#From here on 'model' is the Model instance, no longer the model type string
model = Model(model,frame_length=int(frame_length),frame_step=int(frame_step),mfcc=mfcc,train_ds=train_ds,num_mel_bins=int(num_mel_bins),num_coefficients=int(num_coefficients),image_size=image_size,alpha=alpha,sparsity=sparsity)
#Train for 20 epochs
hist = model.Train(train_ds,val_ds,20)
loss, acc = model.Test(test_ds,save_best)
print('Accuracy test set: ',acc)
#output_model is the path of the saved model, used by the optimization cell
output_model = model.SaveModel(f'KS_',save_best)
model.model.summary()

In [None]:
#Deployer, Optimizer W_WA
import argparse
import tensorflow as tf
import os

def representative_dataset_gen():
    """Yield up to 1000 feature elements of the global train_ds, each wrapped in a one-item list (labels are dropped)."""
    calibration_elements = train_ds.take(1000)
    for features, _label in calibration_elements:
        yield [features]

def Optimize(saved_model_dir,quantization,zipping):
  """Convert a SavedModel to TFLite, write it to disk and return the written path.

  Args:
    saved_model_dir: path of the SavedModel to convert; also the prefix of the output path.
    quantization: 'w' quantizes only the weights (suffix '.tflite_W'),
      'wa' quantizes weights and activations using representative_dataset_gen
      (suffix '.tflite_WA'); any other value converts without optimizations
      (suffix '.tflite').
    zipping: if exactly False the converted model is written as is; otherwise
      it is zlib-compressed and '.zip' is appended to the path.

  Returns:
    The path of the file that was written. Its size in kB is printed.
  """
  converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
  tflite_model_dir = saved_model_dir + '.tflite'
  if quantization == 'w':
    print('Only Weight')
    #Quantization of the weights only
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    #Switch kept from the original code: set to True to request float16 weights
    mini_float = False
    if mini_float:
      converter.target_spec.supported_types = [tf.float16]
    tflite_model_dir += '_W'
  elif quantization == 'wa':
    print('Weight Activation')
    #Quantization of weights and activations: needs sample inputs
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    converter.representative_dataset = representative_dataset_gen
    tflite_model_dir += '_WA'
  tflite_model = converter.convert()

  #Compression
  if zipping is False:
    payload = tflite_model
  else:
    print('Compression')
    import zlib
    tflite_model_dir = tflite_model_dir + '.zip'
    payload = zlib.compress(tflite_model)#,level=9
  with open(tflite_model_dir, 'wb') as fp:
    fp.write(payload)

  print('Saving: ', tflite_model_dir)
  size_in_bytes = os.path.getsize(tflite_model_dir)
  print(f'Tflite Model size: {(size_in_bytes/1024):.2f} kB')
  return tflite_model_dir

In [None]:
# print(model.model.metrics_names)

In [None]:
# hist.history

In [None]:
#Optimization of the saved Keyword Spotting model (conversion to TFLite through Optimize)
#quantization:
#  any other value -> none
#  'w' -> only weights
#  'wa' -> weights and activations (has some problem with the shape/last reshape layer (maybe))
#zipping: False writes the plain TFLite file, any other value writes a zlib-compressed one
quantization = 'w' 
zipping = False
saved_model_dir = output_model
output_tflite_model = Optimize(saved_model_dir,quantization,zipping)

In [None]:
#Decompress
# import zlib
# model_path = output_tflite_model 
# if(model_path.find('zip')<0):
#   raise KeyError('YOU CAN\'T DECOMPRESS A NON .zip MODEL')
# with open(model_path, 'rb') as fp:
#     model = zlib.decompress(fp.read())
#     output_model = model_path[:-4]
#     file = open(output_model,'wb')
#     print('Saving: ',output_model)
#     file.write(model)
#     file.close()

In [None]:
#          Destination      Origin
# !zip -r ./th_test_stft.zip ./th_test_stft

In [None]:
# !unzip THFmodelCNN.zip ./THFmodelCNN

In [None]:
#Save the tensor so that you can use them in other scripts
# tf.data.experimental.save(train_ds, './th_train')
# tf.data.experimental.save(val_ds, './th_val')
#tf.data.experimental.save(test_ds, './th_test_stft')

# mfcc = False
# if(mfcc):
#   input_shape = [None, 49, 10, 1]
# else:
#   input_shape = [None, 32, 32, 1]

#tensor_specs = (tf.TensorSpec(input_shape, dtype=tf.float32),tf.TensorSpec(input_shape))
# train_ds = tf.data.experimental.load('./th_train', tensor_specs)
# val_ds = tf.data.experimental.load('./th_val', tensor_specs)
#test_ds = tf.data.experimental.load('./th_test_stft', tensor_specs)

In [None]:
#Test Models
import time
import tensorflow.lite as tflite

#Evaluate the TFLite file written by Optimize() on the test set, one example at a time
saved_model_dir = output_tflite_model
#A compressed file can not be loaded by the interpreter
if(saved_model_dir.find('zip')>0):
  raise KeyError('YOU CAN\'T TEST A .zip MODEL. (Use zipping=False in Optimize() method)')
#Re-batch the test set with batch size 1
test_ds1 = test_ds.unbatch().batch(1)

interpreter = tf.lite.Interpreter(model_path=saved_model_dir)
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
#acc: number of correct predictions, n: number of examples, time_infe: accumulated time in seconds
acc = 0
n = 0
time_infe = 0
print(test_ds1)

for x,y in test_ds1:
  #print(x,y)
  input_data = x
  y_true = y.numpy()[0]
  
  #The measured time covers set_tensor, invoke and get_tensor
  ti = time.time()
  interpreter.set_tensor(input_details[0]['index'], input_data)
  interpreter.invoke()
  my_output = interpreter.get_tensor(output_details[0]['index'])[0]
  time_infe += time.time()-ti

  n+=1
  #The predicted class is the index of the largest output
  index_pred = np.argmax(my_output)
  if(index_pred==y_true):
    acc += 1

#Fraction of correct predictions and mean time per example in milliseconds
#NOTE(review): divides by n, so an empty test set raises ZeroDivisionError
print(f'Accuracy: {(acc/n):.3f}, time: {(time_infe/n)*1000} ms')

In [None]:
#To run on board
import argparse
import numpy as np
from subprocess import call
import tensorflow as tf
import time
from scipy import signal

#Evaluation: measures preprocessing + inference latency of a TFLite model on random input
model = 'DSCNN.tflite' #path of the tflite model
rate = 16000
mfcc = False
#STFT window length and hop, in samples
if(mfcc):
  length = 640
  stride = 320
else:
  length = 256
  stride = 128
resize = 32
num_mel_bins = 40
num_coefficients = 10

#Number of frames used as first dimension when reshaping the features of a 'rate'-sample signal
num_frames = (rate - length) // stride + 1
num_spectrogram_bins = length // 2 + 1

#Mel filter bank between 20 and 4000 Hz, built once outside the timed loop
linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
        num_mel_bins, num_spectrogram_bins, rate, 20, 4000)

#NOTE(review): 'model' is assigned a string above, so as written this branch always runs
if model is not None:
    interpreter = tf.lite.Interpreter(model_path=model)
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()


#Per-iteration latencies in seconds
inf_latency = []
tot_latency = []
for i in range(100):
    #48000 random float32 samples (presumably one second recorded at 48 kHz - confirm)
    sample = np.array(np.random.random_sample(48000), dtype=np.float32)

    start = time.time()

    # Resampling: downsample by the factor 48000 // rate
    sample = signal.resample_poly(sample, 1, 48000 // rate)

    sample = tf.convert_to_tensor(sample, dtype=tf.float32)

    # STFT magnitude
    stft = tf.signal.stft(sample, length, stride,
            fft_length=length)
    spectrogram = tf.abs(stft)

    #NOTE(review): with mfcc False and resize <= 0 the MFCC branch below is taken - confirm this is intended
    if mfcc is False and resize > 0:
        # Resize (optional): add batch and channel axes, then resize to resize x resize
        spectrogram = tf.reshape(spectrogram, [1, num_frames, num_spectrogram_bins, 1])
        spectrogram = tf.image.resize(spectrogram, [resize, resize])
        input_tensor = spectrogram
    else:
        # MFCC (optional): mel projection, log, MFCCs, keep the first num_coefficients
        mel_spectrogram = tf.tensordot(spectrogram, linear_to_mel_weight_matrix, 1)
        log_mel_spectrogram = tf.math.log(mel_spectrogram + 1.e-6)
        mfccs = tf.signal.mfccs_from_log_mel_spectrograms(log_mel_spectrogram)
        mfccs = mfccs[..., :num_coefficients]
        mfccs = tf.reshape(mfccs, [1, num_frames, num_coefficients, 1])
        input_tensor = mfccs

    if model is not None:
        interpreter.set_tensor(input_details[0]['index'], input_tensor)
        #The inference time starts after the input tensor has been set
        start_inf = time.time()
        interpreter.invoke()
        output_data = interpreter.get_tensor(output_details[0]['index'])

    end = time.time()
    #Total latency: resampling + feature extraction + inference
    tot_latency.append(end - start)

    #Without a model the inference latency is recorded as zero
    if model is None:
        start_inf = end

    inf_latency.append(end - start_inf)
    #Pause between iterations (not included in the measured latencies)
    time.sleep(0.1)

#Mean latencies over the iterations, in milliseconds
print('Inference Latency {:.2f}ms'.format(np.mean(inf_latency)*1000.))
print('Total Latency {:.2f}ms'.format(np.mean(tot_latency)*1000.))