# KWS (Keyword Spotting) Demo

In [None]:
#@title Importing useful libraries
!pip install -q python_speech_features 
!pip install -q https://github.com/tugstugi/dl-colab-notebooks/archive/colab_utils.zip

import os 
import sys
import json  
import time
from   datetime                import date

from collections import Counter
import pandas                  as pd
import numpy                   as np 
import matplotlib.pyplot       as plt  
import seaborn                 as sn
from sklearn.metrics import confusion_matrix, accuracy_score

import tensorflow              as tf
from   tensorflow              import keras
from   tensorflow.keras        import layers, Input, backend
from   tensorflow.keras.layers import Dense, Activation, ZeroPadding2D, BatchNormalization, Conv2D, Permute
from   tensorflow.keras.layers import Lambda, Bidirectional, LSTM, GRU, Dot, Softmax
from   tensorflow.keras.models import Model

from   scipy.io                import wavfile
from   python_speech_features  import mfcc, logfbank
import wave 

import itertools
import cProfile
from IPython.display            import Audio, display, clear_output

# Colab and Drive Libraries
from   google.colab            import drive

# imports for uploading/recording
import ipywidgets as widgets
from dl_colab_notebooks.audio import record_audio, upload_audio

import warnings
# Suppress all Python warnings for the rest of the session.
warnings.filterwarnings("ignore")

# Mount Google Drive; pass force_remount=True to attempt a forced remount.
drive.mount('/content/drive')

# Candidate project folders on Drive.
FRA_PATH = "/content/drive/MyDrive/University/HDA/HDA_Project/project"
FEDE_PATH = "/content/drive/MyDrive/project"

# Work from the project folder, because the model files are loaded through
# relative paths. Switch the argument to FRA_PATH to use the other folder.
os.chdir(FEDE_PATH)


# Keyword -> class index. The index of each keyword is its position in the
# tuple below, so the order of the entries must not be changed.
LABELS_35_CLASSES = {
    keyword: class_index
    for class_index, keyword in enumerate((
        "Backward", "Bed", "Bird", "Cat", "Dog", "Down", "Eight",
        "Five", "Follow", "Forward", "Four", "Go", "Happy", "House",
        "Learn", "Left", "Marvin", "Nine", "No", "Off", "On",
        "One", "Right", "Seven", "Sheila", "Six", "Stop", "Three",
        "Tree", "Two", "Up", "Visual", "Wow", "Yes", "Zero",
    ))
}

In [12]:
#@title Either record audio from microphone or upload audio from .wav file  { run: "auto" }

# Sampling rate used for recording/uploading, playback and MFCC extraction.
SAMPLE_RATE = 16000
# Colab form field: "Record" shows a record button, anything else starts an upload.
record_or_upload = "Record" #@param ["Record", "Upload"]
# Colab form field: selects which saved model _recognize loads.
architecture =  "AttCNN+RNN"  #@param ["CNN", "CNN+RNN", "AttCNN+RNN"]


# Saved Keras model for each value of `architecture`; any value not listed
# here falls back to the plain CNN model.
_MODEL_PATHS = {
  "AttCNN+RNN": 'models/att_lstm_model_complete.h5',
  "CNN+RNN": 'models/lstm_model_complete.h5',
}
_DEFAULT_MODEL_PATH = 'models/m_conv2d_n_dense_models/best_flexible_cnn_architecture_model_no_5.h5'

# Models already loaded, keyed by file path, so that repeated recognitions
# do not reload the same file from disk.
_LOADED_MODELS = {}


def _load_model(path):
  """Return the Keras model stored at `path`, loading it only on first use."""
  if path not in _LOADED_MODELS:
    _LOADED_MODELS[path] = keras.models.load_model(path)
  return _LOADED_MODELS[path]


def _fit_length(audio, length):
  """Return a new 1-D array holding exactly `length` samples of `audio`.

  Longer input is truncated and shorter input is zero-padded at the end.
  The input itself is never modified.
  """
  samples = np.asarray(audio).ravel()
  fitted = np.zeros(length, dtype=samples.dtype)
  kept = min(samples.size, length)
  fitted[:kept] = samples[:kept]
  return fitted


def _recognize(audio):
  """Play `audio`, classify the keyword it contains and plot the waveform.

  The signal is cut or zero-padded to SAMPLE_RATE samples, converted to
  13 MFCCs per frame and fed to the model selected by the global
  `architecture`. The predicted keyword is shown as the plot title.

  Args:
    audio: array of audio samples taken at SAMPLE_RATE. It is not modified.
  """
  display(Audio(audio, rate=SAMPLE_RATE, autoplay=True))

  # Work on a fixed-length copy: an in-place ndarray.resize would alter the
  # caller's array and fails on arrays that do not own their data.
  clip = _fit_length(audio, SAMPLE_RATE)

  mfccs = mfcc(clip, samplerate=SAMPLE_RATE, winlen=0.025, winstep=0.01, numcep=13, nfilt=40)
  # Transpose, then add a leading batch axis and a trailing channel axis.
  X_example = np.asarray(mfccs).T[np.newaxis, :, :, np.newaxis]

  model = _load_model(_MODEL_PATHS.get(architecture, _DEFAULT_MODEL_PATH))

  print("Detecting the keyword...")
  scores = model.predict(X_example)
  # Take the single batch element explicitly; calling int() on a 1-element
  # array is deprecated in recent NumPy releases.
  y = int(np.argmax(scores, axis=1)[0])
  print("Finish!")

  # Look the keyword up by class index rather than by position in the dict.
  names_by_index = {index: name for name, index in LABELS_35_CLASSES.items()}

  plt.style.use(['dark_background', 'bmh'])
  plt.rc('axes', facecolor='k')
  plt.rc('figure', facecolor='k')
  plt.rc('figure', figsize=(8,2), dpi=100)
  # Start from a fresh figure so repeated calls never draw over an old plot.
  plt.figure()
  plt.plot(clip)
  plt.title("The word is: " + names_by_index[y].upper(), fontsize=20)
  plt.xlabel('Sample index')
  plt.ylabel('Amplitude')
  plt.tight_layout()
  # Render explicitly; when called from a widget callback there is no
  # end-of-cell hook to display the figure.
  plt.show()


def _record_audio(b):
  """Button callback: clear the cell output, record from the microphone and classify.

  `b` is the argument supplied by the button's `on_click`; it is not used.
  The `1` passed to `record_audio` is presumably the duration in seconds.
  """
  clear_output()
  _recognize(record_audio(1, sample_rate=SAMPLE_RATE))

def _upload_audio(b):
  """Clear the cell output, ask for an uploaded audio file and classify it.

  `b` is accepted so the signature mirrors `_record_audio`; it is not used.
  """
  clear_output()
  _recognize(upload_audio(sample_rate=SAMPLE_RATE))


# Upload mode starts right away; record mode shows a button and waits for a click.
if record_or_upload != "Record":
  _upload_audio("")
else:
  button = widgets.Button(description="Record Speech")
  button.on_click(_record_audio)
  display(button)

Button(description='Record Speech', style=ButtonStyle())