# Keyword Spotting Inference

In [7]:
import tensorflow.keras as keras
import numpy as np
import librosa

MODEL_PATH = "model.h5"
# A better way of storing constants for the ML system
# is an external config file, for consistency purposes.
NUM_SAMPLES_TO_CONSIDER = 22050 # 1 second of audio at librosa's default 22050 Hz sample rate

In [14]:
# Singleton class - a class that can have only one instance in the service.
class _Keyword_Spotting_Service:
    """Keyword-spotting inference service backed by a Keras model.

    Meant to be used as a singleton: obtain the shared instance through the
    ``Keyword_Spotting_Service()`` factory, which also loads the model,
    instead of instantiating this class directly.
    """

    # Keras model used by predict(); stays None until a model is loaded.
    model = None
    # Mapping taken from the dataset json file: predicted index -> keyword.
    _mappings = [
        "five",
        "four",
        "go",
        "no",
        "off",
        "on",
        "one",
        "stop",
        "three",
        "tree",
        "two",
        "wow",
        "yes",
        "_background_noise_",
    ]

    # The shared instance - Python does not enforce the singleton pattern,
    # so we need to do it manually.
    _instance = None

    def predict(self, file_path):
        """Return the keyword predicted for the audio file at ``file_path``.

        Args:
            file_path: Path of the audio file to classify.

        Returns:
            The entry of ``_mappings`` with the highest predicted score.
        """
        # Extract MFCCs -> (# segments, # coefficients)
        MFCCs = self.preprocess(file_path)

        # Convert the 2d MFCC array into a 4d array ->
        # (# samples, # segments, # coefficients, # channels)
        MFCCs = MFCCs[np.newaxis, ..., np.newaxis]

        # Make prediction
        predictions = self.model.predict(MFCCs)  # e.g. [ [0.1, 0.6, 0.1, 0.2] ]
        predicted_index = np.argmax(predictions)  # e.g. 1
        return self._mappings[predicted_index]

    def preprocess(self, file_path, n_mfcc=13, n_fft=2048, hop_length=512):
        """Load an audio file and return its MFCCs as (segments, coefficients).

        The signal is forced to exactly ``NUM_SAMPLES_TO_CONSIDER`` samples
        (longer clips are truncated, shorter clips are zero-padded) so the
        returned array has the same number of segments for every file.

        Args:
            file_path: Path of the audio file to load.
            n_mfcc: Number of MFCC coefficients to extract.
            n_fft: FFT window size, in samples.
            hop_length: Hop between successive frames, in samples.

        Returns:
            2d array of MFCCs, transposed to (# segments, # coefficients).
        """
        # Load audio file
        signal, sr = librosa.load(file_path)

        # Ensure consistency in the audio file length
        signal = self._fit_length(signal, NUM_SAMPLES_TO_CONSIDER)

        # Extract MFCCs, using the sample rate the signal was loaded with
        MFCCs = librosa.feature.mfcc(
            y=signal, sr=sr, n_mfcc=n_mfcc, n_fft=n_fft, hop_length=hop_length
        )

        # Transpose the matrix
        return MFCCs.T

    @staticmethod
    def _fit_length(signal, num_samples):
        """Return ``signal`` truncated or zero-padded to ``num_samples``."""
        signal = np.asarray(signal)
        missing = num_samples - len(signal)
        if missing > 0:
            # Too short: append silence so the frame count stays constant.
            return np.pad(signal, (0, missing), mode="constant")
        return signal[:num_samples]
    


In [20]:
def Keyword_Spotting_Service():
    """Return the shared ``_Keyword_Spotting_Service`` instance.

    On the first call the Keras model at ``MODEL_PATH`` is loaded and the
    instance is created; later calls return that same instance.

    Returns:
        The single ``_Keyword_Spotting_Service`` instance.
    """
    # Ensure only one instance of KSS is created
    if _Keyword_Spotting_Service._instance is None:
        # Load the model before caching the instance, so a failed load does
        # not leave a model-less singleton behind for subsequent calls.
        _Keyword_Spotting_Service.model = keras.models.load_model(MODEL_PATH)
        _Keyword_Spotting_Service._instance = _Keyword_Spotting_Service()

    return _Keyword_Spotting_Service._instance

if __name__ == "__main__":
    # Quick manual check: classify a handful of sample clips and print
    # the predicted keywords on one line.
    import os

    kss = Keyword_Spotting_Service()

    sample_names = ["five", "go", "no", "noise", "off", "on", "stop", "three"]
    # Build paths with os.path.join so the script is not tied to the
    # Windows path separator.
    keywords = [
        kss.predict(os.path.join("testSamples", f"{name}.wav"))
        for name in sample_names
    ]

    print(f"Predicted keyword: {', '.join(keywords)}")

Predicted keyword: five, no, no, off, off, on, stop, three
