In [1]:
import tensorflow.keras as keras
import numpy as np
import librosa

ModuleNotFoundError: No module named 'librosa'

In [None]:
# Upper bound on the number of audio samples kept per clip; longer signals are
# cut to this length during preprocessing (16000 samples is one second at the
# 16 kHz rate the audio is loaded with).
NUM_SAMPLES_TO_CONSIDER = 16_000

# File name of the saved Keras model that the service factory loads.
LOAD_MODEL = "SE-ResNet_for_disfluency.hdf5"

In [None]:
class _Disfluency_Spotting_Service:
    """Classify a short audio clip into one of the labels in ``_mappings``.

    The class is intended to be used as a singleton: obtain it through the
    ``Disfluency_Spotting_Service()`` factory, which stores the loaded Keras
    model on the class-level ``model`` attribute before returning the shared
    instance.
    """

    # Keras model used for inference; stays None until the factory assigns it.
    model = None
    # Output labels; predict() looks a label up by the arg-max index of the
    # model output, so the order must match the model's output order.
    _mappings = [
        "Breath",
        "Laughter",
        "Music",
        "Uh",
        "Um",
        "Words"
    ]

    # Cached shared instance, managed by the factory function.
    _instance = None

    def preprocess(self, file_path, n_fft = 400, hop_length = 160 ):
        """Load an audio file and return its log-amplitude spectrogram.

        Args:
            file_path: Path of the audio file to load.
            n_fft: FFT window size passed to ``librosa.stft``.
            hop_length: Hop between successive STFT frames, in samples.

        Returns:
            2-D ``numpy`` array of shape ``(n_bins, n_frames)`` holding the
            spectrogram magnitudes converted to decibels.
        """
        # librosa.load returns (samples, sample_rate); only the samples are
        # needed because the rate is fixed by the `sr` argument.
        signal, _ = librosa.load(file_path, sr = 16000, mono = True)

        # ensure consistency in audio length: truncate long clips and
        # zero-pad short ones so every clip yields the same number of frames
        num_samples = len(signal)
        if num_samples > NUM_SAMPLES_TO_CONSIDER:
            signal = signal[:NUM_SAMPLES_TO_CONSIDER]
        elif num_samples < NUM_SAMPLES_TO_CONSIDER:
            signal = np.pad(signal, (0, NUM_SAMPLES_TO_CONSIDER - num_samples))

        # extract spectrogram using the caller-supplied STFT parameters
        stft = librosa.stft(signal,
                            n_fft = n_fft,
                            hop_length = hop_length)
        spectrogram = np.abs(stft)
        log_spectrogram = librosa.amplitude_to_db(spectrogram)
        return log_spectrogram

    def predict(self, file_path):
        """Predict the label of the audio file at ``file_path``.

        Args:
            file_path: Path of the audio file to classify.

        Returns:
            The entry of ``_mappings`` whose index is the arg-max of the
            model's output for this clip.
        """
        # extract spectrogram -- (n_bins, n_frames)
        spectrogram = self.preprocess(file_path, n_fft = 400, hop_length = 160)

        # add a leading batch axis and a trailing channel axis so the model
        # receives a 4d array -- (1, n_bins, n_frames, 1)
        batch = spectrogram[np.newaxis, ..., np.newaxis]

        # make prediction
        predictions = self.model.predict(batch) # [ [0.1,0.6, 0.1, ...] ]
        predicted_index = int(np.argmax(predictions))

        return self._mappings[predicted_index]
    

In [None]:
def Disfluency_Spotting_Service():
    """Return the shared ``_Disfluency_Spotting_Service`` instance.

    On the first call the Keras model at ``LOAD_MODEL`` is loaded and stored
    on the class-level ``model`` attribute; later calls return the cached
    instance without reloading.

    Raises:
        Whatever ``keras.models.load_model`` raises when the model cannot be
        loaded. In that case nothing is cached, so a later call retries the
        load instead of returning an instance without a model.
    """
    if _Disfluency_Spotting_Service._instance is None:
        # Load before caching: if this raises, the singleton slot stays empty
        # rather than holding an instance whose model is still None.
        model = keras.models.load_model(LOAD_MODEL)
        instance = _Disfluency_Spotting_Service()
        _Disfluency_Spotting_Service.model = model
        _Disfluency_Spotting_Service._instance = instance
    return _Disfluency_Spotting_Service._instance

In [None]:
# Demo: classify one sample recording and print the result.
if __name__ == "__main__":
    
    # Obtain the shared service; the first call also loads the Keras model
    # from LOAD_MODEL.
    dss = Disfluency_Spotting_Service()
    # Run the prediction on a sample clip (path is relative to the working directory).
    keyword1 = dss.predict("test/sample.wav")
    print(f"predicted keywords: {keyword1}")