In [1]:
import numpy as np
import speaker_recog as spr
import os
import python_speech_features
import tensorflow as tf
from tensorflow import keras
import sounddevice as sd
from timeit import default_timer as timer
from IPython.display import clear_output
import speech_recognition as sr

In [2]:
def audio2feature(audio):
    """Turn a raw audio signal into a transposed MFCC matrix.

    The signal is handed to ``python_speech_features.base.mfcc`` with a fixed
    configuration (16 kHz sample rate, 0.025 window length, 0.01 window step,
    20 cepstral coefficients computed from 20 filters, a 2048-point FFT, a
    Hanning window, and pre-emphasis, liftering and energy replacement all
    switched off).

    Args:
        audio: The signal to analyse; forwarded unchanged to ``mfcc``.

    Returns:
        The transpose of the array returned by ``mfcc``. According to the
        library documentation ``mfcc`` yields one row per frame, so the result
        should hold one row per cepstral coefficient.
    """
    mfcc_settings = {
        "samplerate": 16000,
        "winlen": 0.025,
        "winstep": 0.01,
        "numcep": 20,
        "nfilt": 20,
        "nfft": 2048,
        "preemph": 0,          # 0 disables the pre-emphasis filter
        "ceplifter": 0,        # 0 disables cepstral liftering
        "appendEnergy": False,
        "winfunc": np.hanning,
    }
    mfcc_matrix = python_speech_features.base.mfcc(audio, **mfcc_settings)
    return mfcc_matrix.T

In [3]:
# Load the saved Keras model that predict_cmd() uses to classify a recorded command.
# The path is relative, so the file must be reachable from the kernel's working directory.
model = keras.models.load_model('cmd_recog_model.h5')

In [4]:
# Command labels; predict_cmd() indexes this list with the argmax of the model output.
#
# The list was originally derived from the sub-directory names of the dataset folder:
#     dataset_path = './dataset'
#     targets = [name for name in os.listdir(dataset_path)
#                if os.path.isdir(os.path.join(dataset_path, name))]
#     print(targets)
# It is written out literally here instead.
# NOTE(review): the order presumably has to match the label order used when the model
# was trained -- confirm before reordering or regenerating it.
targets = ['on', 'two', 'one', 'three', 'off']

In [5]:
def predict_cmd():
    print("Listening for command...")
    cmd_rec = sd.rec(samplerate=16000, 
                       channels=1,
                       dtype="float32",
                       frames=16000 * 1)
    sd.wait()
    print("Recording stopped")
    recorded_feature = audio2feature(cmd_rec)
    recorded_feature = np.float32(recorded_feature.reshape(1, recorded_feature.shape[0], recorded_feature.shape[1], 1))
    prediction = model.predict(recorded_feature).reshape((5, ))
    prediction /= prediction.sum()
    best_candidate_index = prediction.argmax()
    best_candidate_probability = prediction[best_candidate_index]
    word = targets[best_candidate_index]
    speak_verif = spr.test_model(cmd_rec, 16000)
    if speak_verif == True:
        return word
    else:
        return None

In [6]:
# Module-level speech_recognition objects shared by speech_to_text():
# `r` is used there for noise calibration, audio capture and transcription,
# and `mic` (constructed with default arguments) is the audio source it listens on.
r = sr.Recognizer()
mic = sr.Microphone()
def speech_to_text():
    """Listen indefinitely for the wake word and run a command capture when heard.

    Each pass calibrates the recognizer against ambient noise, records a phrase
    of at most one second from ``mic`` and transcribes it with
    ``r.recognize_google``. When the transcript is the wake word "motor"
    (compared case-insensitively, ignoring surrounding whitespace),
    ``predict_cmd`` is called and its result is reported; a ``None`` result is
    reported as an invalid user.

    Audio that the recognizer cannot transcribe (``sr.UnknownValueError``) is
    reported as a missed wake word and the loop continues. Any other exception
    (for example a failed recognition request) propagates to the caller.

    This function only returns by raising.
    """
    wake_word = "motor"
    while True:
        with mic as source:
            # Re-calibrated on every pass; NOTE(review): the library's default
            # calibration window is about one second, during which speech is
            # not captured -- confirm this is acceptable.
            r.adjust_for_ambient_noise(source)
            audio = r.listen(source, phrase_time_limit=1)
        try:
            audioText = r.recognize_google(audio)
        except sr.UnknownValueError:
            # Silence or unintelligible audio is an expected, frequent outcome;
            # handle it here instead of unwinding to the caller on every miss.
            print("Wakeword not detected!")
            continue

        if audioText.strip().lower() != wake_word:
            print("Wakeword not detected!")
            continue

        print(wake_word)
        cmd = predict_cmd()
        if cmd is not None:
            print("Recieved command : ", cmd)
        else:
            print("Invalid User.")
            
def main_func():
    """Run ``speech_to_text`` and restart it whenever it fails.

    Any ``Exception`` raised by ``speech_to_text`` is reported with the message
    "Wakeword not detected!" and the listener is started again. Restarting is
    done in a loop rather than by calling ``main_func`` recursively, so the call
    stack does not grow with every failure and a long session cannot end in
    ``RecursionError``.

    Exceptions that do not derive from ``Exception`` (such as
    ``KeyboardInterrupt``) are not caught and stop the loop. If
    ``speech_to_text`` ever returns normally, this function returns as well.
    """
    while True:
        try:
            speech_to_text()
        except Exception:
            # Every failure is reported the same way, whatever its cause.
            print("Wakeword not detected!")
        else:
            return

# Start the wake-word listener when this code is executed as the main program.
# main_func() does not return under normal operation; interrupt the kernel to stop it.
if __name__ == "__main__":
    main_func()

motor
Listening for command...
Recording stopped
[-29.28752647 -29.15099495 -28.50129601]
	detected as - amrish
Recieved command :  on
motor
Listening for command...
Recording stopped
[-30.86507432 -29.79202436 -30.11467468]
	detected as - anirudh
Recieved command :  off
motor
Listening for command...
Recording stopped
[-32.03258736 -31.83597505 -31.25543337]
	detected as - amrish
Recieved command :  one
Wakeword not detected!
Wakeword not detected!
Wakeword not detected!
motor
Listening for command...
Recording stopped
[-30.97579694 -30.89726361 -28.49485677]
	detected as - amrish
Recieved command :  two
Wakeword not detected!
motor
Listening for command...
Recording stopped
[-30.58917782 -28.75567322 -29.12212245]
	detected as - anirudh
Recieved command :  three
motor
Listening for command...
Recording stopped
[-28.63199699 -28.07868217 -28.37474382]
	detected as - anirudh
Recieved command :  on
motor
Listening for command...
Recording stopped
[-29.9246165  -30.27576675 -27.59892246]

KeyboardInterrupt: 