In [3]:
import speech_recognition as sr
import random
import time
# Paths of the two sample recordings used by the file-based examples:
# aud_1 is the clean recording, aud_2 is the noisy one.
aud_1 = "audio_files_harvard.wav"
aud_2 = "audio_files_jackhammer.wav"

# Recognizer instance shared by the file-based examples.
r = sr.Recognizer()

# Show which SpeechRecognition release produced the outputs below.
print("Speech recognition version: ", sr.__version__)

Speech recognition version:  3.8.1


In [4]:
### Reading from file
# Clean audio file.
harvard = sr.AudioFile(aud_1)

# Audio file containing background noise.
jackhammer = sr.AudioFile(aud_2)

# Capture the complete recording (no offset/duration -> read to the end).
with harvard as source:
    audio = r.record(source)
complete_segment = r.recognize_google(audio)
print("Complete segment: ", complete_segment)

# Capture consecutive segments: each record() call continues from where the
# previous one stopped inside the same `with` block.
with harvard as source:
    # First 4 seconds of the file.
    audio = r.record(source, duration=4)
    # The 4 seconds that follow the first 4 seconds.
    audio2 = r.record(source, duration=4)
segment = r.recognize_google(audio)
print("First four seconds: ", segment)
segment = r.recognize_google(audio2)
print("Four seconds after first four: ", segment)

# Capture with an explicit offset: skip the first 4.7 seconds, then record the
# next 2.8 seconds.
with harvard as source:
    audio = r.record(source, offset=4.7, duration=2.8)
segment = r.recognize_google(audio)

# Depending on the offset, a word may be cut in half and the transcription of
# the segment's first/last word can be wrong.
print("Capture with 4.7 offset and 2.8: ", segment)

with jackhammer as source:
    # Calibrate the recognizer to the noise level using the first 0.5 seconds
    # of the stream; that portion is consumed, so record() captures only the
    # audio that follows it.
    r.adjust_for_ambient_noise(source, duration=0.5)
    audio = r.record(source)

segment = r.recognize_google(audio)
print("Adjusted for ambient noise: ", segment)

### When there is noise in the audio it is better to inspect the full
### response from the API: show_all=True returns every candidate
### transcription instead of only the most likely one.
segment = r.recognize_google(audio, show_all=True)
print("JSON String: ", segment)

Complete segment:  the still smell of old beer drinkers it takes hi to bring out the order I called it yourself invest a salt a kotess find the M tacos Al pastor my favourite is just for food is Bihar cross bun
First four seconds:  the still smell of old beer drinkers
Four seconds after first four:  ethics he to bring out the order I called it
Capture with 4.7 offset and 2.8:  excreta bring out the order
Adjusted for ambient noise:  smell during periods
JSON String:  {'alternative': [{'transcript': 'smell during periods', 'confidence': 0.79938751}, {'transcript': 'smell during sex'}, {'transcript': 'smell during rains'}, {'transcript': 'smell'}, {'transcript': 'smell during'}], 'final': True}


In [25]:
### reading from mic
### This will not work on Google Colab
### Local machine with a microphone is required to run it
### Run sr.Microphone.list_microphone_names() to check list of devices attached

def recognize_speech_from_mic(recognizer, microphone):
    """Capture one utterance from `microphone` and transcribe it.

    Args:
        recognizer: `sr.Recognizer` used to calibrate, listen and transcribe.
        microphone: `sr.Microphone` used as the audio source.

    Returns:
        A dict with three keys:
          "success"       -- False only when the recognition API request
                             failed; True otherwise.
          "error"         -- None on success, otherwise a short message
                             ("API unavailable" or
                             "Unable to recognize speech").
          "transcription" -- the recognized text, or None when nothing was
                             transcribed.

    Raises:
        TypeError: if either argument is not of the expected type.
    """
    if not isinstance(recognizer, sr.Recognizer):
        raise TypeError("`recognizer` must be `Recognizer` instance")
    if not isinstance(microphone, sr.Microphone):
        raise TypeError("`microphone` must be `Microphone` instance")

    # Calibrate against the ambient noise first, then capture from the same
    # open source.
    with microphone as mic_source:
        recognizer.adjust_for_ambient_noise(mic_source)
        captured = recognizer.listen(mic_source)

    print("Interpreting...")

    try:
        transcript = recognizer.recognize_google(captured)
    except sr.RequestError:
        # API was unreachable or unresponsive.
        return {
            "success": False,
            "error": "API unavailable",
            "transcription": None,
        }
    except sr.UnknownValueError:
        # Speech was unintelligible; the request itself still succeeded.
        return {
            "success": True,
            "error": "Unable to recognize speech",
            "transcription": None,
        }

    return {
        "success": True,
        "error": None,
        "transcription": transcript,
    }

# Tip: sr.Microphone.list_microphone_names() lists the attached input devices.

PROMPT_LIMIT = 5      # maximum number of listening attempts
COUNTDOWN_TICKS = 3   # dots printed before the door answers
TICK_SECONDS = 1      # pause before each dot and before the final answer

# Reply for each recognized keyword; anything else gets UNKNOWN_REPLY.
DOOR_REPLIES = {
    'close': "Door closed!",
    'open': "Door opened!",
}
UNKNOWN_REPLY = 'You kid, play somewhere else!'


def announce_after_countdown(message, ticks=COUNTDOWN_TICKS, delay=TICK_SECONDS):
    """Print `ticks` dots, pausing `delay` seconds before each, then `message`.

    One more `delay`-second pause separates the last dot from `message`.
    """
    for _ in range(ticks):
        time.sleep(delay)
        print('.')
    time.sleep(delay)
    print(message)


recognizer = sr.Recognizer()
microphone = sr.Microphone()
# Short alias for the same device; kept so that ad-hoc cells written against
# `mic` keep working without opening a second Microphone instance.
mic = microphone

print('Hi, I am the door and I listen to everything :-)')

# Retry policy:
# - a transcription was returned          -> stop listening and continue
# - the API request failed                -> stop listening and continue
# - request succeeded, nothing understood -> retry, up to PROMPT_LIMIT times
listen = None  # stays None only if PROMPT_LIMIT allows no attempt at all
for _attempt in range(PROMPT_LIMIT):
    print('Listening...')
    listen = recognize_speech_from_mic(recognizer, microphone)
    if listen["transcription"] or not listen["success"]:
        break

if listen is None or listen["error"]:
    # Either no attempt was made, the API failed, or the last attempt was
    # unintelligible.
    print("I need a break :(")
else:
    keyword = listen["transcription"].lower()
    print("You said: {}".format(keyword))
    announce_after_countdown(DOOR_REPLIES.get(keyword, UNKNOWN_REPLY))


Hi, I am the door and I listen to everything :-)
Listening...
Interpreting...
You said: close
.
.
.
Door closed!
