This notebook is a simple example of how to use the DeGirum PySDK to run AI inference on an audio stream from a local microphone. PyAudio is required to run this sample.

In [None]:
import sys
import degirum as dg  # import DeGirum PySDK
import numpy as np
from IPython.display import clear_output

In [None]:
# Obtain a model zoo handle (connect_model_zoo is called with its defaults)
zoo = dg.connect_model_zoo()

# Name of the model to run; the name suggests a quantized YAMNet sound
# classifier -- NOTE(review): confirm against the zoo's model list
model_name = "mobilenet_v1_yamnet_sound_cls--96x64_quant_n2x_orca_1"

# Load that model from the zoo
model = zoo.load_model(model_name)

In [None]:
# Define model-specific audio streaming function
# TL;DR: 
# We define a context manager function that opens a PyAudio stream on enter and provides a generator
# function; the generator reads the stream and yields audio waveforms of the proper type and size,
# with the proper overlap. The PyAudio stream is properly closed on exit.
# You pass the model parameters and a check-for-abort function as arguments.

import pyaudio
from contextlib import contextmanager

@contextmanager
def AudioStream(model_info, check_abort):
    """Open a PyAudio input stream and provide overlapping audio waveforms.

    On enter, a single-channel, 16-bit PyAudio input stream is opened at the
    sampling rate ``model_info.InputSamplingRate[0]``. The object bound by
    ``with ... as`` is a generator *function*: calling it returns a generator
    that yields int16 NumPy arrays of ``2 * chunk_length`` samples, where
    ``chunk_length = model_info.InputWaveformSize[0] // 2``. Each waveform is
    the previously read chunk followed by a newly read chunk, so consecutive
    waveforms overlap by one chunk (the very first waveform starts with a
    chunk of zeros).

    On exit, the stream is stopped and closed and the PyAudio instance is
    terminated, even when opening the stream or the ``with`` body raises.

    Args:
        model_info: Object exposing ``InputWaveformSize`` and
            ``InputSamplingRate`` sequences; only element 0 of each is used.
        check_abort: Zero-argument callable polled before every read; the
            generator finishes as soon as it returns a truthy value.

    Yields:
        A zero-argument generator function producing the waveforms.
    """
    chunk_length = model_info.InputWaveformSize[0] // 2
    audio = pyaudio.PyAudio()
    stream = None
    try:
        # Opened inside the try block so that a failure to open the device
        # still terminates the PyAudio instance below.
        stream = audio.open(
            format=pyaudio.paInt16,
            channels=1,
            rate=int(model_info.InputSamplingRate[0]),
            input=True,
            frames_per_buffer=chunk_length,
        )

        # Sliding window: first half holds the previous chunk, second half
        # the most recent one.
        data = np.zeros(2 * chunk_length, dtype=np.int16)

        def out_stream():
            while not check_abort():
                # shift the newest chunk into the "previous" half
                data[:chunk_length] = data[chunk_length:]
                # fill the second half with freshly captured samples
                data[chunk_length:] = np.frombuffer(
                    stream.read(chunk_length), dtype=np.int16
                )
                # Yield a copy: the window is overwritten in place on the
                # next iteration, which would corrupt frames a consumer has
                # queued but not yet processed.
                yield data.copy()

        yield out_stream
    finally:
        try:
            if stream is not None:
                try:
                    stream.stop_stream()  # stop audio streaming
                finally:
                    stream.close()  # close audio stream
        finally:
            audio.terminate()  # terminate audio library


In [None]:
abort = False  # polled by the audio generator; True ends the stream
N = 5  # number of most recent results kept on screen
history = []  # formatted top results, newest first

# Open the microphone stream for the lifetime of the prediction loop
with AudioStream(model.model_info, lambda: abort) as stream:
    # Run inference on every waveform produced by the stream.
    # Make keyboard typing sounds to stop the loop.
    for res in model.predict_batch(stream()):
        # wipe the previous contents of the Jupyter output cell
        clear_output(wait=True)

        # top inference result for this waveform
        top = res.results[0]

        # newest entry goes first; drop anything beyond the N most recent
        history.insert(0, f"{top['label']}: {top['score']}")
        del history[N:]

        # show the history, one entry per line
        print("\n".join(history))

        # request stream shutdown once typing is recognized
        if top['label'] == "Typing":
            abort = True
    