## Example script illustrating sound classification on audio stream
This notebook is an example of how to use DeGirum PySDK to run sound classification AI inference on an audio stream from a local microphone.


In [None]:
import degirum as dg # import DeGirum PySDK
import mytools
import sys
import numpy as np
from IPython.display import clear_output

### Specify inference option

In [None]:
# Uncomment and edit exactly one of the following inference options to match your system configuration
# (the chosen line defines `zoo`, which the model-loading cell below uses). For details see
# https://cs.degirum.com/doc/0.5.0/degirum.html#system-configuration-for-specific-use-cases

# 1. DeGirum Cloud Zoo inference:
#zoo = dg.connect_model_zoo("dgcps://cs.degirum.com", token=mytools.token_get())

# 2. AIServer inference via IP address using models from DeGirum Cloud model zoo
#zoo = dg.connect_model_zoo(("192.168.0.7", "https://cs.degirum.com/degirum_com/public"), token=mytools.token_get())

# 3. AIServer inference via IP address using local model zoo
#zoo = dg.connect_model_zoo("192.168.0.1")

# 4. ORCA board installed locally using models from DeGirum Cloud Model Zoo
#zoo = dg.connect_model_zoo("https://cs.degirum.com/degirum_com/public", token=mytools.token_get())

# 5. Local inference with locally deployed model
#zoo = dg.connect_model_zoo("full/path/to/model.json")

In [None]:
# load YAMNET sound classification model for DeGirum Orca AI accelerator
# (change model name to "...n2x_cpu_1" to run it on CPU)
# NOTE: in the visible notebook `zoo` is assigned only by the commented-out
# connect_model_zoo(...) options in the cell above; if none of them has been
# uncommented and run, this line raises NameError.
model = zoo.load_model("mobilenet_v1_yamnet_sound_cls--96x64_quant_n2x_orca_1")

In [None]:
# Classify audio captured from the microphone and show a rolling history of the
# top result of each inference. Make a keyboard typing sound to stop the loop.

abort = False # stream abort flag; handed to the audio source as a callable (lambda below)
N = 5 # inference results history depth
history = [] # up to N most recent top results, newest first

# audio capture parameters are read from the loaded model's info
sampling_rate_hz = model.model_info.InputSamplingRate[0]
read_buffer_size = model.model_info.InputWaveformSize[0] // 2 # two read buffers in waveform for half-length overlapping

# Acquire model input stream object
with mytools.open_audio_stream(sampling_rate_hz, read_buffer_size) as stream:
    #
    # AI prediction loop.
    # emit keyboard typing sound to stop
    #
    for res in model.predict_batch(mytools.audio_overlapped_source(stream, lambda: abort)):
        # Guard against an empty result list: indexing results[0] would raise
        # IndexError and abort the whole stream. Skip such a frame and keep the
        # previous output on screen.
        # NOTE(review): assumes results can be empty (e.g. every class filtered
        # out by post-processing) - confirm against the model's settings.
        if not res.results:
            continue

        # first entry is treated as the top inference result
        top = res.results[0]

        # clear Jupyter output cell
        clear_output(wait = True)

        # add top inference result to history (newest first)
        history.insert(0, f"{top['label']}: {top['score']}")

        # keep only N last elements in history
        del history[N:]

        # print history
        for m in history:
            print(m)

        # check for stop condition
        if top['label'] == "Typing":
            abort = True
    