In [1]:
import numpy as np
import sounddevice as sd
#import tflite_runtime.interpreter as tflite
import tensorflow as tf

from scipy.io.wavfile import write
import scipy.io.wavfile as wav

recorded_audio = []

# set some parameters
block_len_ms = 32 
block_shift_ms = 8
fs_target = 16000

# create the interpreters
interpreter_1 = tf.lite.Interpreter(model_path='./pretrained_model/model_1.tflite')
interpreter_1.allocate_tensors()
interpreter_2 = tf.lite.Interpreter(model_path='./pretrained_model/model_2.tflite')
interpreter_2.allocate_tensors()
# Get input and output tensors.
input_details_1 = interpreter_1.get_input_details()
output_details_1 = interpreter_1.get_output_details()
input_details_2 = interpreter_2.get_input_details()
output_details_2 = interpreter_2.get_output_details()
# create states for the lstms
states_1 = np.zeros(input_details_1[1]['shape']).astype('float32')
states_2 = np.zeros(input_details_2[1]['shape']).astype('float32')
# calculate shift and length
block_shift = int(np.round(fs_target * (block_shift_ms / 1000)))
block_len = int(np.round(fs_target * (block_len_ms / 1000)))
# create buffer
in_buffer = np.zeros((block_len)).astype('float32')
out_buffer = np.zeros((block_len)).astype('float32')


def callback(indata, outdata, frames, time, status):
    # buffer and states to global
    global in_buffer, out_buffer, states_1, states_2
    if status:
        print(status)
        
    # write to buffer
    in_buffer[:-block_shift] = in_buffer[block_shift:]
    in_buffer[-block_shift:] = np.squeeze(indata)
    
    # calculate fft of input block
    in_block_fft = np.fft.rfft(in_buffer)
    in_mag = np.abs(in_block_fft)
    in_phase = np.angle(in_block_fft)
    
    # reshape magnitude to input dimensions
    in_mag = np.reshape(in_mag, (1,1,-1)).astype('float32')
    
    # set tensors to the first model
    interpreter_1.set_tensor(input_details_1[1]['index'], states_1)
    interpreter_1.set_tensor(input_details_1[0]['index'], in_mag)
   
    # run calculation 
    interpreter_1.invoke()
    
    # get the output of the first block
    out_mask = interpreter_1.get_tensor(output_details_1[0]['index']) 
    states_1 = interpreter_1.get_tensor(output_details_1[1]['index'])   
    
    # calculate the ifft
    estimated_complex = in_mag * out_mask * np.exp(1j * in_phase)
    estimated_block = np.fft.irfft(estimated_complex)
    
    # reshape the time domain block
    estimated_block = np.reshape(estimated_block, (1,1,-1)).astype('float32')
    
    # set tensors to the second block
    interpreter_2.set_tensor(input_details_2[1]['index'], states_2)
    interpreter_2.set_tensor(input_details_2[0]['index'], estimated_block)
    
    # run calculation
    interpreter_2.invoke()
    
    # get output tensors
    out_block = interpreter_2.get_tensor(output_details_2[0]['index']) 
    states_2 = interpreter_2.get_tensor(output_details_2[1]['index']) 
    
    # write to buffer
    out_buffer[:-block_shift] = out_buffer[block_shift:]
    out_buffer[-block_shift:] = np.zeros((block_shift))
    out_buffer  += np.squeeze(out_block)
    
    # output to soundcard
    outdata[:] = np.expand_dims(out_buffer[:block_shift], axis=-1)
    recorded_audio.append(outdata)

#choose the default input and output devices
input_device = 23
output_device = 23

try:
    with sd.Stream(device=(input_device, output_device),
                   samplerate=fs_target, blocksize=block_shift,
                   dtype=np.float32, latency=0.2,
                   channels=1, callback=callback):
        print('#' * 80)
        print('press Return to quit')
        print('#' * 80)
        input()

except KeyboardInterrupt:
    parser.exit('')

2023-04-24 14:29:46.997886: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-04-24 14:29:47.152302: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-04-24 14:29:47.734564: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-04-24 14:29:47.737113: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


################################################################################
press Return to quit
################################################################################



In [2]:
recorded_audio

[array([[ 0.0000000e+00],
        [ 0.0000000e+00],
        [ 0.0000000e+00],
        [ 0.0000000e+00],
        [ 2.8025969e-45],
        [ 0.0000000e+00],
        [-1.0297101e-35],
        [ 3.0932262e-41],
        [ 2.5223372e-44],
        [ 0.0000000e+00],
        [ 3.7594303e-31],
        [ 4.5694942e-41],
        [ 2.2263927e-24],
        [ 4.5694942e-41],
        [ 2.8106420e-24],
        [ 4.5694942e-41],
        [ 2.2199303e-24],
        [ 4.5694942e-41],
        [ 0.0000000e+00],
        [ 0.0000000e+00],
        [-8.1669445e-32],
        [ 3.0932262e-41],
        [ 0.0000000e+00],
        [ 0.0000000e+00],
        [           nan],
        [ 1.4012985e-45],
        [ 0.0000000e+00],
        [ 0.0000000e+00],
        [ 1.2331426e-43],
        [ 0.0000000e+00],
        [ 0.0000000e+00],
        [ 0.0000000e+00],
        [ 0.0000000e+00],
        [ 0.0000000e+00],
        [ 0.0000000e+00],
        [ 0.0000000e+00],
        [ 0.0000000e+00],
        [ 0.0000000e+00],
        [ 0.