In [1]:
import tensorflow as tf
import sounddevice as sd
import numpy as np
import IPython.display as ipd
import scipy as sp
import scipy.integrate
import scipy.signal
import librosa as li

from preprocessing import Audio

In [2]:
# Restrict TensorFlow to the CPU. Selecting by device type instead of by
# position (`list_physical_devices()[0]`) does not depend on the order in
# which TensorFlow enumerates devices.
tf.config.set_visible_devices(tf.config.list_physical_devices('CPU'))
tf.config.list_logical_devices()

[LogicalDevice(name='/device:CPU:0', device_type='CPU'),
 LogicalDevice(name='/device:XLA_CPU:0', device_type='XLA_CPU')]

In [211]:
# Default every sounddevice recording/playback/stream to 16 kHz, single channel.
# The rest of the notebook hard-codes 16000 in several places (get_energy's
# default `sr`, pad, predictions), so this rate must stay in sync with them.
sd.default.samplerate = 16000
sd.default.channels = 1

In [210]:
def get_energy(x, sr=16000):
    """Turn a mono audio clip into a 200-point band-limited energy envelope.

    Steps: trim leading/trailing silence, pad or truncate to exactly one
    second (`sr` samples), band-pass filter 260-700 Hz with a 999-tap FIR,
    divide by the filtered signal's maximum, square, split into 200 chunks and
    integrate each chunk with Simpson's rule (unit sample spacing).

    Args:
        x: 1-D array of audio samples.
        sr: Sample rate in Hz; used both for the one-second length and as the
            filter design frequency.

    Returns:
        1-D float array of length 200, one energy value per chunk.
    """
    x, _ = li.effects.trim(x)
    # pad_sequences is used here purely as a "pad/truncate at the end to a
    # fixed length" helper; [0] unwraps the single-sequence batch.
    x = tf.keras.preprocessing.sequence.pad_sequences(
        [x],
        maxlen=int(sr * 1.0),
        padding='post',
        truncating='post',
        dtype='float32'
    )[0]
    coeff = sp.signal.firwin(999, [260, 700], fs=sr, pass_zero=False)
    x_filtered = sp.signal.lfilter(coeff, 1.0, x)

    # A silent clip filters to all zeros; dividing by a zero peak would turn
    # every feature into NaN, so leave the signal unscaled in that case.
    peak = x_filtered.max()
    x_normalized = x_filtered / peak if peak != 0 else x_filtered
    x_squared = np.square(x_normalized)

    # `simps` was renamed to `simpson` and later removed from SciPy; prefer the
    # new name and fall back to the old one on older installations.
    integrate = getattr(sp.integrate, 'simpson', None) or sp.integrate.simps
    return np.array([integrate(part) for part in np.array_split(x_squared, 200)])

In [224]:
# Load the saved Keras model from its HDF5 file.
path_to_model = 'models/v4/MiraGRUV4.3.2-1.h5'
model = tf.keras.models.load_model(path_to_model)

# Re-compile with binary cross-entropy and recall/precision metrics, then
# print the layer summary as the cell's output.
model.compile(
    metrics=[
        tf.keras.metrics.Recall(name='recall'),
        tf.keras.metrics.Precision(name='precision'),
    ],
    loss=tf.keras.losses.BinaryCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(),
)
model.summary()

Model: "MiraGRUV4.3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
normalization_2 (Normalizati (None, 200, 1)            3         
_________________________________________________________________
gru_22 (GRU)                 (None, 200)               121800    
_________________________________________________________________
dense_22 (Dense)             (None, 1)                 201       
Total params: 122,004
Trainable params: 122,001
Non-trainable params: 3
_________________________________________________________________


In [425]:
# Record one second from the default input device into the notebook-global
# `data` buffer. blocking=True makes the call wait until recording finishes.
# The streaming callback later replaces `data` with a rolling window.
data = sd.rec(int(1.0 * sd.default.samplerate), blocking=True)

In [433]:
# Notebook-global counters. `parts` is incremented once per audio block by
# `callback` (and is reassigned by a later playback cell). `class_sum` and
# `prediction_sum` are never read or updated by any code visible in this
# notebook.
class_sum = 0
parts = 0
prediction_sum = 0

def pad(data, length=16000):
    """Zero-pad a (frames, 1) float32 tensor at the end up to `length` frames.

    Args:
        data: Tensor whose first axis is the frame axis and whose second axis
            has size 1 (it is concatenated with a (n, 1) block of zeros).
        length: Target number of frames; defaults to one second at 16 kHz.

    Returns:
        `data` followed by enough zero frames to reach `length`. Input that is
        already `length` frames or longer is returned unchanged rather than
        raising on a negative padding size.
    """
    missing = tf.maximum(length - tf.shape(data)[0], 0)
    zero_padding = tf.zeros((missing, 1), dtype=tf.float32)
    return tf.concat([data, zero_padding], 0)

def predictions(model, data):
    """Score four progressively shorter tails of the audio buffer.

    For each start offset 0, 4000, 8000 and 12000 samples, the remainder of
    `data` is converted to an energy envelope by `get_energy` (which trims
    silence and pads/truncates to one second) and passed through `model`.

    Args:
        model: Callable Keras model that accepts a (1, 200) array.
        data: Audio buffer, indexed by frame on the first axis (the notebook
            uses shape (16000, 1)).

    Returns:
        List of four scalar model outputs, in offset order.
    """
    scores = []
    for offset in range(0, 16000, 4000):
        # Flatten with -1 instead of a hard-coded (16000 - offset): the result
        # is identical for a 16000-frame buffer, and other buffer lengths no
        # longer raise a reshape error.
        window = np.reshape(data[offset:], -1)
        e_parts = get_energy(window)
        scores.append(model(np.reshape(e_parts, (1, 200)))[0][0].numpy())
    return scores

def callback(indata, outdata, frames, time, status):
    """sounddevice stream callback: slide the audio window and run detection.

    Drops the oldest `frames` samples from the global `data` buffer, appends
    the newly captured block, scores the buffer with `predictions`, prints the
    scores, and when the two middle windows both exceed 0.5 plays the buffer
    back and prints their mean score.

    Args:
        indata: Newly captured input block.
        outdata: Output block to be filled for playback.
        frames: Number of frames in this block.
        time: Timing information supplied by sounddevice (unused).
        status: Callback flags reported by sounddevice; printed when set.
    """
    global data, parts

    if status:
        print(status)
    # The stream is duplex and nothing is played through it, so write silence
    # rather than leaving the output buffer with whatever it contained.
    outdata.fill(0)

    parts += 1
    data = np.append(data[frames:], indata, axis=0)
    preds = predictions(model, data)
    print(preds)
    ipd.clear_output(wait=True)

    if preds[1] > 0.5 and preds[2] > 0.5:
        sd.play(data)
        # Parenthesised so the printed value is the mean of the two scores
        # (the unparenthesised form computed preds[1] + preds[2] / 2).
        print('Mira {:.3f}'.format((preds[1] + preds[2]) / 2))
            
    
# Deliver audio to `callback` in half-second blocks (8000 frames at 16 kHz).
# NOTE(review): the timing cell further down reports ~446 ms for a single
# predictions() call, which is close to the 500 ms block period — confirm the
# callback keeps up in real time.
block = int(sd.default.samplerate * 0.5)
stream = sd.Stream(blocksize=block, callback=callback)

In [434]:
# Begin streaming; `callback` now runs for every captured block and prints
# the four window scores shown in the output below.
stream.start()

[0.01202178, 0.25935924, 0.44987747, 0.97633064]


In [435]:
# Stop audio processing. NOTE(review): the stream is never closed in this
# notebook — presumably stream.close() should follow once it is no longer
# needed; confirm.
stream.stop()

[1.8140767e-05, 0.036840796, 0.58995897, 0.02207312]


In [60]:
# NOTE: the (44100, 1) output below comes from an earlier execution (In [60]).
# The recording cell in this notebook captures 1.0 s at 16000 Hz, i.e. 16000
# frames, which is also the length `predictions` is written around.
data.shape

(44100, 1)

In [32]:
# Declared input is (batch, 200, 1); the 200 matches the number of energy
# chunks produced by get_energy. The notebook feeds the model (1, 200) arrays.
model.input_shape

(None, 200, 1)

In [426]:
# Play `data` starting at sample offset `parts`. The trailing `* 0` makes the
# offset 0, so the whole buffer is played; change that factor to skip ahead in
# steps of 0.15 s. Note this reuses the global name `parts` that `callback`
# increments as a block counter.
parts = int(16000 * 0.15 * 0)
sd.play(data[parts:])

In [313]:
%%time
# Time one feature-extraction + model call on the buffer from offset `parts`.
# The reshape assumes `data` holds exactly 16000 frames.
e_parts = get_energy(np.reshape(data[parts:], (16000-parts)))
model(np.reshape(e_parts, (1, 200)))

Wall time: 116 ms


<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[0.01795262]], dtype=float32)>

In [419]:
# Score the current buffer at the four window offsets (0, 4000, 8000, 12000).
predictions(model, data)

[0.0022153258, 0.0071291924, 0.011405498, 0.057645947]

In [436]:
%%time
# Same call as above, timed: this is the per-block cost paid inside `callback`.
predictions(model, data)

Wall time: 446 ms


[1.8140767e-05, 0.036840796, 0.58995897, 0.02207312]