# SleepTalk

## Categories:

| Category       | Label  |
|----------------|--------|
| Talk           | SPEECH |
| Snoring        | SNORE  |
| Sighs          | SIGH   |
| Farts          | FART   |
| Loud breathing | BREATH |
| Cough          | COUGH  |

## Setup

In [1]:
import os
import shutil
import tarfile
import urllib.request

import csv

import tensorflow as tf
import numpy as np
from functools import reduce
from concurrent.futures import ProcessPoolExecutor as ConcurrentExecutor

import tf_keras
from tf_keras import Sequential
from tensorflow_hub import KerasLayer
from tf_keras.layers import Conv1D, MaxPooling1D, GlobalMaxPooling1D, Dropout, Dense, Input, Rescaling


2024-08-07 16:28:00.144138: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-08-07 16:28:00.150325: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-08-07 16:28:00.168987: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-07 16:28:00.192781: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-07 16:28:00.199672: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-08-07 16:28:00.218054: I tensorflow/core/platform/cpu_feature_gu

In [2]:
# Class labels predicted by the model, in output order.
# TODO: define depending on training data
classes = [
    "SPEECH",
    "SNORE",
    "SIGH",
    "FART",
    "BREATH",
    "COUGH",
]

# Name of the bucket for sounds that belong to none of the classes above.
no_class = "NONE"

## Training data collection



In [3]:
# Local directory that holds the downloaded training data.
google_data_dir = "google"
# Label CSV, stored inside the data directory.
google_label_file = "/".join((google_data_dir, "labels.csv"))

In [3]:
# Download the google training embeddings, unless a previous run already fetched them.
# Re-running the notebook would otherwise delete the data directory and fetch the whole
# archive again. Delete the directory by hand to force a fresh download.
if os.path.exists(google_data_dir):
    print("Training features already present, skipping download.")
else:
    print("Downloading training features...")
    with urllib.request.urlopen("https://storage.googleapis.com/eu_audioset/youtube_corpus/v1/features/features.tar.gz") as response, tarfile.open(fileobj=response, mode='r|gz') as targz:
        # The archive is streamed and unpacked into the current working directory.
        targz.extractall(filter='tar')
    # Rename only after the archive has been extracted completely and closed, so that
    # google_data_dir never refers to a partial download.
    os.rename("audioset_v1_embeddings", google_data_dir)
    print("done")

Downloading training features...
done


In [4]:
# Fetch the class label index file once; an existing copy is reused.
if not os.path.exists(google_label_file):
    urllib.request.urlretrieve(
        "https://storage.googleapis.com/us_audioset/youtube_corpus/v1/csv/class_labels_indices.csv",
        google_label_file,
    )


In [5]:
# For every class (and the "no class" bucket): the label names that count as that class.
synonyms = {
    # SPEECH
    classes[0]: ["Speech", "Shout", "Whispering"],
    # SNORE
    classes[1]: ["Snoring"],
    # SIGH
    classes[2]: ["Sigh", "Groan", "Grunt"],
    # FART
    classes[3]: ["Stomach rumble", "Fart"],
    # BREATH
    classes[4]: ["Yawn", "Sniff", "Wheeze", "Gasp", "Pant", "Snort"],
    # COUGH
    classes[5]: ["Cough", "Sneeze"],
    # sounds that explicitly belong to none of the classes
    no_class: [
        "Chewing, mastication", "Biting", "Burping, eructation",
        "Bang", "Slap, smack", "Whack, thwack", "Smash, crash",
        "Knock", "Tap", "Flap",
        "Vehicle", "Alarm", "Door", "Thunderstorm", "Wind", "Water", "Noise",
    ],
}

# Invert the mapping: label name -> class name. A name listed under several classes
# ends up with the class that is listed last.
switched_synonyms = {
    label_name: class_name
    for class_name, label_names in synonyms.items()
    for label_name in label_names
}

# From here on `synonyms` is the inverted lookup.
synonyms = switched_synonyms

In [6]:
# Lookup from the numeric label index (first CSV column) to the class name. Only rows whose
# third column is one of the known synonyms are kept.
label_to_class = {}
with open(google_label_file, newline='') as label_file:
    for row in csv.reader(label_file):
        display_name = row[2]
        if display_name in synonyms:
            label_to_class[int(row[0])] = synonyms[display_name]


In [None]:
def get_sequence_examples(filename):
    """Parse every record of the TFRecord file ``filename`` into a ``tf.train.SequenceExample``.

    Returns the parsed examples as a list, in the order the dataset yields them.
    """
    examples = []
    for record in tf.data.TFRecordDataset(filename):
        examples.append(tf.train.SequenceExample.FromString(record.numpy()))
    return examples


def get_labels(sequence):
    """Return the integer values stored under ``labels`` in the context of ``sequence``."""
    label_feature = sequence.context.feature["labels"]
    return label_feature.int64_list.value


def get_label_vector(labels: list[int]):
    """Turn label indices into a multi-hot vector over ``classes``.

    Labels without an entry in ``label_to_class`` are ignored, and labels mapped to
    ``no_class`` set no position.

    Returns:
        A numpy array with one entry per element of ``classes``: 1 where at least one of
        ``labels`` maps to that class, 0 otherwise.
    """
    matched_classes = set()
    for label in labels:
        if label in label_to_class:
            matched_classes.add(label_to_class[label])

    flags = [0] * len(classes)
    for class_name in matched_classes - {no_class}:
        flags[classes.index(class_name)] = 1
    return np.array(flags)


def get_embeddings(sequence):
    """Return the ``audio_embedding`` feature list of ``sequence`` as one numpy array.

    Each entry of the feature list carries its values as raw bytes in the first element
    of its ``bytes_list``; every byte is read as one ``uint8`` value.

    Returns:
        A numpy array with one row per entry of the feature list. Rows have dtype
        ``uint8`` and the length of the corresponding byte string.
    """
    features = sequence.feature_lists.feature_list["audio_embedding"].feature
    # np.frombuffer reinterprets the bytes directly. This yields the same values as decoding
    # them with a TensorFlow op, without dispatching one eager op per entry.
    rows = [np.frombuffer(feature.bytes_list.value[0], dtype=np.uint8) for feature in features]
    # np.array copies the read-only buffer views into one fresh, writable array.
    return np.array(rows)

def get_file_content(filename):
    """Read one TFRecord file and sort its relevant examples into two lists.

    Examples that carry none of the labels known to ``label_to_class`` are skipped.

    Returns:
        A tuple ``(with_class, without_class)`` of lists of ``(embeddings, label_vector)``
        pairs. ``with_class`` holds the examples whose label vector has at least one
        non-zero entry, ``without_class`` the remaining ones.
    """
    with_class, without_class = [], []
    known_labels = label_to_class.keys()
    for example in get_sequence_examples(filename):
        labels = get_labels(example)
        if not known_labels.isdisjoint(labels):
            label_vector = get_label_vector(labels)
            embeddings = get_embeddings(example)
            target = with_class if max(label_vector) != 0 else without_class
            target.append((embeddings, label_vector))
    return with_class, without_class

# NOTE: despite its name, this variable points at the "unbal_train" directory.
balanced_train_dir = f"{google_data_dir}/unbal_train"
# Sorted, so that the order of the accumulated examples does not depend on the arbitrary
# order in which os.listdir returns the directory entries.
filenames = sorted(f"{balanced_train_dir}/{file}" for file in os.listdir(balanced_train_dir))

# TODO: suppress warnings

print("Reading google data...")
with ConcurrentExecutor() as pool:
    # One task per file; each yields a (with_class, without_class) pair of lists.
    intermediate_results = list(pool.map(get_file_content, filenames))
print("done")
print("Accumulating...")
# Extend two running lists instead of chaining list concatenations. Concatenation re-copies
# everything accumulated so far for every file and fails when there are no files at all.
google_class, google_no_class = [], []
for file_with_class, file_without_class in intermediate_results:
    google_class.extend(file_with_class)
    google_no_class.extend(file_without_class)
print("done")

Reading google data...


2024-08-07 16:28:09.456179: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2024-08-07 16:28:09.467212: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2024-08-07 16:28:09.485132: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2024-08-07 16:28:09.488757: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2024-08-07 16:28:09.502968: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2024-08-07 16:28:09.532753: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2024-08-07 16:28:09.536928: I tensorflow/core/framework/local_rendezvous.cc:404] L

done
Accumulating...


In [None]:
# Shuffle both example lists in place. A fixed seed keeps the truncation and the
# validation split that follow reproducible across kernel restarts.
shuffle_rng = np.random.default_rng(42)
shuffle_rng.shuffle(google_class)
shuffle_rng.shuffle(google_no_class)

In [None]:
# Cut both lists to the length of the shorter one so that examples with and without a
# class are equally frequent.
min_len = min(map(len, (google_class, google_no_class)))
google_class, google_no_class = google_class[:min_len], google_no_class[:min_len]

In [None]:
def to_ragged(tensors):
    """Build a ragged tensor with one ragged dimension from ``tensors``."""
    ragged = tf.ragged.constant(tensors, ragged_rank=1)
    return ragged

def split(a, n):
    """Cut the sequence ``a`` into ``n`` consecutive slices of nearly equal length.

    The first ``len(a) % n`` slices are one element longer than the rest; slices are
    empty when ``a`` has fewer than ``n`` elements.

    Returns:
        A list of ``n`` slices of ``a``, in order.
    """
    base, extra = divmod(len(a), n)
    chunks = []
    start = 0
    for index in range(n):
        stop = start + base + (1 if index < extra else 0)
        chunks.append(a[start:stop])
        start = stop
    return chunks


# Alternate examples with and without a class, then separate embeddings from label vectors.
interleaved = []
for class_example, no_class_example in zip(google_class, google_no_class):
    interleaved.append(class_example)
    interleaved.append(no_class_example)
training_embeddings, training_labels = zip(*interleaved)

print("Creating input tensors...")
# Convert the embeddings to ragged tensors in 32 chunks, spread over worker processes.
with ConcurrentExecutor() as pool:
    ragged_chunks = list(pool.map(to_ragged, split(training_embeddings, 32)))
# Join the chunks and convert the ragged result to a dense tensor.
training_embeddings = tf.concat(ragged_chunks, axis=0).to_tensor()
training_labels = tf.constant(training_labels)
print("done")

In [24]:
# TODO: remove google data

## Classification model definition

In [None]:
def get_classifier(weights=None):
    """Build and compile the classification model.

    The model takes uint8 sequences of 128-dimensional vectors with variable length and
    rescales them by 1/255. Two 1D convolution stages and two dense layers follow, and the
    output is one sigmoid unit per entry of ``classes``.

    Args:
        weights: optional list of weights passed to ``set_weights`` before compiling.
            When ``None`` the freshly initialised weights are kept.

    Returns:
        The model, compiled with the adam optimizer, binary cross-entropy loss and the
        accuracy metric.
    """
    conv_options = dict(kernel_size=5, strides=1, padding="same", activation="relu", data_format="channels_last")

    layer_stack = [
        Input(shape=(None, 128), dtype=tf.uint8),
        Rescaling(1./255),

        # first convolution stage, pooled along the sequence axis
        Conv1D(64, **conv_options),
        MaxPooling1D(padding="same", data_format="channels_last"),
        Dropout(0.3),

        # second convolution stage, reduced to a fixed-size vector by global pooling
        Conv1D(128, **conv_options),
        GlobalMaxPooling1D(data_format="channels_last"),
        Dropout(0.3),

        # dense head
        Dense(256, activation="relu"),
        Dropout(0.3),
        Dense(512, activation="relu"),
        Dropout(0.3),

        # one independent sigmoid output per class
        Dense(len(classes), activation="sigmoid"),
    ]

    model = Sequential()
    for layer in layer_stack:
        model.add(layer)

    if weights is not None:
        model.set_weights(weights)

    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    return model

In [None]:
# Build a fresh classifier; no existing weights are loaded.
classification_model = get_classifier(weights=None)

## Model training

In [None]:
# Stop training after 5 epochs without improvement of the callback's default monitored
# quantity, and restore the best weights seen.
early_stop = tf_keras.callbacks.EarlyStopping(
    patience=5,
    restore_best_weights=True,
)

# Train for at most 30 epochs, holding out 15% of the data for validation.
classification_model.fit(
    training_embeddings,
    training_labels,
    epochs=30,
    batch_size=32,
    shuffle=True,
    validation_split=0.15,
    callbacks=[early_stop],
)

## Model export

In [None]:
# Download Google's VGGish model for sound embeddings and wrap it as a Keras layer.
# TODO: look into licensing
vggish = KerasLayer(
    "https://www.kaggle.com/models/google/vggish/TensorFlow2/vggish/1"
)

In [14]:
# adding classification model
# Stack the VGGish layer and the trained classifier into one end-to-end model.
# NOTE(review): the output recorded for this cell is a TypeError raised while calling the
# KerasLayer. Its signature takes `waveform` as TensorSpec(shape=(None,), dtype=tf.float32),
# but the layer was called with an input of shape (4, 5). Stacking the hub layer in a
# Sequential therefore does not work as written; the export needs rework before it runs.
# NOTE(review): the classifier declares a uint8 input of shape (None, 128). Confirm that
# the VGGish output has the same dtype and value range as the training embeddings.
model = Sequential([
    vggish,
    classification_model
])

# Convert the combined Keras model to the TensorFlow Lite format.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Write the converted model to "model.tflite" in the current working directory.
with tf.io.gfile.GFile("model.tflite", "wb") as file:
    file.write(tflite_model)

TypeError: Exception encountered when calling layer 'keras_layer_8' (type KerasLayer).

Binding inputs to tf.function failed due to `Can not cast TensorSpec(shape=(4, 5), dtype=tf.float32, name=None) to TensorSpec(shape=(None,), dtype=tf.float32, name=None)`. Received args: ([[<tf.Tensor: shape=(), dtype=float32, numpy=0.0>, <tf.Tensor: shape=(), dtype=float32, numpy=0.0>, <tf.Tensor: shape=(), dtype=float32, numpy=0.0>, <tf.Tensor: shape=(), dtype=float32, numpy=0.0>, <tf.Tensor: shape=(), dtype=float32, numpy=0.0>], [<tf.Tensor: shape=(), dtype=float32, numpy=0.0>, <tf.Tensor: shape=(), dtype=float32, numpy=0.0>, <tf.Tensor: shape=(), dtype=float32, numpy=0.0>, <tf.Tensor: shape=(), dtype=float32, numpy=0.0>, <tf.Tensor: shape=(), dtype=float32, numpy=0.0>], [<tf.Tensor: shape=(), dtype=float32, numpy=0.0>, <tf.Tensor: shape=(), dtype=float32, numpy=0.0>, <tf.Tensor: shape=(), dtype=float32, numpy=0.0>, <tf.Tensor: shape=(), dtype=float32, numpy=0.0>, <tf.Tensor: shape=(), dtype=float32, numpy=0.0>], [<tf.Tensor: shape=(), dtype=float32, numpy=0.0>, <tf.Tensor: shape=(), dtype=float32, numpy=0.0>, <tf.Tensor: shape=(), dtype=float32, numpy=0.0>, <tf.Tensor: shape=(), dtype=float32, numpy=0.0>, <tf.Tensor: shape=(), dtype=float32, numpy=0.0>]],) and kwargs: {} for signature: (waveform: TensorSpec(shape=(None,), dtype=tf.float32, name=None)).

Call arguments received by layer 'keras_layer_8' (type KerasLayer):
  • inputs=[['tf.Tensor(shape=(), dtype=float32)', 'tf.Tensor(shape=(), dtype=float32)', 'tf.Tensor(shape=(), dtype=float32)', 'tf.Tensor(shape=(), dtype=float32)', 'tf.Tensor(shape=(), dtype=float32)'], ['tf.Tensor(shape=(), dtype=float32)', 'tf.Tensor(shape=(), dtype=float32)', 'tf.Tensor(shape=(), dtype=float32)', 'tf.Tensor(shape=(), dtype=float32)', 'tf.Tensor(shape=(), dtype=float32)'], ['tf.Tensor(shape=(), dtype=float32)', 'tf.Tensor(shape=(), dtype=float32)', 'tf.Tensor(shape=(), dtype=float32)', 'tf.Tensor(shape=(), dtype=float32)', 'tf.Tensor(shape=(), dtype=float32)'], ['tf.Tensor(shape=(), dtype=float32)', 'tf.Tensor(shape=(), dtype=float32)', 'tf.Tensor(shape=(), dtype=float32)', 'tf.Tensor(shape=(), dtype=float32)', 'tf.Tensor(shape=(), dtype=float32)']]
  • training=None