# SleepTalk

## Categories:

| Category       | Label  |
|----------------|--------|
| Talk           | SPEECH |
| Snoring        | SNORE  |
| Sighs          | SIGH   |
| Farts          | FART   |
| Loud breathing | BREATH |
| Cough          | COUGH  |

## Setup

In [46]:
import os
import shutil
import tarfile
import urllib.request

import csv

import tensorflow as tf
import numpy as np
from functools import reduce
from concurrent.futures import ProcessPoolExecutor as ConcurrentExecutor

import tf_keras
from tf_keras import Sequential
from tensorflow_hub import KerasLayer
from tf_keras.layers import Conv1D, MaxPooling1D, GlobalMaxPooling1D, Dropout, Dense, Input, Lambda


In [2]:
# Output classes of the classifier; the position of a name in this list is the
# index of its entry in the label vectors. TODO: define depending on training data
classes = [
    "SPEECH",
    "SNORE",
    "SIGH",
    "FART",
    "BREATH",
    "COUGH",
]
# Name used for sounds that are deliberately mapped to none of the classes above.
no_class = "NONE"

## Training data collection



In [3]:
# Directory the Google training data is stored in, and the label CSV path inside it.
google_data_dir = "google"
google_label_file = google_data_dir + "/labels.csv"

In [3]:
# Download Google's training embeddings.
# The download is skipped when google_data_dir already exists, so re-running the
# notebook does not fetch and unpack the archive again. Delete the directory by
# hand to force a fresh download.
features_url = "https://storage.googleapis.com/eu_audioset/youtube_corpus/v1/features/features.tar.gz"
extracted_dir = "audioset_v1_embeddings"  # directory name the archive is expected to unpack into

if os.path.exists(google_data_dir):
    print("Training features already present, skipping download.")
else:
    print("Downloading training features...")
    # mode 'r|gz' reads the HTTP response as a forward-only stream.
    with urllib.request.urlopen(features_url) as response, tarfile.open(fileobj=response, mode='r|gz') as targz:
        targz.extractall(filter='tar')
    # Rename only after extraction finished without error, so google_data_dir
    # never refers to a partially extracted archive.
    os.rename(extracted_dir, google_data_dir)
    print("done")

Downloading training features...
done


In [4]:
# Fetch the class label index CSV unless a copy is already on disk.
label_url = "https://storage.googleapis.com/us_audioset/youtube_corpus/v1/csv/class_labels_indices.csv"
label_file_missing = not os.path.exists(google_label_file)
if label_file_missing:
    urllib.request.urlretrieve(label_url, google_label_file)


In [5]:
# Label names grouped by the class they are mapped to. Names listed under
# no_class are kept as explicit negative examples.
synonyms_by_class = {
    classes[0]: ["Speech", "Shout", "Whispering"],  # SPEECH
    classes[1]: ["Snoring"],  # SNORE
    classes[2]: ["Sigh", "Groan", "Grunt"],  # SIGH
    classes[3]: ["Stomach rumble", "Fart"],  # FART
    classes[4]: ["Yawn", "Sniff", "Wheeze", "Gasp", "Pant", "Snort"],  # BREATH
    classes[5]: ["Cough", "Sneeze"],  # COUGH
    no_class: [
        "Chewing, mastication", "Biting", "Burping, eructation", "Bang",
        "Slap, smack", "Whack, thwack", "Smash, crash", "Knock", "Tap", "Flap",
        "Vehicle", "Alarm", "Door", "Thunderstorm", "Wind", "Water", "Noise",
    ],
}

# Invert the mapping: every label name now points at its class.
switched_synonyms = {
    synonym: class_name
    for class_name, class_synonyms in synonyms_by_class.items()
    for synonym in class_synonyms
}

synonyms = switched_synonyms

In [6]:
# Map the numeric label index (CSV column 0) to a class name for every row whose
# name (CSV column 2) appears in synonyms; all other rows are ignored.
label_to_class = {}
with open(google_label_file, newline='') as label_file:
    reader = csv.reader(label_file)
    for row in reader:
        display_name = row[2]
        if display_name in synonyms:
            label_to_class[int(row[0])] = synonyms[display_name]


In [7]:
def get_sequence_examples(filename):
    """Read every record of a TFRecord file and parse it as a ``tf.train.SequenceExample``."""
    parsed_examples = []
    for record in tf.data.TFRecordDataset(filename):
        serialized = record.numpy()
        parsed_examples.append(tf.train.SequenceExample.FromString(serialized))
    return parsed_examples


def get_labels(sequence):
    """Return the integer values stored under the "labels" key of the example's context."""
    labels_feature = sequence.context.feature["labels"]
    return labels_feature.int64_list.value


def get_label_vector(labels: list[int]):
    """Encode label ids as a multi-hot vector over ``classes``.

    Labels missing from ``label_to_class`` and labels mapped to ``no_class``
    leave the vector untouched, so the result may be all zeros.
    """
    matched_classes = {label_to_class[label] for label in labels if label in label_to_class}
    label_data = [0] * len(classes)
    for class_name in matched_classes - {no_class}:
        label_data[classes.index(class_name)] = 1
    return np.array(label_data)


def get_embeddings(sequence):
    """Decode the "audio_embedding" frames of a sequence example.

    Each frame is stored as a raw byte string. Every byte is read as an
    unsigned 8-bit value and rescaled with ``value / 128 - 1``, i.e. from
    [0, 255] to [-1, 1).

    The bytes are decoded with NumPy directly instead of routing every frame
    through ``tf.io.decode_raw`` and ``tf.cast``; the resulting values and the
    float32 dtype are the same.

    Returns:
        A float32 array with one row per frame.
    """
    # NOTE(review): confirm that this rescaling matches the value range of the
    # embeddings the exported model receives from the VGGish layer.
    frames = sequence.feature_lists.feature_list["audio_embedding"].feature
    return np.array([
        np.frombuffer(frame.bytes_list.value[0], dtype=np.uint8).astype(np.float32) / 2 ** 7 - 1
        for frame in frames
    ])

def get_file_content(filename):
    """Load one TFRecord file and sort its usable examples into two lists.

    Examples without any label known to ``label_to_class`` are dropped.
    The remaining ones are returned as ``(embeddings, label_vector)`` tuples:
    first those with at least one class set, then those whose label vector
    is all zeros.
    """
    with_class, without_class = [], []
    for example in get_sequence_examples(filename):
        labels = get_labels(example)
        if not label_to_class.keys().isdisjoint(labels):
            label_vector = get_label_vector(labels)
            embeddings = get_embeddings(example)
            bucket = with_class if max(label_vector) != 0 else without_class
            bucket.append((embeddings, label_vector))
    return with_class, without_class

# NOTE: despite its name, this variable points at the "unbal_train" directory.
balanced_train_dir = f"{google_data_dir}/unbal_train"
# Sorted so that the order of the collected examples does not depend on the
# arbitrary order in which os.listdir returns directory entries.
filenames = [f"{balanced_train_dir}/{file}" for file in sorted(os.listdir(balanced_train_dir))]

# TODO: suppress warnings

print("Reading google data...")
with ConcurrentExecutor() as pool:
    intermediate_results = pool.map(get_file_content, filenames)
print("done")
print("Accumulating...")
# Extend two result lists in place. Concatenating with "+" inside reduce copied
# the accumulated lists for every file and failed when no file was present.
google_class, google_no_class = [], []
for file_with_class, file_without_class in intermediate_results:
    google_class.extend(file_with_class)
    google_no_class.extend(file_without_class)
print("done")

Reading google data...


2024-08-08 15:45:21.490476: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2024-08-08 15:45:21.501746: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2024-08-08 15:45:21.503798: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2024-08-08 15:45:21.529409: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2024-08-08 15:45:21.539468: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2024-08-08 15:45:21.541461: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2024-08-08 15:45:21.544981: I tensorflow/core/framework/local_rendezvous.cc:404] L

done
Accumulating...
done


In [8]:
# Shuffle both example lists in place. A seeded generator makes the permutation,
# and therefore the subset kept when the lists are truncated afterwards,
# repeatable for an identical input order.
shuffle_rng = np.random.default_rng(42)
shuffle_rng.shuffle(google_class)
shuffle_rng.shuffle(google_no_class)

In [9]:
# Balance the two groups by cutting both down to the size of the smaller one.
min_len = min(map(len, (google_class, google_no_class)))
google_class, google_no_class = google_class[:min_len], google_no_class[:min_len]

In [10]:
def to_ragged(tensors):
    """Build a ragged tensor with a single ragged dimension from ``tensors``."""
    ragged = tf.ragged.constant(tensors, ragged_rank=1)
    return ragged

def split(a, n):
    """Cut ``a`` into ``n`` consecutive slices whose lengths differ by at most one.

    The first ``len(a) % n`` slices receive one extra element each.
    """
    base_size, remainder = divmod(len(a), n)
    chunks = []
    start = 0
    for index in range(n):
        stop = start + base_size + (1 if index < remainder else 0)
        chunks.append(a[start:stop])
        start = stop
    return chunks


# Interleave the two groups (one example with a class, one without, ...) and
# separate the embeddings from their label vectors.
interleaved_examples = [
    example
    for pair in zip(google_class, google_no_class)
    for example in pair
]
training_embeddings, training_labels = zip(*interleaved_examples)

print("Creating input tensors...")
# Convert the embeddings chunk-wise in worker processes, then join the chunks
# and pad them into one dense tensor.
with ConcurrentExecutor() as pool:
    ragged_chunks = pool.map(to_ragged, split(training_embeddings, 32))
training_embeddings = tf.concat(list(ragged_chunks), axis=0).to_tensor()
training_labels = tf.constant(training_labels)
print("done")

Creating input tensors...


2024-08-08 15:52:16.735813: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 32820224 exceeds 10% of free system memory.
2024-08-08 15:52:59.925147: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 32815616 exceeds 10% of free system memory.
2024-08-08 15:53:01.332828: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 1050245632 exceeds 10% of free system memory.
2024-08-08 15:53:01.658792: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 1055979520 exceeds 10% of free system memory.


done


In [24]:
# TODO: remove google data

## Classification model definition

In [13]:
def get_classifier(weights=None):
    """Build and compile the multi-label classification model.

    The model takes a variable-length sequence of 128-dimensional frames and
    produces one sigmoid score per entry of ``classes``.

    Args:
        weights: optional weights handed to ``set_weights`` before compiling.

    Returns:
        The compiled ``Sequential`` model.
    """
    result = Sequential()

    layer_stack = [
        Input(shape=(None, 128), dtype=tf.float32),
        # convolutional feature extraction over the frame axis
        Conv1D(64, kernel_size=5, strides=1, padding="same", activation="relu", data_format="channels_last"),
        MaxPooling1D(padding="same", data_format="channels_last"),
        Dropout(0.3),
        Conv1D(128, kernel_size=5, strides=1, padding="same", activation="relu", data_format="channels_last"),
        # collapses the variable-length frame axis
        GlobalMaxPooling1D(data_format="channels_last"),
        Dropout(0.3),
        # dense classification head
        Dense(256, activation="relu"),
        Dropout(0.3),
        Dense(512, activation="relu"),
        Dropout(0.3),
        Dense(len(classes), activation="sigmoid"),
    ]
    for layer in layer_stack:
        result.add(layer)

    if weights is not None:
        result.set_weights(weights)

    result.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    return result

In [14]:
# Instantiate the classifier without preset weights.
classification_model = get_classifier(weights=None)

## Model training

In [15]:
# Stop after 5 epochs without improvement and restore the best weights seen.
early_stop = tf_keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)

classification_model.fit(
    training_embeddings,
    training_labels,
    batch_size=32,
    epochs=30,
    shuffle=True,
    validation_split=0.15,
    callbacks=[early_stop],
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30


<tf_keras.src.callbacks.History at 0x7394aae1b410>

## Model export

In [16]:
# Download Google's VGGish model for sound embeddings.
# TODO: look into licensing
vggish_url = "https://www.kaggle.com/models/google/vggish/TensorFlow2/vggish/1"
vggish = KerasLayer(vggish_url)

2024-08-08 15:58:36.699192: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 201326592 exceeds 10% of free system memory.
2024-08-08 15:58:36.708734: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 67108864 exceeds 10% of free system memory.


In [47]:
# Chain the embedding model and the trained classifier into one model.
model = Sequential()
model.add(vggish)
model.add(Lambda(lambda embeddings: tf.expand_dims(embeddings, axis=0)))  # add batch dimension
model.add(classification_model)

In [56]:
# Convert the combined Keras model to TensorFlow Lite and write it to disk.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

output_path = "model.tflite"
with tf.io.gfile.GFile(output_path, "wb") as tflite_file:
    tflite_file.write(tflite_model)

INFO:tensorflow:Assets written to: /tmp/tmp2edwpg_m/assets


INFO:tensorflow:Assets written to: /tmp/tmp2edwpg_m/assets
W0000 00:00:1723127497.530293   73241 tf_tfl_flatbuffer_helpers.cc:392] Ignored output_format.
W0000 00:00:1723127497.530315   73241 tf_tfl_flatbuffer_helpers.cc:395] Ignored drop_control_dependency.
2024-08-08 16:31:37.530533: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /tmp/tmp2edwpg_m
2024-08-08 16:31:37.532752: I tensorflow/cc/saved_model/reader.cc:52] Reading meta graph with tags { serve }
2024-08-08 16:31:37.532763: I tensorflow/cc/saved_model/reader.cc:147] Reading SavedModel debug info (if present) from: /tmp/tmp2edwpg_m
2024-08-08 16:31:37.545355: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:388] MLIR V1 optimization pass is not enabled
2024-08-08 16:31:37.547256: I tensorflow/cc/saved_model/loader.cc:236] Restoring SavedModel bundle.
2024-08-08 16:31:37.751868: I tensorflow/cc/saved_model/loader.cc:220] Running initialization op on SavedModel bundle at path: /tmp/tmp2edwpg_m
2024-0