In [1]:
import tensorflow as tf
import numpy as np
import librosa
import os
import tqdm
import shutil

2024-04-27 03:56:56.349837: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
def copy_folder_structure(source_folder, destination_folder):
    """Recreate the directory tree of ``source_folder`` under ``destination_folder``.

    Only directories are created; files are never copied. Directories that
    already exist in the destination are reused, so the call is idempotent.

    Args:
        source_folder: Path of the directory whose sub-directory layout is mirrored.
        destination_folder: Path of the directory that receives the mirrored layout.
            It is created (including missing parents) when absent.

    Raises:
        FileExistsError: If a destination path exists but is not a directory.
    """
    # exist_ok avoids the check-then-create race and fails loudly when the
    # destination path is occupied by a regular file.
    os.makedirs(destination_folder, exist_ok=True)

    # Collect the names first so the scandir handle is closed before recursing.
    with os.scandir(source_folder) as entries:
        subfolder_names = [entry.name for entry in entries if entry.is_dir()]

    for name in subfolder_names:
        copy_folder_structure(
            os.path.join(source_folder, name),
            os.path.join(destination_folder, name),
        )

In [3]:
def move_files_based_on_list(source_folder, destination_folder, file_list):
    """Move the files named in ``file_list`` from one folder tree to another.

    Each entry is a path relative to ``source_folder``; the same relative path
    is used under ``destination_folder``. Entries that do not exist in the
    source are skipped silently.

    Args:
        source_folder: Root directory the relative paths are resolved against.
        destination_folder: Root directory the files are moved into.
        file_list: Iterable of relative paths, typically the result of
            ``readlines()`` (surrounding whitespace and line endings are
            stripped from each entry).
    """
    for entry in file_list:
        # strip() handles "\n", "\r\n" and a final line without any newline;
        # blindly dropping the last character would truncate that last name.
        filename = entry.strip()
        if not filename:
            # An empty name would resolve to source_folder itself and move
            # the whole tree, so blank lines are ignored.
            continue

        source_file = os.path.join(source_folder, filename)
        if not os.path.exists(source_file):
            continue

        destination_file = os.path.join(destination_folder, filename)
        destination_parent = os.path.dirname(destination_file)
        if destination_parent:
            # Do not depend on the folder structure having been mirrored beforehand.
            os.makedirs(destination_parent, exist_ok=True)
        shutil.move(source_file, destination_file)

In [4]:
def make_dataset(source_folder):
    """Carve the test and validation splits out of ``source_folder``.

    For each split, the directory layout of ``source_folder`` is mirrored
    under the split folder, the split's list file is read, and every file
    named in it is moved from ``source_folder`` into the split folder.
    """
    splits = (
        ("../data/test", "../data/train/testing_list.txt"),
        ("../data/val", "../data/train/validation_list.txt"),
    )
    for split_folder, list_path in splits:
        copy_folder_structure(source_folder, split_folder)
        with open(list_path, 'r') as list_file:
            listed_files = list_file.readlines()
        move_files_based_on_list(source_folder, split_folder, listed_files)

In [5]:
# Create the ../data/test and ../data/val folders so that every split can be
# loaded with tf.keras.utils.audio_dataset_from_directory.
# NOTE: the listed files are moved out of ../data/train/audio, not copied.
make_dataset("../data/train/audio")

In [6]:
# Training split. output_sequence_length pins every clip to 16000 samples;
# without it clips are only padded to the longest clip of each batch, while
# the spectrogram step reshapes to exactly 16000 samples per clip.
ds_train = tf.keras.utils.audio_dataset_from_directory(
    directory='../data/train/audio',
    batch_size=256,
    seed=1337,
    output_sequence_length=16000,
)

Found 51088 files belonging to 30 classes.


2024-04-27 03:57:11.670870: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-27 03:57:12.128505: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-27 03:57:12.128583: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-27 03:57:12.134980: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-27 03:57:12.135067: I external/local_xla/xla/stream_executor

In [7]:
# Validation split. output_sequence_length pins every clip to 16000 samples;
# without it clips are only padded to the longest clip of each batch, while
# the spectrogram step reshapes to exactly 16000 samples per clip.
ds_val = tf.keras.utils.audio_dataset_from_directory(
    directory='../data/val/',
    batch_size=256,
    seed=1337,
    output_sequence_length=16000,
)

Found 6798 files belonging to 30 classes.


In [8]:
# Test split. output_sequence_length pins every clip to 16000 samples;
# without it clips are only padded to the longest clip of each batch, while
# the spectrogram step reshapes to exactly 16000 samples per clip.
ds_test = tf.keras.utils.audio_dataset_from_directory(
    directory='../data/test/',
    batch_size=256,
    seed=1337,
    output_sequence_length=16000,
)

Found 6835 files belonging to 30 classes.


In [9]:
# Sanity check: pull a single batch and print the waveform and label shapes.
# NOTE: `x` and `y` remain defined after the loop and are passed to
# get_spectrograms in a later cell.
for x, y in ds_train.take(1):
    print(x.shape)
    print(y.shape)

(256, 16000, 1)
(256,)


2024-04-27 03:57:19.910587: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [10]:
def get_spectrograms(waveforms, labels, num_samples=16000, frame_length=255, frame_step=128):
    """Convert a batch of waveforms into magnitude spectrograms.

    Args:
        waveforms: Audio tensor that is reshaped to ``[-1, num_samples]``; the
            notebook feeds batches of shape ``(batch, 16000, 1)``.
        labels: Labels belonging to ``waveforms``; returned unchanged so the
            function can be used directly with ``Dataset.map``.
        num_samples: Number of samples per clip used for the reshape.
        frame_length: STFT window length in samples.
        frame_step: STFT hop size in samples.

    Returns:
        A ``(spectrogram, labels)`` tuple. The spectrogram is the absolute
        value of the STFT with a trailing axis of size 1 appended; with the
        defaults a ``(256, 16000, 1)`` batch becomes ``(256, 124, 129, 1)``.
    """
    # Drop the trailing axis so the STFT sees one row of samples per clip.
    waveforms = tf.reshape(waveforms, [-1, num_samples])
    spectrogram = tf.signal.stft(
        waveforms, frame_length=frame_length, frame_step=frame_step)
    # Keep the magnitude only; the phase is discarded.
    spectrogram = tf.abs(spectrogram)
    # Add a trailing axis so the result has the layout Conv2D layers expect.
    spectrogram = spectrogram[..., tf.newaxis]
    return spectrogram, labels

In [11]:
# Try the transform eagerly on the batch (`x`, `y`) fetched in the previous
# cell and display the resulting shapes.
spec, label = get_spectrograms(x, y)
spec.shape, label.shape

(TensorShape([256, 124, 129, 1]), TensorShape([256]))

In [12]:
# Replace the waveform datasets by their spectrogram versions.
# NOTE: the names are rebound in place, so running this cell twice applies
# get_spectrograms to already-transformed data; re-create the datasets first.
ds_train = ds_train.map(get_spectrograms)
ds_val = ds_val.map(get_spectrograms)
ds_test = ds_test.map(get_spectrograms)

In [13]:
# Grab the first batch of the mapped training set (the loop exits right after
# binding `x` and `y`) and display its shapes.
for x, y in ds_train.take(1):
    break
x.shape, y.shape

(TensorShape([256, 124, 129, 1]), TensorShape([256]))

In [21]:
# TODO: move these helpers into utils.py (or a similar shared module)
from typing import Dict, List, Union
import pandas as pd
def get_callbacks(path: str) -> List[tf.keras.callbacks.Callback]:
    """Build the training callbacks: early stopping plus best-model checkpointing.

    Both callbacks watch ``val_accuracy`` and treat higher values as better.

    Args:
        path: File path the best model is saved to.

    Returns:
        A list ``[early_stopping, checkpoint]`` to pass to ``model.fit``.
    """
    # Stop once val_accuracy has not improved for 4 consecutive epochs.
    # mode="max" is stated explicitly so it agrees with the checkpoint below
    # instead of depending on Keras' automatic inference from the metric name.
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor="val_accuracy", patience=4, mode="max"
    )
    # Only overwrite the saved model when val_accuracy improves.
    checkpoint = tf.keras.callbacks.ModelCheckpoint(
        path, save_best_only=True, monitor="val_accuracy", mode="max"
    )
    return [early_stopping, checkpoint]

def eval_and_save(
    model_type: str,
    ds_test: tf.data.Dataset,
    config: Dict[str, Union[int, str, float]],
    history: tf.keras.callbacks.History,
    path: str,
) -> None:
    """Evaluate the saved model on the test set and persist the run's artefacts.

    The model is reloaded from ``../models/<path>`` rather than taken from
    memory. The training curves are written to ``../history/<name>.csv`` and
    one ``;``-separated summary line is appended to ``../results/results.csv``.

    Args:
        model_type: Label of the architecture, written to the results file.
        ds_test: Dataset passed to ``model.evaluate``.
        config: Description of the run, written verbatim to the results file.
        history: Object returned by ``model.fit``; its ``.history`` mapping
            holds the per-epoch metrics.
        path: File name of the saved model, relative to ``../models/``.
    """
    model = tf.keras.models.load_model("../models/" + path)
    loss, acc = model.evaluate(ds_test)

    # splitext removes only the final extension, so names that contain
    # additional dots (e.g. "cnn_v1.2.keras") are not truncated.
    run_name = os.path.splitext(path)[0]
    history_path = f"../history/{run_name}.csv"
    os.makedirs(os.path.dirname(history_path), exist_ok=True)
    pd.DataFrame(history.history).to_csv(history_path)

    os.makedirs("../results", exist_ok=True)
    with open("../results/results.csv", "a") as f:
        f.write(f"{model_type};{model.count_params()};{loss};{acc};{config};{path}\n")

In [16]:
# File name of the saved model; it is appended to "../models/" both when the
# checkpoint is written during training and when the model is reloaded.
path = "CNN2D_1.keras"
# Architecture label written to the results file.
model_type = "CNN2D"
# Free-form description of the run, written verbatim to the results file.
# Only "Learning Rate" is read by the code (in model.compile); the other
# entries are descriptive and must be kept in sync with the cells by hand.
config = {
    "Data Augmentation": "No",
    "Regularization": "No",
    "Optimizer": "Adam",
    "Learning Rate": 0.001,
}

In [17]:
# Small 2D CNN operating on the spectrogram batches.
model = tf.keras.Sequential([
    # Matches the per-example spectrogram shape produced by get_spectrograms.
    tf.keras.layers.Input(shape=(124, 129, 1)),
    # Resize to 32x32 before the convolutions.
    tf.keras.layers.Resizing(32, 32),
    tf.keras.layers.Conv2D(32, 3, activation='relu'),
    tf.keras.layers.Conv2D(64, 3, activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    # One output per class (30 classes); no activation, so these are logits.
    tf.keras.layers.Dense(30),
])

In [18]:
# The final Dense layer has no activation, hence from_logits=True; the sparse
# loss is used because the labels are integer class ids, not one-hot vectors.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=config["Learning Rate"]),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"],
)

In [19]:
# 100 epochs is only an upper bound: the callbacks stop training early on
# stalled val_accuracy and save the best model to ../models/<path>.
history = model.fit(ds_train, epochs=100, validation_data=ds_val, callbacks=get_callbacks("../models/" + path))

Epoch 1/100


I0000 00:00:1714183043.571636  174648 service.cc:145] XLA service 0x7f4ec40048f0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1714183043.572213  174648 service.cc:153]   StreamExecutor device (0): NVIDIA GeForce RTX 2060 with Max-Q Design, Compute Capability 7.5
2024-04-27 03:57:24.281009: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-04-27 03:57:32.819555: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:465] Loaded cuDNN version 8907


[1m  1/200[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:11:58[0m 22s/step - accuracy: 0.0352 - loss: 3.4127

I0000 00:00:1714183063.276806  174648 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 122ms/step - accuracy: 0.3030 - loss: 2.5734 - val_accuracy: 0.6420 - val_loss: 1.3151
Epoch 2/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 90ms/step - accuracy: 0.6981 - loss: 1.1032 - val_accuracy: 0.7555 - val_loss: 0.9354
Epoch 3/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 51ms/step - accuracy: 0.8006 - loss: 0.7138 - val_accuracy: 0.7886 - val_loss: 0.7967
Epoch 4/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 50ms/step - accuracy: 0.8520 - loss: 0.5219 - val_accuracy: 0.8064 - val_loss: 0.7572
Epoch 5/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 49ms/step - accuracy: 0.8823 - loss: 0.4126 - val_accuracy: 0.8111 - val_loss: 0.7474
Epoch 6/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 45ms/step - accuracy: 0.9031 - loss: 0.3348 - val_accuracy: 0.8210 - val_loss: 0.7746
Epoch 7/100
[1m200/20

In [22]:
# Evaluate the model reloaded from ../models/<path> on the test set, then
# store the training history and append a line to the results file.
eval_and_save(model_type, ds_test, config, history, path)

[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 27ms/step - accuracy: 0.8273 - loss: 1.2903
