In [1]:
import tensorflow as tf
from utils import (
    get_datasets,
    waveform_to_spectrograms,
    waveform_to_log_mel_spectrogram,
    eval_and_save,
    get_callbacks,
    CustomSchedule,
)
from Transformer import Transformer
# One seed for the whole run. `tf.keras.utils.set_random_seed` seeds Python's
# `random`, NumPy and TensorFlow together, so randomness that goes through
# `random` or NumPy is pinned as well, not only TensorFlow ops.
SEED = 1337
tf.keras.utils.set_random_seed(SEED)


2024-04-29 17:03:54.386487: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Label for this experiment; it is handed to eval_and_save in the last cell.
model_type = "Transformer"

# Front-end settings forwarded to waveform_to_log_mel_spectrogram when the
# datasets are prepared.
# NOTE(review): units are not visible here -- presumably Hz for sample_rate and
# samples for frame_length / frame_step; confirm against utils.
sample_rate = 16000
frame_length, frame_step = 255, 128
num_mel_bins = 129

# get_datasets() yields the three raw splits in train / validation / test order.
raw_splits = get_datasets()
ds_train_raw, ds_val_raw, ds_test_raw = raw_splits

Found 51088 files belonging to 30 classes.


2024-04-29 17:04:10.619188: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-29 17:04:11.061373: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-29 17:04:11.061455: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-29 17:04:11.072280: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-29 17:04:11.072388: I external/local_xla/xla/stream_executor

Found 6798 files belonging to 30 classes.
Found 6835 files belonging to 30 classes.


In [3]:
# File name of the weights file for this run (combined with "../models/" when
# the callbacks are created, and passed to eval_and_save).
path = "Transformer6.weights.h5"

# Hyper-parameters and descriptive metadata for this run. The model cell reads
# d_model / num_layers / num_heads / dropout_rate from here, and the whole
# dict is handed to eval_and_save.
config = {
    "Spectrogram": "Log-Mel",
    "Regularization": "Dropout",
    "Optimizer": "Adam",
    # NOTE(review): the training cell builds Adam with CustomSchedule(d_model),
    # not with this constant -- confirm whether this entry should describe the
    # schedule instead.
    "Learning Rate": 0.001,
    "Batch Size": 256,
    "d_model": 256,
    "num_layers": 4,
    "num_heads": 2,
    "dropout_rate": 0.2,
}


def make_log_mel_dataset(ds_raw, batch_size):
    """Turn a raw split into a batched, cached log-mel spectrogram dataset.

    Args:
        ds_raw: dataset of (x, y) pairs; x is fed to
            waveform_to_log_mel_spectrogram and y is passed through unchanged.
        batch_size: number of elements per batch.

    Returns:
        The dataset after batch -> map -> cache -> prefetch, in that order.
    """

    def to_log_mel(x, y):
        # Conversion runs on whole batches because .batch() precedes .map().
        features = waveform_to_log_mel_spectrogram(
            x,
            sample_rate=sample_rate,
            frame_length=frame_length,
            frame_step=frame_step,
            num_mel_bins=num_mel_bins,
        )
        return features, y

    return (
        ds_raw.batch(batch_size)
        .map(to_log_mel)
        # Cache after the map so spectrograms are computed once, not per epoch.
        .cache()
        .prefetch(tf.data.AUTOTUNE)
    )


# All three splits go through the exact same pipeline.
ds_train = make_log_mel_dataset(ds_train_raw, config["Batch Size"])
ds_val = make_log_mel_dataset(ds_val_raw, config["Batch Size"])
ds_test = make_log_mel_dataset(ds_test_raw, config["Batch Size"])

In [4]:
# Build the classifier from the hyper-parameters stored in `config`.
d_model = config["d_model"]
model = Transformer(
    num_layers=config["num_layers"],
    d_model=d_model,
    num_heads=config["num_heads"],
    dff=4 * d_model,  # feed-forward width is tied to 4x the model width
    # NOTE(review): presumably the number of spectrogram time frames per clip --
    # confirm against the Transformer implementation.
    block_size=62,
    dropout_rate=config["dropout_rate"],
    num_classes=30,  # get_datasets() reports 30 classes
)

# Adam driven by the custom learning-rate schedule, which is parameterised by
# the model width.
lr = CustomSchedule(d_model)
optimizer = tf.keras.optimizers.Adam(
    learning_rate=lr,
    beta_1=0.9,
    beta_2=0.98,
    epsilon=1e-9,
)
# from_logits=True: the loss is told the model emits raw, un-normalised scores.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer=optimizer, loss=loss_fn, metrics=["accuracy"])

# Up to 100 epochs; the callbacks from utils.get_callbacks receive the weights
# file location under ../models/.
weights_path = "../models/" + path
history = model.fit(
    ds_train,
    validation_data=ds_val,
    epochs=100,
    callbacks=get_callbacks(weights_path),
)

Epoch 1/100


I0000 00:00:1714403084.726330    3929 service.cc:145] XLA service 0x7f9e6c014f90 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1714403084.728372    3929 service.cc:153]   StreamExecutor device (0): NVIDIA GeForce RTX 2060 with Max-Q Design, Compute Capability 7.5
2024-04-29 17:04:46.440912: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-04-29 17:04:55.153550: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:465] Loaded cuDNN version 8907



2024-04-29 17:05:45.392592: E external/local_xla/xla/service/slow_operation_alarm.cc:65] Trying algorithm eng30{k2=1,k4=2,k5=1,k6=0,k7=0} for conv (f32[256,256,1,124]{3,2,1,0}, u8[0]{0}) custom-call(f32[256,256,1,126]{3,2,1,0}, f32[256,256,1,3]{3,2,1,0}), window={size=1x3}, dim_labels=bf01_oi01->bf01, custom_call_target="__cudnn$convForward", backend_config={"operation_queue

[1m199/200[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m1s[0m 2s/step - accuracy: 0.0504 - loss: 3.7069




2024-04-29 17:12:45.591134: W external/local_tsl/tsl/framework/bfc_allocator.cc:296] Allocator (GPU_0_bfc) ran out of memory trying to allocate 12.71GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2024-04-29 17:12:45.662857: W external/local_tsl/tsl/framework/bfc_allocator.cc:296] Allocator (GPU_0_bfc) ran out of memory trying to allocate 6.91GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.


[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m583s[0m 2s/step - accuracy: 0.0509 - loss: 3.7025 - val_accuracy: 0.3538 - val_loss: 2.1484
Epoch 2/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m726s[0m 4s/step - accuracy: 0.5283 - loss: 1.5741 - val_accuracy: 0.8189 - val_loss: 0.6279
Epoch 3/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m200s[0m 1s/step - accuracy: 0.8426 - loss: 0.5377 - val_accuracy: 0.8723 - val_loss: 0.4312
Epoch 4/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m193s[0m 967ms/step - accuracy: 0.8951 - loss: 0.3622 - val_accuracy: 0.8926 - val_loss: 0.3719
Epoch 5/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m175s[0m 877ms/step - accuracy: 0.9150 - loss: 0.2918 - val_accuracy: 0.8978 - val_loss: 0.3539
Epoch 6/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m275s[0m 1s/step - accuracy: 0.9259 - loss: 0.2534 - val_accuracy: 0.9007 - val_loss: 0.3428
Epoch 7/100
[1m200/20

In [5]:
# Final step: hand the trained model, the test split and the run metadata to
# utils.eval_and_save.
# NOTE(review): what gets persisted, and where, is decided inside
# eval_and_save -- check utils for details.
eval_and_save(
    model_type,
    model,
    ds_test,
    config,
    history,
    path,
)

[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 650ms/step - accuracy: 0.9154 - loss: 0.3063
