In [1]:
import tensorflow as tf
from tensorflow.keras import layers,models
import numpy as np

In [29]:
# Read the labelled audio clips under ./data in batches of 64, fixing every
# clip to 16000 samples. With subset='both' the loader hands back the
# (training, held-out) pair; the seed keeps the 80/20 split reproducible.
train_data, test_data = tf.keras.utils.audio_dataset_from_directory(
    'data',
    validation_split=0.2,
    subset='both',
    seed=42,
    batch_size=64,
    output_sequence_length=16000,
)

# Class names come from the loader; kept as an array for index lookups.
label_names = np.array(train_data.class_names)

Found 7705 files belonging to 8 classes.
Using 6164 files for training.
Using 1541 files for validation.


In [30]:
def squeeze(audio,labels):
    """Remove the trailing size-1 axis from `audio`, passing `labels` through.

    Args:
        audio: Tensor whose last axis has length 1 (presumably the single
            audio channel -- confirm against the loader output).
        labels: Labels paired with `audio`; returned untouched.

    Returns:
        Tuple of (audio without its last axis, labels).
    """
    return tf.squeeze(audio, axis=-1), labels

In [31]:
# Drop the trailing size-1 axis from every batch of both splits.
# NOTE(review): test_data is rebound from itself here, so running this cell
# twice would try to squeeze an axis that is already gone -- re-run the
# loader cell first.
training = train_data.map(map_func=squeeze, num_parallel_calls=tf.data.AUTOTUNE)
test_data = test_data.map(map_func=squeeze, num_parallel_calls=tf.data.AUTOTUNE)

In [32]:
# Split the held-out data into two interleaved halves: shard 0 becomes the
# test set and shard 1 the validation set.
testing, validating = (
    test_data.shard(num_shards=2, index=half) for half in range(2)
)

In [33]:
def spectogram(waveform, frame_length=255, frame_step=128):
    """Convert waveforms into magnitude spectrograms with a trailing axis.

    Args:
        waveform: Tensor of audio samples, handed unchanged to
            `tf.signal.stft` (which transforms along the last axis).
        frame_length: STFT window length in samples. Defaults to 255.
        frame_step: Hop between successive windows in samples.
            Defaults to 128.

    Returns:
        Tensor holding the absolute value of the STFT, with one extra
        size-1 axis appended at the end.
    """
    # Intermediate results deliberately use names different from the
    # function's own, so a slip in one line cannot silently turn the
    # return value into something other than a tensor.
    stft_output = tf.signal.stft(
        waveform, frame_length=frame_length, frame_step=frame_step
    )
    # The STFT is complex-valued; keep only the magnitude.
    magnitude = tf.abs(stft_output)
    # Append a size-1 axis so the result can be fed to Conv2D layers.
    return magnitude[..., tf.newaxis]

In [34]:
def make_dataset(dataset):
    """Map a dataset of (audio, labels) pairs to (spectrogram, labels) pairs.

    Args:
        dataset: Object exposing a `map` method that yields
            (audio, labels) elements.

    Returns:
        The result of `dataset.map`, with every audio element replaced by
        `spectogram(audio)` and labels left as they were.
    """
    def to_spectrogram_pair(audio, labels):
        # Only the audio half of the pair is transformed.
        return spectogram(audio), labels

    return dataset.map(to_spectrogram_pair, num_parallel_calls=tf.data.AUTOTUNE)

In [35]:
# Replace each waveform split with its spectrogram counterpart.
# NOTE(review): every name is rebound from itself, so re-running this cell
# would push spectrograms back through make_dataset -- re-run the earlier
# cells first.
training, testing, validating = (
    make_dataset(split) for split in (training, testing, validating)
)


TypeError: Unsupported return value from function passed to Dataset.map(): ([Ellipsis, None], <tf.Tensor 'args_1:0' shape=(None,) dtype=int32>).

In [19]:
# Cache the computed spectrograms and prefetch batches while the model is
# busy. Only the training split is shuffled.
training = (
    training
    .cache()
    .shuffle(buffer_size=10000)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)
testing, validating = (
    split.cache().prefetch(buffer_size=tf.data.AUTOTUNE)
    for split in (testing, validating)
)

In [20]:
# Fit the normalization statistics on the training spectrograms only; the
# labels are discarded before adapting.
norm_layers = layers.Normalization()
norm_layers.adapt(
    training.map(lambda spec, _labels: spec)
)

In [22]:
# Pull one training batch just to read off its shape.
example_spectogram, example = next(iter(training.take(1)))

# Everything after the leading batch axis is the per-example input shape.
input_shape = example_spectogram.shape[1:]
print(input_shape)

(124, 129, 1)


In [23]:
# One output unit per class name found by the loader.
num_labels = label_names.shape[0]

In [24]:
model = models.Sequential([
    layers.Input(shape=input_shape),
    # Shrink each spectrogram to 32x32 before the convolutions.
    layers.Resizing(height=32, width=32),
    # Normalization layer adapted on the training spectrograms.
    norm_layers,
    layers.Conv2D(filters=32, kernel_size=3, activation='relu'),
    layers.Conv2D(filters=64, kernel_size=3, activation='relu'),
    layers.MaxPooling2D(),
    layers.Dropout(rate=0.25),
    layers.Flatten(),
    layers.Dense(units=128, activation='relu'),
    layers.Dropout(rate=0.5),
    # No activation: the final layer emits one raw logit per label.
    layers.Dense(units=num_labels),
])

In [25]:
# Print a summary of the network defined above.
model.summary()

In [27]:
# Labels are integer class ids and the final Dense layer has no activation,
# so the loss is sparse categorical cross-entropy computed from logits.
model.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.Adam(),
    metrics=['accuracy'],
)

In [28]:
# Train for at most 10 epochs, validating on the `validating` split.
# `callbacks` is documented as a list, so the callback is wrapped in one.
# Early stopping watches val_loss, gives up after 2 epochs without
# improvement, and restores the best weights seen rather than leaving the
# model at its last epoch.
history = model.fit(
    training,
    validation_data=validating,
    epochs=10,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=2,
            restore_best_weights=True,
            verbose=1,
        )
    ],
)

Epoch 1/10
[1m97/97[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 45ms/step - accuracy: 0.2655 - loss: 1.9289 - val_accuracy: 0.5807 - val_loss: 1.3267
Epoch 2/10
[1m97/97[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 32ms/step - accuracy: 0.5442 - loss: 1.2767 - val_accuracy: 0.6823 - val_loss: 0.9612
Epoch 3/10
[1m97/97[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 32ms/step - accuracy: 0.6588 - loss: 0.9340 - val_accuracy: 0.7435 - val_loss: 0.7756
Epoch 4/10
[1m97/97[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 32ms/step - accuracy: 0.7228 - loss: 0.7678 - val_accuracy: 0.7539 - val_loss: 0.7191
Epoch 5/10
[1m97/97[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 33ms/step - accuracy: 0.7664 - loss: 0.6544 - val_accuracy: 0.7982 - val_loss: 0.6514
Epoch 6/10
[1m97/97[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 32ms/step - accuracy: 0.7961 - loss: 0.5758 - val_accuracy: 0.8047 - val_loss: 0.5737
Epoch 7/10
[1m97/97[0m [32m━━━━

KeyboardInterrupt: 