In [4]:
import math
from pathlib import Path
from typing import Tuple
import numpy as np
from random import sample
import tensorflow as tf

print("TensorFlow version:", tf.__version__)
print("GPU Available:", tf.config.list_physical_devices('GPU'))

SPECTRO_ROOT = Path("/home/div/vsCode/Audio_classifier_Model/data/urbansound8k_spectrograms")
CLASS_NAMES = sorted([d.name for d in SPECTRO_ROOT.iterdir() if d.is_dir()])
CLASS_TO_INDEX = {name: idx for idx, name in enumerate(CLASS_NAMES)}
MAX_FRAMES = 174  # Adjust based on spectrogram dimensions
tf_files = []
tf_labels = []

# Enable memory growth to prevent full GPU allocation
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print(f"Memory growth enabled for {len(gpus)} GPU(s)")
    except RuntimeError as e:
        print(e)

TensorFlow version: 2.20.0
GPU Available: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
Physical devices cannot be modified after being initialized


In [5]:
# Setting up file paths and labels

for cls in CLASS_NAMES:
    for npy in (SPECTRO_ROOT / cls).glob("*.npy"):
        tf_files.append(str(npy))
        tf_labels.append(CLASS_TO_INDEX[cls])
tf_files = np.array(tf_files)
tf_labels = np.array(tf_labels, dtype=np.int32)

def load_npy(path, label):
    spec = np.load(path.decode("utf-8")).astype(np.float32)
    spec = (spec - spec.mean()) / (spec.std() + 1e-6)

    if spec.shape[1] < MAX_FRAMES:
        pad = MAX_FRAMES - spec.shape[1]
        spec = np.pad(spec, ((0, 0), (0, pad)), mode="constant")
    else:
        spec = spec[:, :MAX_FRAMES]

    spec = np.expand_dims(spec, axis=-1)
    return spec, label

def tf_loader(paths, labels, batch_size=32, shuffle=True):
    ds = tf.data.Dataset.from_tensor_slices((paths, labels))
    if shuffle:
        ds = ds.shuffle(buffer_size=len(paths))
    ds = ds.map(
        lambda p, l: tf.numpy_function(load_npy, [p, l], [tf.float32, tf.int32]),
        num_parallel_calls=tf.data.AUTOTUNE,
    )
    ds = ds.map(
        lambda x, y: (tf.ensure_shape(x, [1025, MAX_FRAMES, 1]), tf.ensure_shape(y, [])),
        num_parallel_calls=tf.data.AUTOTUNE,
    )
    ds = ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)
    return ds

# Dataset printout
print(f"Total samples: {len(tf_files)}")
for cls in CLASS_NAMES:
    count = np.sum(tf_labels == CLASS_TO_INDEX[cls])
    print(f"  Class '{cls}': {count} samples")




Total samples: 2732
  Class 'car_horn': 429 samples
  Class 'dog_bark': 1000 samples
  Class 'gun_shot': 374 samples
  Class 'siren': 929 samples


In [None]:
# Creating training and validation datasets

split = int(len(tf_files) * 0.2)
train_ds = tf_loader(tf_files[split:], tf_labels[split:], batch_size=8, shuffle=True)
val_ds = tf_loader(tf_files[:split], tf_labels[:split], batch_size=8, shuffle=False)

# Building and training the CNN model
tf_model = tf.keras.Sequential([
    # tf.keras.layers.Input(shape=(1025, MAX_FRAMES, 1)),
    tf.keras.layers.Conv2D(64, 1, padding="same", activation="relu", input_shape=(1025, MAX_FRAMES, 1)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Conv2D(128, 3, padding="same", activation="relu"),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Conv2D(256, 3, padding="same", activation="relu"),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(len(CLASS_NAMES), activation="softmax"),
])

tf_model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-3),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=["accuracy"],
)

# Train and report metrics
history = tf_model.fit(train_ds, validation_data=val_ds, epochs=10)

train_metrics = {metric: values[-1] for metric, values in history.history.items()}
print("\nFinal training metrics:")
for metric, value in train_metrics.items():
    print(f"  {metric}: {value:.4f}")

eval_results = tf_model.evaluate(val_ds, return_dict=True)
print("\nValidation evaluation:")
for metric, value in eval_results.items():
    print(f"  {metric}: {value:.4f}")

Epoch 1/10
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 169ms/step - accuracy: 0.6683 - loss: 0.8168 - val_accuracy: 0.1264 - val_loss: 4.7679
Epoch 2/10
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 169ms/step - accuracy: 0.6683 - loss: 0.8168 - val_accuracy: 0.1264 - val_loss: 4.7679
Epoch 2/10
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 141ms/step - accuracy: 0.6972 - loss: 0.6602 - val_accuracy: 0.1502 - val_loss: 5.5280
Epoch 3/10
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 141ms/step - accuracy: 0.6972 - loss: 0.6602 - val_accuracy: 0.1502 - val_loss: 5.5280
Epoch 3/10
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 142ms/step - accuracy: 0.7466 - loss: 0.5931 - val_accuracy: 0.1392 - val_loss: 5.9816
Epoch 4/10
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 142ms/step - accuracy: 0.7466 - loss: 0.5931 - val_accuracy: 0.1392 - val_loss: 5.9816
Epoch 4/10