**Converting all files in fine_tune_data to mono, 16 kHz WAV (the input format YAMNet requires)**

In [None]:
!pip install pydub
!apt-get install ffmpeg -y


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 35 not upgraded.


In [None]:
# Mount Google Drive at /content/drive so the dataset under MyDrive is reachable.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Copy the dataset from Drive into /content so the conversion cell below
# rewrites this copy (/content/fine_tune_data) rather than the files on Drive.
!cp -r /content/drive/MyDrive/fine_tune_data /content/

In [None]:
from pydub import AudioSegment
import os

input_root = "/content/fine_tune_data"
# Extensions treated as audio; matched case-insensitively so ".WAV"/".OGG" are not skipped.
AUDIO_EXTENSIONS = (".wav", ".ogg")

# Tracking how many files were converted and how many failed
fail_count = 0
success_count = 0

# Walk through all label folders (sorted so the processing order is deterministic)
for label in sorted(os.listdir(input_root)):
  label_path = os.path.join(input_root, label)
  if not os.path.isdir(label_path):
    continue
  print(f"🎧 Processing label: {label}")

  for filename in sorted(os.listdir(label_path)):
    stem, ext = os.path.splitext(filename)
    if ext.lower() not in AUDIO_EXTENSIONS:
      continue    # skip non-audio files
    file_path = os.path.join(label_path, filename)

    # The result is always WAV data, so it must carry a ".wav" name; otherwise
    # a converted ".ogg" file would be ignored by loaders that filter on ".wav".
    wav_path = os.path.join(label_path, stem + ".wav")
    if wav_path != file_path and os.path.exists(wav_path):
      # e.g. "a.ogg" next to an existing "a.wav": do not clobber the other file.
      wav_path = os.path.join(label_path, filename + ".wav")

    try:
      audio = AudioSegment.from_file(file_path)
      audio = audio.set_channels(1)        # Mono
      audio = audio.set_frame_rate(16000)  # 16 kHz
      audio.export(wav_path, format='wav')
      if wav_path != file_path:
        # Drop the original only after the WAV export succeeded.
        os.remove(file_path)
      success_count += 1
    except Exception as e:
      # Best effort: report the file and keep going with the rest of the dataset.
      print(f"❌ Failed: {filename} - Reason: {e}")
      fail_count += 1
print(f"\n ✅ Done! Converted: {success_count} files | Skipped(corrupt or unsupported): {fail_count} files.")

🎧 Processing label: normal
❌ Failed: 36429-2-0-18.wav - Reason: Decoding failed. ffmpeg returned error code: 1

Output from ffmpeg/avlib:

ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-li

**Fine-tuning: training a custom classifier on top of YAMNet embeddings**

In [None]:
# Mounting Google Drive at /content/drive (the dataset and the saved models live under MyDrive)
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
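# Installing Dependencies
# The requirements below are already satisfied in this environment;
# uncomment the line if you run the notebook somewhere they are missing.
#!pip install tensorflow tensorflow_hub librosa soundfile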

In [None]:
# Import Libraries
import os
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
import librosa
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras import layers, models


In [None]:
# Load the pretrained YAMNet model from TensorFlow Hub and define the embedding helper
yamnet_model = hub.load('https://tfhub.dev/google/yamnet/1')

def extract_embedding(file_path):
    """Return a single clip-level YAMNet embedding for one audio file.

    The file is loaded with librosa at 16 kHz, passed through YAMNet, and the
    embeddings YAMNet returns are averaged over their first axis.

    Args:
        file_path: Path of the audio file to embed.

    Returns:
        NumPy array holding the averaged embedding.
    """
    samples, _ = librosa.load(file_path, sr=16000)
    _, frame_embeddings, _ = yamnet_model(samples)
    clip_embedding = tf.reduce_mean(frame_embeddings, axis=0)
    return clip_embedding.numpy()


In [None]:
# Loading fine_tune_data Dataset
# NOTE(review): this reads the dataset on Drive, not the /content/fine_tune_data copy
# rewritten by the conversion cell; extract_embedding loads at 16 kHz either way —
# confirm which location is intended.
data_dir = '/content/drive/MyDrive/fine_tune_data'
X, y = [], []

# Folder name -> binary class id (1 for the emergency sound folders, 0 for 'normal').
label_map = {
    'scream_augmented': 1,
    'gun_shot': 1,
    'glass_break': 1,
    'siren': 1,
    'help_augmented': 1,
    'normal': 0
}


# Sorted listings keep the sample order stable between runs, so the seeded
# train/validation split made later is reproducible.
for label_folder in sorted(os.listdir(data_dir)):
    folder_path = os.path.join(data_dir, label_folder)
    if not os.path.isdir(folder_path):
        continue
    if label_folder not in label_map:
        # e.g. '.ipynb_checkpoints' or a new class folder without a mapping yet.
        print(f"⚠️ Skipping unmapped folder: {label_folder}")
        continue

    mapped_label = label_map[label_folder]
    for file in sorted(os.listdir(folder_path)):
        if not file.lower().endswith('.wav'):
            continue
        file_path = os.path.join(folder_path, file)
        try:
            emb = extract_embedding(file_path)
            X.append(emb)
            y.append(mapped_label)
        except Exception as e:
            # Best effort: report unreadable files and continue with the rest.
            print(f"❌ Skipping {file_path} — {e}")

X = np.array(X)
y = np.array(y)

# Fail here with a clear message instead of an opaque error in the training cell.
if len(X) == 0:
    raise RuntimeError(f"No embeddings were extracted from {data_dir}")


In [None]:
# Create and Train Custom Classifier
from sklearn.model_selection import train_test_split

# Hold out 20% of the embeddings for validation (fixed seed for a repeatable split).
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Size the output layer from the class ids actually used as targets.
# len(label_map) counts folder names (6), while the labels are only 0/1, which
# left four output units that could never be a training target.
num_classes = int(np.max(y)) + 1

model = models.Sequential([
    layers.Input(shape=(1024,)),  # YAMNet embedding size
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(num_classes, activation='softmax')
])

# Integer class ids as targets -> sparse categorical cross-entropy.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=20, batch_size=32)


Epoch 1/20
[1m226/226[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.9204 - loss: 0.2845 - val_accuracy: 0.9640 - val_loss: 0.0826
Epoch 2/20
[1m226/226[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9696 - loss: 0.0999 - val_accuracy: 0.9762 - val_loss: 0.0917
Epoch 3/20
[1m226/226[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9760 - loss: 0.0899 - val_accuracy: 0.9828 - val_loss: 0.0837
Epoch 4/20
[1m226/226[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9802 - loss: 0.0708 - val_accuracy: 0.9795 - val_loss: 0.0734
Epoch 5/20
[1m226/226[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9835 - loss: 0.0535 - val_accuracy: 0.9767 - val_loss: 0.0884
Epoch 6/20
[1m226/226[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9825 - loss: 0.0762 - val_accuracy: 0.9823 - val_loss: 0.0580
Epoch 7/20
[1m226/226[0m 

In [None]:
# Persist the trained classifier to Google Drive in .keras format,
# leaving the optimizer state out of the saved file.
model.save(
    '/content/drive/MyDrive/fine_tuned_yamnet_classifier1.keras',
    include_optimizer=False,
)

In [None]:
# Read the classifier back from the .keras file that was just written to Drive.
model = tf.keras.models.load_model(
    '/content/drive/MyDrive/fine_tuned_yamnet_classifier1.keras'
)

**Converting and saving the .tflite model**

In [None]:
import tensorflow as tf

# Destination of the converted model on Google Drive
tflite_path = '/content/drive/MyDrive/fine_tuned_yamnet_classifier1.tflite'

# Load the saved Keras classifier from Google Drive
model = tf.keras.models.load_model('/content/drive/MyDrive/fine_tuned_yamnet_classifier1.keras')

# Build a converter from the Keras model and produce the TFLite model bytes
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Write the converted model in binary mode
with open(tflite_path, 'wb') as tflite_file:
    tflite_file.write(tflite_model)

print(f"TFLite model saved to: {tflite_path}")


Saved artifact at '/tmp/tmpbiuf8v_a'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 1024), dtype=tf.float32, name='input_layer')
Output Type:
  TensorSpec(shape=(None, 6), dtype=tf.float32, name=None)
Captures:
  135546696622416: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135546696621456: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135546696615888: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135546689659408: TensorSpec(shape=(), dtype=tf.resource, name=None)
TFLite model saved to: /content/drive/MyDrive/fine_tuned_yamnet_classifier1.tflite


**Saving the label mapping**

In [None]:
import json

# Class name -> class id, exported next to the model files on Drive.
# Kept under its own name so the folder-level `label_map` used by the data
# loading and training cells is not overwritten when this cell runs.
class_label_map = {
    "non_emergency": 0,
    "emergency": 1
}
with open('/content/drive/MyDrive/label1_map.json', 'w') as f:
    json.dump(class_label_map, f)


In [None]:
# Sanity check: the distinct class ids present in the training labels.
unique_labels = set(y)
print("Unique labels used in training:", unique_labels)


Unique labels used in training: {np.int64(0), np.int64(1)}
