# Final Emergency Sound Classification Model
This notebook contains the full code for the best-performing model:
- CNN using 128-band Mel Spectrograms
- SpecAugment
- Class weights
- Learning rate scheduler
- Achieved 94% accuracy

In [None]:
# Mount Google Drive so the dataset and the model checkpoint paths under
# /content/drive are reachable from this Colab runtime.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
# Install dependencies
!pip install audiomentations

In [None]:
# Import libraries
import os
import numpy as np
import librosa
import random
from audiomentations import Compose, PitchShift, TimeStretch, Gain, Shift
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.utils import class_weight
import matplotlib.pyplot as plt
from tensorflow.keras.utils import to_categorical, Sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

In [None]:
# Paths and constants
# Dataset root on the mounted Drive; each sub-folder name is used as a class label.
DATA_PATH = "/content/drive/MyDrive/Split_Dataset"

# Every clip is resampled to SAMPLE_RATE (Hz) and limited to DURATION (seconds).
SAMPLE_RATE = 16_000
DURATION = 3

# Number of mel bands in each spectrogram.
N_MELS = 128

# Fixed waveform length (in samples) that every clip is padded or trimmed to.
SAMPLES_PER_TRACK = DURATION * SAMPLE_RATE

In [None]:
# Augmentation
# Waveform-level augmentation pipeline; every transform fires independently with p=0.5.
#
# Shift's keyword names differ between audiomentations releases: newer releases
# take min_shift/max_shift (fractions by default) and enable a short fade unless
# fade_duration=0.0, while older releases take min_fraction/max_fraction and do
# not fade by default. Because the install cell does not pin a version, try the
# current signature first and fall back to the legacy one.
# NOTE(review): confirm against the changelog of the installed audiomentations version.
try:
    _shift = Shift(min_shift=-0.1, max_shift=0.1, fade_duration=0.0, p=0.5)
except TypeError:
    _shift = Shift(min_fraction=-0.1, max_fraction=0.1, p=0.5)

AUGMENT = Compose([
    PitchShift(min_semitones=-2, max_semitones=2, p=0.5),   # +/- 2 semitones
    TimeStretch(min_rate=0.8, max_rate=1.2, p=0.5),         # 0.8x .. 1.2x speed
    Gain(min_gain_db=-6, max_gain_db=6, p=0.5),             # +/- 6 dB
    _shift                                                  # roll by up to 10% of the clip
])

In [None]:
# Mel spectrogram extraction with SpecAugment
def extract_mel_spectrogram(file_path, apply_augment=False):
    """Load one audio file and convert it to a dB-scaled mel spectrogram.

    The clip is loaded at SAMPLE_RATE for at most DURATION seconds, then
    zero-padded at the end or truncated to exactly SAMPLES_PER_TRACK samples.

    Args:
        file_path: Path of the audio file to load.
        apply_augment: When true, the AUGMENT pipeline is run on the waveform
            before the spectrogram is computed.

    Returns:
        The mel spectrogram (N_MELS bands) converted to dB relative to its own
        maximum, or None if anything in the pipeline raised; the error is
        printed rather than propagated.
    """
    try:
        waveform, rate = librosa.load(file_path, sr=SAMPLE_RATE, duration=DURATION)

        # Collapse multi-channel audio to a single channel.
        if waveform.ndim > 1:
            waveform = librosa.to_mono(waveform)

        # Force a fixed length: pad short clips with trailing zeros, trim long ones.
        shortfall = SAMPLES_PER_TRACK - len(waveform)
        if shortfall > 0:
            waveform = np.pad(waveform, (0, shortfall))
        else:
            waveform = waveform[:SAMPLES_PER_TRACK]

        if apply_augment:
            waveform = AUGMENT(samples=waveform, sample_rate=rate)

        power_spec = librosa.feature.melspectrogram(y=waveform, sr=rate, n_mels=N_MELS)
        return librosa.power_to_db(power_spec, ref=np.max)
    except Exception as e:
        # Best-effort: callers treat None as "skip this file".
        print(f"Error: {file_path} | {e}")
        return None

def apply_specaugment(mel, time_mask_param=15, freq_mask_param=10, mask_value=0):
    """Return a copy of ``mel`` with one random frequency band and one random time span masked.

    Args:
        mel: 2-D array indexed as (frequency bins, time frames).
        time_mask_param: Inclusive upper bound on the masked width, in time frames.
        freq_mask_param: Inclusive upper bound on the masked height, in frequency bins.
        mask_value: Value written into the masked cells. Defaults to 0, which is
            what this function has always written.
            NOTE(review): extract_mel_spectrogram converts with ``ref=np.max``,
            so 0 dB is the spectrogram's peak and the default paints masked
            regions at peak level; pass e.g. ``mel.min()`` to mask to the floor
            instead. The default is left unchanged so existing training runs
            behave the same.

    Returns:
        A new array with the same shape as ``mel``; the input is not modified.
    """
    masked = mel.copy()
    n_freq, n_time = masked.shape[0], masked.shape[1]

    # Clamp each mask width to the matching dimension (and to >= 0) so that
    # random.randint never receives an empty range, which raises ValueError.
    band = random.randint(0, max(0, min(freq_mask_param, n_freq)))
    band_start = random.randint(0, n_freq - band)
    masked[band_start:band_start + band, :] = mask_value

    span = random.randint(0, max(0, min(time_mask_param, n_time)))
    span_start = random.randint(0, n_time - span)
    masked[:, span_start:span_start + span] = mask_value

    return masked

In [None]:
# Data Generator
class AudioDataGenerator(Sequence):
    """Keras ``Sequence`` that turns audio files into batches of mel spectrograms on the fly.

    Each item is a tuple ``(X, y)``: ``X`` stacks the spectrograms returned by
    ``extract_mel_spectrogram`` with a trailing channel axis added, and ``y``
    stacks the matching entries of ``labels``. Files that fail to load, or whose
    spectrogram does not have ``expected_frames`` time frames, are dropped from
    their batch, so a batch may contain fewer than ``batch_size`` samples.

    Args:
        file_paths: Sequence of audio file paths.
        labels: Sequence of per-file targets, aligned with ``file_paths``.
        batch_size: Number of files requested per batch.
        augment: When true, waveform augmentation and SpecAugment are applied.
        shuffle: When true, samples are shuffled on construction and again at
            the end of every epoch.
        expected_frames: Number of spectrogram time frames a sample must have
            to be kept (94 by default, the width the model in this notebook is
            built for).
        **kwargs: Forwarded unchanged to the base-class constructor.

    Raises:
        ValueError: If ``file_paths`` and ``labels`` differ in length or
            ``batch_size`` is smaller than 1.
    """

    # A batch needs at least this many usable samples to be returned as-is;
    # otherwise the following batches are tried instead.
    # NOTE(review): presumably chosen to avoid single-sample batches — confirm.
    MIN_BATCH_SIZE = 2

    def __init__(self, file_paths, labels, batch_size=32, augment=False, shuffle=True,
                 expected_frames=94, **kwargs):
        # The base class expects its constructor to be called by subclasses.
        super().__init__(**kwargs)
        if len(file_paths) != len(labels):
            raise ValueError(
                "file_paths and labels must have the same length, "
                f"got {len(file_paths)} and {len(labels)}"
            )
        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")
        self.file_paths = file_paths
        self.labels = labels
        self.batch_size = batch_size
        self.augment = augment
        self.shuffle = shuffle
        self.expected_frames = expected_frames
        # Initial shuffle so the very first epoch is already randomized.
        self.on_epoch_end()

    def __len__(self):
        """Number of batches per epoch (the last one may be partial)."""
        return int(np.ceil(len(self.file_paths) / self.batch_size))

    def _load_batch(self, index):
        """Load batch ``index`` and return parallel lists of features and labels."""
        start = index * self.batch_size
        stop = start + self.batch_size
        features, targets = [], []
        for path, label in zip(self.file_paths[start:stop], self.labels[start:stop]):
            mel = extract_mel_spectrogram(path, apply_augment=self.augment)
            # Skip unreadable files and spectrograms of unexpected width.
            if mel is None or mel.shape[1] != self.expected_frames:
                continue
            if self.augment:
                mel = apply_specaugment(mel)
            features.append(mel[..., np.newaxis])
            targets.append(label)
        return features, targets

    def __getitem__(self, index):
        """Return batch ``index`` as ``(X, y)`` arrays.

        If the requested batch yields fewer than ``MIN_BATCH_SIZE`` usable
        samples, the following batches (wrapping around) are tried in turn.

        Raises:
            IndexError: If the generator contains no samples.
            RuntimeError: If no batch yields enough usable samples.
        """
        n_batches = len(self)
        if n_batches == 0:
            raise IndexError("AudioDataGenerator contains no samples")

        # Bounded search instead of unbounded recursion: each batch is tried at most once.
        for offset in range(n_batches):
            candidate = index if offset == 0 else (index + offset) % n_batches
            features, targets = self._load_batch(candidate)
            if len(features) >= self.MIN_BATCH_SIZE:
                return np.array(features), np.array(targets)

        raise RuntimeError(
            f"No batch produced at least {self.MIN_BATCH_SIZE} valid samples; "
            "check that the audio files are readable and yield "
            f"{self.expected_frames} spectrogram frames"
        )

    def on_epoch_end(self):
        """Reshuffle paths and labels together when shuffling is enabled."""
        if not self.shuffle or len(self.file_paths) == 0:
            return
        # Shuffle a shared index permutation so paths and labels stay aligned.
        order = list(range(len(self.file_paths)))
        random.shuffle(order)
        self.file_paths = [self.file_paths[i] for i in order]
        self.labels = [self.labels[i] for i in order]

In [None]:
# Prepare data
file_paths = []
labels = []

for label in os.listdir(DATA_PATH):
    folder = os.path.join(DATA_PATH, label)
    if os.path.isdir(folder):
        for file in os.listdir(folder):
            if file.endswith('.wav'):
                file_paths.append(os.path.join(folder, file))
                labels.append(label)

le = LabelEncoder()
labels_encoded = le.fit_transform(labels)
labels_onehot = to_categorical(labels_encoded)

train_paths, test_paths, train_labels, test_labels = train_test_split(
    file_paths, labels_onehot, test_size=0.2, stratify=labels_encoded, random_state=42
)

train_gen = AudioDataGenerator(train_paths, train_labels, augment=True)
val_gen = AudioDataGenerator(test_paths, test_labels, augment=False)

In [None]:
# Define model
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(128, 94, 1)),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    Dropout(0.3),

    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    Dropout(0.3),

    Conv2D(128, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    Dropout(0.3),

    Flatten(),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),

    Dense(labels_onehot.shape[1], activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

In [None]:
# Compute class weights
y_train_labels = np.argmax(train_labels, axis=1)
class_weights = class_weight.compute_class_weight(class_weight='balanced',
                                                  classes=np.unique(y_train_labels),
                                                  y=y_train_labels)
class_weight_dict = dict(enumerate(class_weights))

In [None]:
# Train model with callbacks
callbacks = [
    EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
    ModelCheckpoint('/content/drive/MyDrive/best_final_model.h5', save_best_only=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, verbose=1, min_lr=1e-6)
]

history = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=30,
    callbacks=callbacks,
    class_weight=class_weight_dict
)