In [1]:
import os
import numpy as np
import librosa
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (Conv2D, MaxPooling2D, Dense, Dropout,
                                     BatchNormalization, Reshape, LSTM)
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.metrics import classification_report

In [2]:
# --- Configuration ---
DATASET_PATH = r"E:\Campus\Semester\FYP\siren_detection_project\dataset" # Use your dataset path
N_MFCC = 40
MAX_LEN = 174 # Fixed length for features
SR = 22050 # Sample Rate

In [3]:
def augment_data(y, sr):
    """Applies basic augmentation to the audio data."""
    noise = np.random.randn(len(y)) * 0.005
    y_aug = y + noise
    if np.random.rand() > 0.5:
        y_aug = librosa.effects.pitch_shift(y=y_aug, sr=sr, n_steps=np.random.randint(-2, 3))
    return y_aug

def extract_features(file_path, augment=False, n_mfcc=N_MFCC, max_len=MAX_LEN):
    """Extracts features from an audio file."""
    y, sr = librosa.load(file_path, sr=SR)
    if augment:
        y = augment_data(y, sr)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    delta = librosa.feature.delta(mfcc)
    delta2 = librosa.feature.delta(mfcc, order=2)
    features = np.vstack([mfcc, delta, delta2])
    if features.shape[1] < max_len:
        features = np.pad(features, ((0, 0), (0, max_len - features.shape[1])), mode='constant')
    else:
        features = features[:, :max_len]
    return features

def load_and_process_dataset(folder_path):
    """Loads all data and extracts features."""
    features_list = []
    labels = []
    for label in ['siren', 'non_siren']:
        sub_folder = os.path.join(folder_path, label)
        for file in os.listdir(sub_folder):
            if file.endswith('.wav'):
                path = os.path.join(sub_folder, file)
                features = extract_features(path, augment=True)
                features_list.append(features)
                labels.append(1 if label == 'siren' else 0)
    return features_list, np.array(labels)

In [4]:
# 1. Load Data and Split
print("Loading and processing dataset...")
features_list, y = load_and_process_dataset(DATASET_PATH)
X_train_list, X_test_list, y_train, y_test = train_test_split(
    features_list, y, test_size=0.2, random_state=42, stratify=y
)

Loading and processing dataset...


  "cipher": algorithms.TripleDES,
  "class": algorithms.Blowfish,
  "class": algorithms.TripleDES,


In [5]:
# 2. Calculate Normalization Stats from Training Data ONLY
print("Calculating normalization statistics...")
concatenated_train_features = np.concatenate(X_train_list, axis=1)
mean = np.mean(concatenated_train_features)
std = np.std(concatenated_train_features)
np.savez('norm_stats.npz', mean=mean, std=std)
print(f"Normalization Stats: Mean={mean:.4f}, Std={std:.4f}")

Calculating normalization statistics...
Normalization Stats: Mean=-0.6704, Std=17.3766


In [6]:
# 3. Normalize Data and Reshape
def normalize_and_reshape(feature_list, mean, std):
    normalized_features = [(f - mean) / std for f in feature_list]
    return np.array(normalized_features)[..., np.newaxis]

X_train = normalize_and_reshape(X_train_list, mean, std)
X_test = normalize_and_reshape(X_test_list, mean, std)

In [7]:
# 4. Build the CNN + LSTM Model
model = Sequential([
    # CNN Feature Extractor
    Conv2D(64, (3, 3), activation='relu', input_shape=X_train.shape[1:], padding='same'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),

    Conv2D(128, (3, 3), activation='relu', padding='same'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),

    Conv2D(256, (3, 3), activation='relu', padding='same'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),

    # Reshape for LSTM: (batch, height, width, channels) -> (batch, time_steps, features)
    # After 3 pooling layers on a (120, 174) input, shape is (15, 21, 256)
    # We treat the width (21) as time_steps and flatten height*channels into features.
    Reshape((21, 15 * 256)),

    # LSTM Temporal Modeler
    LSTM(128, return_sequences=False),
    Dropout(0.4),

    # Classifier Head
    Dense(64, activation='relu'),
    Dropout(0.4),
    Dense(1, activation='sigmoid')
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    loss='binary_crossentropy',
    metrics=['accuracy']
)
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [8]:
# 5. Train the Model
print("\n--- Training Model ---")
callbacks = [
    EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1)
]
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=32,
    callbacks=callbacks
)


--- Training Model ---
Epoch 1/50
[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m304s[0m 2s/step - accuracy: 0.7910 - loss: 0.4249 - val_accuracy: 0.5174 - val_loss: 0.6817 - learning_rate: 1.0000e-04
Epoch 2/50
[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m271s[0m 2s/step - accuracy: 0.9762 - loss: 0.0958 - val_accuracy: 0.6250 - val_loss: 0.6142 - learning_rate: 1.0000e-04
Epoch 3/50
[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m302s[0m 2s/step - accuracy: 0.9811 - loss: 0.0842 - val_accuracy: 0.7878 - val_loss: 0.4290 - learning_rate: 1.0000e-04
Epoch 4/50
[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m305s[0m 2s/step - accuracy: 0.9877 - loss: 0.0623 - val_accuracy: 0.9564 - val_loss: 0.1364 - learning_rate: 1.0000e-04
Epoch 5/50
[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m301s[0m 2s/step - accuracy: 0.9894 - loss: 0.0477 - val_accuracy: 0.9835 - val_loss: 0.0671 - learning_rate: 1.0000e-04
Epoch 6/50
[1m129/12

In [9]:
# 6. Evaluate and Save
print("\n--- Evaluating Model ---")
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {accuracy*100:.2f}%")
model.save("siren_model_cnn_lstm.h5")
print("\nModel and normalization stats saved successfully!")


--- Evaluating Model ---




Test Accuracy: 99.13%

Model and normalization stats saved successfully!
