# Siren Detection & Recognition in Real Time

### Import necessary libraries

In [1]:
import librosa
import pyaudio
import numpy as np
import tensorflow as tf
from scipy import signal
from keras.models import load_model

# Raise TensorFlow's logger level to ERROR so lower-severity messages are not printed.
tf.get_logger().setLevel('ERROR')




### Constants

In [2]:
# Sampling rate in Hz; used as the PyAudio stream rate and as `sr` for librosa.
RATE = 22050
# Frames read per iteration: RATE * 3, i.e. three seconds of audio per analysis window.
CHUNK = RATE * 3
# Sample format requested from PyAudio (16-bit signed integers).
FORMAT = pyaudio.paInt16
# Number of audio channels opened on the stream (mono).
CHANNELS = 1

### Load the models

In [3]:
# Load the Siren Detection Model (path is relative to the working directory).
model_detection = load_model('Detection.h5')

# Load the Siren Recognition Model (path is relative to the working directory).
# NOTE(review): the file name keeps the 'Recognision' spelling; changing it here
# requires renaming the file on disk as well.
model_recognition = tf.keras.models.load_model('Recognision.h5')

# Butterworth band-pass filter (N=5, 50-5000 Hz band edges at sampling rate RATE),
# returned as second-order sections; applied to the audio in preprocess_detection.
sos = signal.butter(5, [50, 5000], 'bandpass', fs=RATE, output='sos')

In [4]:
# Print the layer-by-layer summary of the detection model.
model_detection.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_20 (Conv2D)          (None, 39, 129, 32)       160       
                                                                 
 max_pooling2d_17 (MaxPooli  (None, 19, 64, 32)        0         
 ng2D)                                                           
                                                                 
 dropout_17 (Dropout)        (None, 19, 64, 32)        0         
                                                                 
 conv2d_21 (Conv2D)          (None, 18, 63, 32)        4128      
                                                                 
 max_pooling2d_18 (MaxPooli  (None, 9, 31, 32)         0         
 ng2D)                                                           
                                                                 
 dropout_18 (Dropout)        (None, 9, 31, 32)        

In [11]:
# Print the layer-by-layer summary of the recognition model.
model_recognition.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 80, 1)]           0         
                                                                 
 conv1d (Conv1D)             (None, 80, 3)             42        
                                                                 
 max_pooling1d (MaxPooling1  (None, 40, 3)             0         
 D)                                                              
                                                                 
 conv1d_1 (Conv1D)           (None, 40, 16)            544       
                                                                 
 max_pooling1d_1 (MaxPoolin  (None, 20, 16)            0         
 g1D)                                                            
                                                                 
 global_max_pooling1d (Glob  (None, 16)                0     

In [12]:
# Per-sample input shapes (batch dimension dropped) that the preprocessing
# functions must produce. Both are read from the model's own `input_shape`
# so the two lookups are symmetric and do not rely on `layers[0]` being an
# InputLayer whose shape is wrapped in a list.
# Assumes each model has a single input tensor, as the summaries above show.
detection_input_shape = model_detection.input_shape[1:]
recognition_input_shape = model_recognition.input_shape[1:]

print("Siren Detection Model Input Shape: ", detection_input_shape)
print("Siren Recognition Model Input Shape: ", recognition_input_shape)

Siren Detection Model Input Shape:  (40, 130, 1)
Siren Recognition Model Input Shape:  (80, 1)


### Preprocessing function for Siren Detection

In [13]:
def preprocess_detection(audio_data):
    """Turn an audio buffer into one input batch for the siren detection model.

    The signal is band-pass filtered with the module-level ``sos`` filter and
    converted to 40 MFCCs per frame. The frame axis is then forced to exactly
    ``detection_input_shape[1]`` columns: longer clips are cropped and shorter
    clips are zero-padded on the right, so the reshape below cannot fail when
    the buffer length differs from ``CHUNK``.

    Args:
        audio_data: Audio samples at ``RATE`` Hz (expected to be a 1-D array).

    Returns:
        Array of shape ``(1,) + detection_input_shape``.
    """
    target_frames = detection_input_shape[1]

    filtered = signal.sosfilt(sos, audio_data)
    mfccs = librosa.feature.mfcc(y=filtered, sr=RATE, n_mfcc=40)

    # Crop first: the original only padded, so a clip yielding more frames
    # than the model expects made the reshape raise.
    mfccs = mfccs[:, :target_frames]

    # After cropping, the number of missing frames is never negative.
    missing_frames = target_frames - mfccs.shape[1]
    mfccs_fixed = np.pad(mfccs, ((0, 0), (0, missing_frames)), mode='constant')

    # Add the trailing channel axis, then the leading batch axis.
    mfccs_fixed = mfccs_fixed.reshape(detection_input_shape)
    return np.expand_dims(mfccs_fixed, axis=0)

### Preprocessing function for Siren Recognition

In [14]:
def preprocess_recognition(audio_data):
    """Build one input batch for the siren recognition model.

    Only the first ``RATE`` samples (one second at ``RATE`` Hz) are used.
    80 MFCCs are computed per frame and averaged over the frame axis, giving
    one value per coefficient, which is reshaped to
    ``recognition_input_shape`` and given a leading batch axis.

    Args:
        audio_data: Audio samples at ``RATE`` Hz.

    Returns:
        Array of shape ``(1,) + recognition_input_shape``.
    """
    first_second = audio_data[:RATE]
    coefficients = librosa.feature.mfcc(y=first_second, sr=RATE, n_mfcc=80)

    # Average every coefficient across all frames.
    per_coefficient_mean = np.mean(coefficients.T, axis=0)

    model_sample = per_coefficient_mean.reshape(recognition_input_shape)
    return model_sample[np.newaxis, ...]

### Real-time detection and recognition loop

In [19]:
def _rescale_to_unit_range(samples):
    """Min-max rescale ``samples`` to [-1, 1]; a constant buffer maps to zeros."""
    lowest = samples.min()
    span = samples.max() - lowest
    if span == 0:
        # A constant (e.g. fully silent) buffer would divide by zero and fill
        # the model input with NaN; treat it as silence instead.
        return np.zeros_like(samples)
    return 2 * ((samples - lowest) / span) - 1


def real_time_detection_recognition():
    """Continuously capture microphone audio and report detected sirens.

    Each iteration reads ``CHUNK`` frames from the input stream, converts the
    16-bit samples to float32, rescales them to [-1, 1] and runs the detection
    model. When the value at index ``[0][1]`` of the detection output exceeds
    the threshold, the recognition model is run and the siren type is printed;
    otherwise that value is printed as a percentage.

    The loop has no exit condition: it runs until an exception (typically
    ``KeyboardInterrupt``) propagates out. The audio stream and the PyAudio
    instance are released in a ``finally`` block, so they are cleaned up on
    every exit path.

    Raises:
        Any exception raised by PyAudio, librosa or the models is propagated
        to the caller after the audio resources are released.
    """
    detection_threshold = 0.5
    siren_labels = {0: "Ambulance", 1: "Firetruck", 2: "Traffic"}

    # None selects the system default input device; set an explicit index
    # from p.get_device_info_by_index(...) to use a different microphone.
    chosen_device_index = None

    p = pyaudio.PyAudio()
    stream = None
    try:
        # Input-only stream: nothing is ever written back, so no output
        # (playback) path is opened.
        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input_device_index=chosen_device_index,
                        input=True,
                        frames_per_buffer=CHUNK)

        while True:
            # Read one analysis window of raw 16-bit samples.
            data = stream.read(CHUNK)
            audio_data = np.frombuffer(data, dtype=np.int16)

            # Convert to floating point, then stretch to the full [-1, 1] range.
            audio_data = audio_data.astype(np.float32) / 32767.0
            audio_data = _rescale_to_unit_range(audio_data)

            # Detect siren using detection model.
            prediction_feature = preprocess_detection(audio_data)
            predicted_proba_vector = model_detection.predict(prediction_feature, verbose=0)
            siren_prob = predicted_proba_vector[0][1]

            # If siren is detected, perform recognition.
            if siren_prob > detection_threshold:
                preprocessed_data_recognition = preprocess_recognition(audio_data)
                class_scores = model_recognition.predict(preprocessed_data_recognition, verbose=0)[0]
                siren_type = siren_labels[np.argmax(class_scores)]
                print(f"SIREN DETECTED! Type: {siren_type}")
            else:
                print(f"No siren detected, Certainty: {(siren_prob * 100):.2f}%")
    finally:
        # Always release the audio device, including on Ctrl+C or errors.
        if stream is not None:
            stream.stop_stream()
            stream.close()
        p.terminate()

In [20]:
# Entry point: run the capture loop until it is interrupted or fails.
if __name__ == "__main__":
    try:
        real_time_detection_recognition()
    except KeyboardInterrupt:
        # The loop has no exit condition, so an interrupt is how it is stopped.
        print("Program terminated by user")
    except Exception as e:
        # Any other failure is reported as a one-line message; no traceback is shown.
        print(f"An error occurred: {e}")

No siren detected, Certainty: 14.28%
No siren detected, Certainty: 4.79%
SIREN DETECTED! Type: Ambulance
SIREN DETECTED! Type: Firetruck
SIREN DETECTED! Type: Firetruck
SIREN DETECTED! Type: Firetruck
SIREN DETECTED! Type: Firetruck
SIREN DETECTED! Type: Firetruck
SIREN DETECTED! Type: Firetruck
SIREN DETECTED! Type: Ambulance
SIREN DETECTED! Type: Ambulance
SIREN DETECTED! Type: Firetruck
SIREN DETECTED! Type: Ambulance
SIREN DETECTED! Type: Ambulance
Program terminated by user


---