In [1]:
from scipy import signal
import librosa
import numpy as np

# Sample rate (Hz) shared by the band-pass filter design and the MFCC extraction.
RATE = 22050

# 5th-order Butterworth band-pass with 50 Hz and 5000 Hz edges, expressed as
# second-order sections for use with signal.sosfilt.
sos = signal.butter(
    N=5,
    Wn=[50, 5000],
    btype='bandpass',
    output='sos',
    fs=RATE,
)

def get_mfccs(audio):
    """Compute MFCC features for one chunk of audio samples.

    The samples are min-max rescaled to the range [-1, 1], band-pass filtered
    with the module-level ``sos`` filter, and passed to
    ``librosa.feature.mfcc`` with ``sr=RATE`` and ``n_mfcc=40``.

    Parameters
    ----------
    audio : array-like
        One-dimensional sequence of numeric audio samples (for example the
        int16 samples read from the microphone stream).

    Returns
    -------
    numpy.ndarray or None
        The MFCC array produced by librosa, or ``None`` when feature
        extraction fails (the error is printed rather than raised).
    """
    try:
        # Convert to float64 before any arithmetic: with int16 input the
        # expressions (audio - min) and (max - min) can exceed the int16
        # range and silently wrap around, corrupting the normalisation.
        samples = np.asarray(audio, dtype=np.float64)

        # Vectorised reductions; the builtin min()/max() iterate the array
        # element by element in Python.
        lowest = samples.min()
        span = samples.max() - lowest

        if span == 0:
            # Constant chunk (e.g. digital silence): there is no range to
            # rescale, so treat it as an all-zero signal instead of
            # dividing by zero and producing NaNs.
            samples = np.zeros_like(samples)
        else:
            samples = 2.0 * ((samples - lowest) / span) - 1.0

        # Filter
        samples = signal.sosfilt(sos, samples)

        mfccs = librosa.feature.mfcc(y=samples, sr=RATE, n_mfcc=40)

    except Exception as e:
        # Best-effort contract is kept (return None), but the cause is
        # reported so failures can be diagnosed.
        print("Error extracting features:", e)
        return None

    return mfccs

In [2]:
# Initialize pyaudio settings

import pyaudio

RATE = 22050               # capture sample rate in Hz
CHUNK = RATE*3             # samples per read: three seconds of audio
FORMAT = pyaudio.paInt16   # 16-bit signed integer samples
CHANNELS = 1               # mono capture

# Identify which input is the mic - use to change input_device_index below
p = pyaudio.PyAudio()

# Placeholder value; nothing in this part of the notebook reads it back.
chosen_device_index = -1

# Print the description of every audio device PortAudio reports so the
# microphone's index can be picked out by eye.
for x in range(p.get_device_count()):
    info = p.get_device_info_by_index(x)
    print(info)


{'index': 0, 'structVersion': 2, 'name': 'Microsoft Sound Mapper - Input', 'hostApi': 0, 'maxInputChannels': 2, 'maxOutputChannels': 0, 'defaultLowInputLatency': 0.09, 'defaultLowOutputLatency': 0.09, 'defaultHighInputLatency': 0.18, 'defaultHighOutputLatency': 0.18, 'defaultSampleRate': 44100.0}
{'index': 1, 'structVersion': 2, 'name': 'Microphone (Realtek(R) Audio)', 'hostApi': 0, 'maxInputChannels': 2, 'maxOutputChannels': 0, 'defaultLowInputLatency': 0.09, 'defaultLowOutputLatency': 0.09, 'defaultHighInputLatency': 0.18, 'defaultHighOutputLatency': 0.18, 'defaultSampleRate': 44100.0}
{'index': 2, 'structVersion': 2, 'name': 'Microsoft Sound Mapper - Output', 'hostApi': 0, 'maxInputChannels': 0, 'maxOutputChannels': 2, 'defaultLowInputLatency': 0.09, 'defaultLowOutputLatency': 0.09, 'defaultHighInputLatency': 0.18, 'defaultHighOutputLatency': 0.18, 'defaultSampleRate': 44100.0}
{'index': 3, 'structVersion': 2, 'name': 'Speakers (Realtek(R) Audio)', 'hostApi': 0, 'maxInputChannels': 

In [3]:
import os

from keras.models import load_model

# Path of the saved Keras model. The SIREN_MODEL_PATH environment variable,
# when set, overrides the machine-specific default below so the notebook can
# run on another machine without editing this cell.
modelSave = os.environ.get(
    'SIREN_MODEL_PATH',
    'C:\\Users\\dell\\Desktop\\AI\\Grad\\2nd\\siren\\siren_detector_32_32_64_64.h5',
)
model = load_model(modelSave)






In [4]:
# Real time detection
# 1) reads in 3-second audio chunk (defined by CHUNK)
# 2) gets MFCCs
# 3) runs MFCCs through keras model to predict probabilities
# 4) Determines whether siren was present based on threshold
#
# The loop runs until the kernel is interrupted; the stream is then stopped
# and closed so the audio device is released and the cell can be re-run.

stream = p.open(
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    input_device_index=1,  # CHANGE to the microphone index printed by the device listing
    input=True,
    # Capture only: nothing is ever written to this stream, so no output
    # side is opened.
    frames_per_buffer=CHUNK
)


import sounddevice as sd

# Shape of a single model input: (num_rows, num_columns, num_channels).
num_rows = 40        # MFCC coefficients per frame (n_mfcc in get_mfccs)
num_columns = 130    # frames per chunk; equals 1 + CHUNK // 512, assuming librosa's default hop length of 512
num_channels = 1

prob_thresh = 0.98  # probability threshold for detecting the siren

try:
    while True:
        data = stream.read(CHUNK)
        data_int = np.frombuffer(data, dtype=np.int16)

        mfccs = get_mfccs(data_int)
        if mfccs is None:
            # get_mfccs already reported the failure; skip this chunk
            # instead of crashing on None.reshape(...).
            continue

        prediction_feature = mfccs.reshape(1, num_rows, num_columns, num_channels)

        predicted_proba_vector = model.predict(prediction_feature)
        # Access probabilities directly: column 0 is "not siren", column 1 is "siren"
        not_siren_prob = predicted_proba_vector[0][0]
        siren_prob = predicted_proba_vector[0][1]

        print("not_siren: ", not_siren_prob, "siren: ", siren_prob)

        if siren_prob > prob_thresh:
            print('SIREN!!!')
        else:
            print('No siren. Carry on.')
except KeyboardInterrupt:
    # Interrupting the kernel is the intended way to end detection.
    print('Detection stopped.')
finally:
    # Always release the capture stream, whatever ended the loop.
    stream.stop_stream()
    stream.close()


    

not_siren:  0.95558274 siren:  0.04441729
No siren. Carry on.
not_siren:  0.8636085 siren:  0.13639157
No siren. Carry on.
not_siren:  0.3467342 siren:  0.65326583
No siren. Carry on.
not_siren:  0.9789165 siren:  0.021083495
No siren. Carry on.
not_siren:  0.9702333 siren:  0.029766753
No siren. Carry on.
not_siren:  0.9999987 siren:  1.3260304e-06
No siren. Carry on.
not_siren:  3.1334536e-11 siren:  1.0
SIREN!!!
not_siren:  1.16358105e-08 siren:  1.0
SIREN!!!
not_siren:  6.3958243e-09 siren:  1.0
SIREN!!!
not_siren:  8.651954e-08 siren:  0.9999999
SIREN!!!
not_siren:  1.7736852e-07 siren:  0.9999999
SIREN!!!
not_siren:  0.9974419 siren:  0.0025581613
No siren. Carry on.
not_siren:  0.93116534 siren:  0.068834625
No siren. Carry on.
not_siren:  0.98952955 siren:  0.010470451
No siren. Carry on.
