In [4]:
import librosa
import os
import numpy as np
from keras.layers import Conv1D, Dropout, MaxPooling1D, GlobalMaxPool1D, Dense
from keras.models import Model
from tensorflow.python.keras.models import Input
from keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import pandas as pd

from tensorflow import keras
from sklearn.preprocessing import LabelEncoder
## pip install resampy

In [5]:
def features_extractor(file_name):
    audio, sample_rate = librosa.load(file_name) 
    mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=80)
    mfccs_scaled_features = np.mean(mfccs_features.T, axis=0)

    return mfccs_scaled_features

In [6]:
audio_dataset_path = 'sound data/'

data_folder = os.listdir(audio_dataset_path)
try:
    data_folder.remove(".DS_Store")
except:
    pass

extracted_features = []
for path in data_folder:
    for file in os.listdir(audio_dataset_path+path+"/"):
        if file.lower().endswith(".wav"):
            file_name = audio_dataset_path+path+"/"+file
            data = features_extractor(file_name)  
            extracted_features.append([data, path])

extracted_features

[[array([-5.2813702e+02,  9.4486557e+01, -4.5078941e+01, -1.4321530e+01,
         -2.3735701e+01,  1.6802542e+01,  3.8335503e+01,  2.1904364e+01,
         -2.3139427e+01, -9.9927692e+00,  2.3932653e+01,  1.9768072e+01,
         -2.3172291e+01, -5.9529076e+00,  9.6585197e+00, -1.0189725e+00,
          7.8582845e+00, -1.7064669e+00, -1.4118227e+01,  1.1042445e+01,
          7.7602329e+00, -1.4446917e+01, -2.2828608e+00,  3.6006453e+00,
          7.2715783e-01, -1.8981941e+00, -3.4969161e+00,  4.5665226e+00,
         -4.0910711e+00,  6.6126137e+00, -3.1547174e+00, -2.1661315e+00,
          4.1290188e+00,  6.0173082e-01, -9.7044092e-01,  1.9230939e+00,
          3.3595341e-01, -4.3164003e-01,  9.8695844e-01, -1.4526168e+00,
          1.5885115e-01, -4.1165910e+00, -2.4645805e+00,  2.4908555e+00,
         -1.8736881e+00, -9.2493343e-01, -3.4256715e-01,  2.2053041e+00,
          1.0399879e+00, -3.0310855e+00, -3.7846646e-01,  1.9230987e+00,
         -8.5161519e-01, -1.1395930e+00, -7.5265503

In [4]:
df = pd.DataFrame(extracted_features,columns=['feature','class'])
df

Unnamed: 0,feature,class
0,"[-528.137, 94.48656, -45.07894, -14.32153, -23...",Emergency vehicles
1,"[-519.9698, 93.40174, -51.92789, -12.237045, -...",Emergency vehicles
2,"[-31.674479, 94.35858, -31.012617, -7.9644475,...",Emergency vehicles
3,"[-113.77173, 105.908325, -31.313862, -8.947265...",Emergency vehicles
4,"[-161.36879, 121.79017, -30.598888, -19.370924...",Emergency vehicles
...,...,...
395,"[-174.41232, 180.81334, -18.588747, 5.600737, ...",Trafficsound
396,"[-176.86723, 188.84865, -17.638838, 7.1042323,...",Trafficsound
397,"[-187.26295, 188.28955, -23.0138, 5.83086, 11....",Trafficsound
398,"[-193.312, 181.9314, -29.733856, 4.778921, 10....",Trafficsound


In [5]:
df['class'].value_counts()

class
Emergency vehicles    200
Trafficsound          200
Name: count, dtype: int64

In [137]:
X = np.array(df['feature'].tolist())
Y = np.array(df['class'].tolist())

In [45]:
labelencoder = LabelEncoder()
y = to_categorical(labelencoder.fit_transform(Y))

In [101]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0, stratify=y, shuffle=True)

In [6]:
X_train_features = X_train.reshape(len(X_train), 80, 1)
X_test_features = X_test.reshape(len(X_test), 80, 1)
print("Reshaped Array Size", X_train_features.shape)

NameError: name 'X_train' is not defined

In [125]:
inputs = Input(shape=(X_train_features.shape[1], X_train_features.shape[2]))

#First Conv1D layer
conv = Conv1D(3, 13, padding='same', activation="relu")(inputs)

conv = Dropout(0.5)(conv)
conv = MaxPooling1D(2)(conv)

#Second Conv1D layer
conv = Conv1D(16, 11, padding='same', activation="relu")(conv)
conv = Dropout(0.5)(conv)
conv = MaxPooling1D(2)(conv)

#MaxPooling 1D
conv = GlobalMaxPool1D()(conv)

#Dense Layer 
conv = Dense(16, activation="relu")(conv)
outputs = Dense(y_test.shape[1], activation='softmax')(conv)

model = Model(inputs, outputs)
model.compile(loss='binary_crossentropy',optimizer="adam",metrics=['acc'])

In [7]:
model.summary()

NameError: name 'model' is not defined

In [128]:
early_stop = EarlyStopping(monitor = 'val_accuracy', mode ='max',
                          patience = 10, restore_best_weights = True)

history = model.fit(X_train_features, y_train, epochs = 15, 
                       callbacks = [early_stop],
                       batch_size = 64, validation_data = (X_test_features, y_test))

Epoch 1/15




Epoch 2/15




Epoch 3/15




Epoch 4/15




Epoch 5/15




Epoch 6/15




Epoch 7/15




Epoch 8/15




Epoch 9/15




Epoch 10/15




Epoch 11/15




Epoch 12/15




Epoch 13/15




Epoch 14/15




Epoch 15/15






In [129]:
_, acc = model.evaluate(X_test_features, y_test)
print("Test Accuracy : ", acc)

Test Accuracy :  0.925000011920929


In [130]:
y_pred = model.predict(X_test_features)



In [131]:
model.save("files/Emergency_vehicle_model.h5")

  saving_api.save_model(


In [13]:
import h5py
from tensorflow import keras
from keras.models import load_model
def Predict_Label(audio_file):
    # model = h5py.File("files\Emergency_vehicle_model.h5")
    model = load_model("files\Emergency_vehicle_model.h5")
    data = features_extractor(audio_file)
    x = np.array(data.tolist())
    X = x.reshape(1, 80, 1)
    y_pred = model.predict(X)
    y_pred = np.argmax(y_pred, axis=1)
    y_pred = int(np.median(y_pred))
    if y_pred == 0:
        print('Emergency vehicle')
    elif y_pred == 1:
        print('Traffic sound')

Predict_Label("sound data/Traffic sound/sound_402.wav")

ImportError: Filepath looks like a hdf5 file but h5pyis not available. filepath=files\Emergency_vehicle_model.h5

In [150]:
Predict_Label("sound data/Traffic sound/sound_409.wav")

Traffic sound


In [151]:
Predict_Label("sound data/Emergency vehicles/sound_17.wav")

Emergency vehicle
