In [37]:
import os
import numpy as np
import pandas as pd
import librosa
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from skimage.transform import resize
from sklearn.metrics import confusion_matrix
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping , ModelCheckpoint
from tensorflow.keras.models import load_model
from datetime import datetime

In [36]:
data_dir = '/Users/irk2w/Desktop/T5/sounds 2'
classes = ['Civil', 'Police', 'Trafic', 'ambulance']

In [38]:
def extract_features(file_path, target_shape=(128, 128)):
    audio_data, sample_rate = librosa.load(file_path, sr=None)

    # Data Augmentation
    pitch_shifted = librosa.effects.pitch_shift(audio_data, sr=sample_rate, n_steps=4)
    time_stretched = librosa.effects.time_stretch(audio_data, rate=1.5)

    # Original Features
    mel_spectrogram = librosa.feature.melspectrogram(y=audio_data, sr=sample_rate)
    # Augmented Features
    mel_spectrogram_pitch = librosa.feature.melspectrogram(y=pitch_shifted, sr=sample_rate)
    mel_spectrogram_stretch = librosa.feature.melspectrogram(y=time_stretched, sr=sample_rate)

    # Resizing
    mel_spectrogram_resized = resize(mel_spectrogram, target_shape)
    mel_spectrogram_pitch_resized = resize(mel_spectrogram_pitch, target_shape)
    mel_spectrogram_stretch_resized = resize(mel_spectrogram_stretch, target_shape)

    return mel_spectrogram_resized, mel_spectrogram_pitch_resized, mel_spectrogram_stretch_resized


In [39]:
def load_and_preprocess_data(data_dir, classes, target_shape=(128, 128)):
    data = []
    labels = []

    for i, class_name in enumerate(classes):
        class_dir = os.path.join(data_dir, class_name)
        for filename in os.listdir(class_dir):
            if filename.endswith('.wav'):
                file_path = os.path.join(class_dir, filename)
                original, pitch, stretch = extract_features(file_path, target_shape)
                # Append original features
                data.append(original)
                labels.append(i)
                # Append augmented features
                data.append(pitch)
                labels.append(i)
                data.append(stretch)
                labels.append(i)

    return np.array(data), np.array(labels)

In [40]:
data, labels = load_and_preprocess_data(data_dir, classes)
labels = to_categorical(labels, num_classes=len(classes))
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)

  audio_data, sample_rate = librosa.load(file_path, sr=None)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


In [41]:
data, l = load_and_preprocess_data(data_dir, classes)

  audio_data, sample_rate = librosa.load(file_path, sr=None)


In [42]:
pd.Series(l).value_counts()

0    666
1    576
2    456
3    444
Name: count, dtype: int64

In [43]:
model = Sequential()
model.add(Dense(256, input_shape=(128, 128), activation='relu'))  # Adjust the input_shape according to your data
model.add(Dropout(0.5))
model.add(Dense(192, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(32, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(LSTM(64))
model.add(Dense(len(classes), activation='softmax'))

In [44]:
# Compile the model
model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')

In [45]:
# Display model architecture summary
model.summary()


Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_16 (Dense)            (None, 128, 256)          33024     
                                                                 
 dropout_8 (Dropout)         (None, 128, 256)          0         
                                                                 
 dense_17 (Dense)            (None, 128, 192)          49344     
                                                                 
 dropout_9 (Dropout)         (None, 128, 192)          0         
                                                                 
 dense_18 (Dense)            (None, 128, 128)          24704     
                                                                 
 dropout_10 (Dropout)        (None, 128, 128)          0         
                                                                 
 dense_19 (Dense)            (None, 128, 64)          

In [46]:
# Calculate pre-training accuracy
score = model.evaluate(X_test, y_test, verbose=0)
accuracy = 100*score[1]

In [47]:
# Print pre-training accuracy
print("Pre-training accuracy: %.4f%%" % accuracy)

Pre-training accuracy: 29.1375%


In [48]:
# Train the model
checkpointer = ModelCheckpoint(filepath='saved_models/weights.best.basic_mlp.h5',
                               verbose=1, save_best_only=True)

In [49]:
start = datetime.now()

In [50]:
model.fit(X_train, y_train, batch_size=128, epochs=500, validation_data=(X_test, y_test), callbacks=[checkpointer], verbose=1)
duration = datetime.now() - start
print("Training completed in time: ", duration)

Epoch 1/500
Epoch 1: val_loss improved from inf to 1.37900, saving model to saved_models/weights.best.basic_mlp.h5
Epoch 2/500
 1/14 [=>............................] - ETA: 1s - loss: 1.3987 - accuracy: 0.2969

  saving_api.save_model(


Epoch 2: val_loss improved from 1.37900 to 1.37760, saving model to saved_models/weights.best.basic_mlp.h5
Epoch 3/500
Epoch 3: val_loss improved from 1.37760 to 1.37006, saving model to saved_models/weights.best.basic_mlp.h5
Epoch 4/500
Epoch 4: val_loss improved from 1.37006 to 1.35146, saving model to saved_models/weights.best.basic_mlp.h5
Epoch 5/500
Epoch 5: val_loss improved from 1.35146 to 1.32859, saving model to saved_models/weights.best.basic_mlp.h5
Epoch 6/500
Epoch 6: val_loss improved from 1.32859 to 1.31277, saving model to saved_models/weights.best.basic_mlp.h5
Epoch 7/500
Epoch 7: val_loss improved from 1.31277 to 1.29857, saving model to saved_models/weights.best.basic_mlp.h5
Epoch 8/500
Epoch 8: val_loss improved from 1.29857 to 1.27614, saving model to saved_models/weights.best.basic_mlp.h5
Epoch 9/500
Epoch 9: val_loss improved from 1.27614 to 1.22076, saving model to saved_models/weights.best.basic_mlp.h5
Epoch 10/500
Epoch 10: val_loss improved from 1.22076 to 1.1

In [51]:
# Evaluating the model on the training and testing set
score = model.evaluate(X_train, y_train, verbose=0)
print("Training Accuracy: ", score[1])

Training Accuracy:  0.9982486963272095


In [52]:
score = model.evaluate(X_test, y_test, verbose=0)
print("Testing Accuracy: ", score[1])

Testing Accuracy:  0.9254079461097717


In [64]:
model.save('911_class_best.h5')

  saving_api.save_model(


In [73]:
from tensorflow.keras.models import load_model

def predict_audio_class(file_path, model, classes):
    # Assuming the extract_features function is defined as above
    features = extract_features(file_path)[0]  # Use the original features for prediction
    features = np.expand_dims(features, axis=0)  # Reshaping to match model input

    # Make the prediction
    predictions = model.predict(features)
    predicted_class = np.argmax(predictions, axis=1)
    return classes[predicted_class[0]]

# Usage
model = load_model("/Users/irk2w/Desktop/T5/911_model_don't_touch/911_class_best.h5")  # Load your model
audio_file_path = '/Users/irk2w/Downloads/New Recording 67.wav'
predicted_class_name = predict_audio_class(audio_file_path, model, classes)
print(f"The model predicts that the audio file is a {predicted_class_name} sound.")

The model predicts that the audio file is a Civil sound.


In [61]:
confusion_matrix(y_train.argmax(axis = 1), model.predict(X_train).argmax(axis = 1))



array([[527,   0,   0,   0],
       [  0, 475,   0,   0],
       [  0,   0, 361,   0],
       [  1,   0,   0, 349]])

In [62]:
confusion_matrix(y_test.argmax(axis = 1), model.predict(X_test).argmax(axis = 1))



array([[130,   2,   6,   1],
       [  5,  95,   0,   1],
       [  1,   3,  86,   5],
       [  1,   1,   4,  88]])

In [63]:
len(y_train.argmax(axis = 1))

1713

In [64]:
len(y_train)

1713