In [1]:
import os
import numpy as np
import pandas as pd
import librosa
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from skimage.transform import resize
from sklearn.metrics import confusion_matrix
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping , ModelCheckpoint
from tensorflow.keras.models import load_model
from datetime import datetime

2024-02-14 08:28:07.737145: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
data_dir = '/Users/fahad/Desktop/sounds final  2'
classes = ['Civil', 'Police', 'Trafic', 'ambulance']

In [20]:
def extract_features(file_path, target_shape=(128, 128)):
    audio_data, sample_rate = librosa.load(file_path, sr=None)

    # Data Augmentation
    pitch_shifted = librosa.effects.pitch_shift(audio_data, sr=sample_rate, n_steps=4)
    time_stretched = librosa.effects.time_stretch(audio_data, rate=1.5)

    # Original Features
    mel_spectrogram = librosa.feature.melspectrogram(y=audio_data, sr=sample_rate)
    # Augmented Features
    mel_spectrogram_pitch = librosa.feature.melspectrogram(y=pitch_shifted, sr=sample_rate)
    mel_spectrogram_stretch = librosa.feature.melspectrogram(y=time_stretched, sr=sample_rate)

    # Resizing
    mel_spectrogram_resized = resize(mel_spectrogram, target_shape)
    mel_spectrogram_pitch_resized = resize(mel_spectrogram_pitch, target_shape)
    mel_spectrogram_stretch_resized = resize(mel_spectrogram_stretch, target_shape)

    return mel_spectrogram_resized, mel_spectrogram_pitch_resized, mel_spectrogram_stretch_resized


In [21]:
def load_and_preprocess_data(data_dir, classes, target_shape=(128, 128)):
    data = []
    labels = []

    for i, class_name in enumerate(classes):
        class_dir = os.path.join(data_dir, class_name)
        for filename in os.listdir(class_dir):
            if filename.endswith('.wav'):
                file_path = os.path.join(class_dir, filename)
                original, pitch, stretch = extract_features(file_path, target_shape)
                # Append original features
                data.append(original)
                labels.append(i)
                # Append augmented features
                data.append(pitch)
                labels.append(i)
                data.append(stretch)
                labels.append(i)

    return np.array(data), np.array(labels)

In [22]:
data, labels = load_and_preprocess_data(data_dir, classes)
labels = to_categorical(labels, num_classes=len(classes))
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)

In [23]:
data, l = load_and_preprocess_data(data_dir, classes)

In [24]:
pd.Series(l).value_counts()

1    132
0    129
3    129
2    111
Name: count, dtype: int64

In [25]:
model = Sequential()
model.add(Dense(256, input_shape=(128, 128), activation='relu'))  # Adjust the input_shape according to your data
model.add(Dropout(0.5))
model.add(Dense(192, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(32, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(LSTM(64))
model.add(Dense(len(classes), activation='softmax'))

In [26]:
# Compile the model
model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')

In [27]:
# Display model architecture summary
model.summary()


Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_8 (Dense)             (None, 128, 256)          33024     
                                                                 
 dropout_4 (Dropout)         (None, 128, 256)          0         
                                                                 
 dense_9 (Dense)             (None, 128, 192)          49344     
                                                                 
 dropout_5 (Dropout)         (None, 128, 192)          0         
                                                                 
 dense_10 (Dense)            (None, 128, 128)          24704     
                                                                 
 dropout_6 (Dropout)         (None, 128, 128)          0         
                                                                 
 dense_11 (Dense)            (None, 128, 64)          

In [28]:
# Calculate pre-training accuracy
score = model.evaluate(X_test, y_test, verbose=0)
accuracy = 100*score[1]

In [29]:
# Print pre-training accuracy
print("Pre-training accuracy: %.4f%%" % accuracy)

Pre-training accuracy: 18.8119%


In [30]:
# Train the model
checkpointer = ModelCheckpoint(filepath='saved_models/weights.best.basic_mlp.h5',
                               verbose=1, save_best_only=True)

In [31]:
start = datetime.now()

In [None]:
model.fit(X_train, y_train, batch_size=256, epochs=400, validation_data=(X_test, y_test), callbacks=[checkpointer], verbose=1)
duration = datetime.now() - start
print("Training completed in time: ", duration)

Epoch 1/400
Epoch 1: val_loss improved from inf to 1.38716, saving model to saved_models/weights.best.basic_mlp.h5
Epoch 2/400


  saving_api.save_model(


Epoch 2: val_loss did not improve from 1.38716
Epoch 3/400
Epoch 3: val_loss did not improve from 1.38716
Epoch 4/400
Epoch 4: val_loss did not improve from 1.38716
Epoch 5/400
Epoch 5: val_loss did not improve from 1.38716
Epoch 6/400
Epoch 6: val_loss did not improve from 1.38716
Epoch 7/400
Epoch 7: val_loss did not improve from 1.38716
Epoch 8/400
Epoch 8: val_loss did not improve from 1.38716
Epoch 9/400
Epoch 9: val_loss did not improve from 1.38716
Epoch 10/400
Epoch 10: val_loss did not improve from 1.38716
Epoch 11/400
Epoch 11: val_loss did not improve from 1.38716
Epoch 12/400
Epoch 12: val_loss did not improve from 1.38716
Epoch 13/400
Epoch 13: val_loss did not improve from 1.38716
Epoch 14/400
Epoch 14: val_loss did not improve from 1.38716
Epoch 15/400
Epoch 15: val_loss did not improve from 1.38716
Epoch 16/400
Epoch 16: val_loss did not improve from 1.38716
Epoch 17/400
Epoch 17: val_loss did not improve from 1.38716
Epoch 18/400
Epoch 18: val_loss did not improve from

In [None]:
duration = datetime.now() - start
print("Training completed in time: ", duration)

In [None]:
# Evaluating the model on the training and testing set
score = model.evaluate(X_train, y_train, verbose=0)
print("Training Accuracy: ", score[1])

In [None]:
score = model.evaluate(X_test, y_test, verbose=0)
print("Testing Accuracy: ", score[1])

In [None]:
model.save('911_class_test.h5')

In [None]:
from tensorflow.keras.models import load_model

def predict_audio_class(file_path, model, classes):
    # Assuming the extract_features function is defined as above
    features = extract_features(file_path)[0]  # Use the original features for prediction
    features = np.expand_dims(features, axis=0)  # Reshaping to match model input

    # Make the prediction
    predictions = model.predict(features)
    predicted_class = np.argmax(predictions, axis=1)
    return classes[predicted_class[0]]

# Usage
#model = load_model("/Users/irk2w/Desktop/T5/911_model_don't_touch/911_class_test.h5")  # Load your model
audio_file_path = '/Users/irk2w/Downloads/civil1.wav'
predicted_class_name = predict_audio_class(audio_file_path, model, classes)
print(f"The model predicts that the audio file is a {predicted_class_name} sound.")

In [None]:
confusion_matrix(y_train.argmax(axis = 1), model.predict(X_train).argmax(axis = 1))

In [None]:
confusion_matrix(y_test.argmax(axis = 1), model.predict(X_test).argmax(axis = 1))

In [None]:
len(y_train.argmax(axis = 1))

In [None]:
len(y_train)