In [1]:
import os
import librosa
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Bidirectional, LSTM ,Flatten ,Reshape

In [2]:
SR=22050
N_MFCC=40
N_MELS=128
DURATION=1
SAMPLES_PER_TRACK=SR*DURATION
CLASSES=['AssaultRifle','MachineGun','Noise']

In [3]:
def extract_features(file_path):
    y,sr=librosa.load(file_path,sr=SR,duration=DURATION)
    if len(y)<SAMPLES_PER_TRACK:
        y=np.pad(y,(0,SAMPLES_PER_TRACK-len(y)))
    else:
        y=y[:SAMPLES_PER_TRACK]
    mfcc=librosa.feature.mfcc(y=y,sr=sr,n_mfcc=N_MFCC)
    mfcc=librosa.util.fix_length(mfcc,size=174,axis=1)
    mfcc=mfcc[...,np.newaxis]
    mel_spec=librosa.feature.melspectrogram(y=y,sr=sr)
    mel_spec=librosa.power_to_db(mel_spec,ref=np.max)
    # plt.figure(figsize=(10,6))
    # librosa.display.specshow(mel_spec,sr=SR,x_axis='time',y_axis='mel')
    # plt.colorbar(format='%+2.0f dB')
    # plt.title('Mel-Spectogram')
    # plt.tight_layout()
    # plt.show()
    mel_spec=librosa.util.fix_length(mel_spec,size=174,axis=1)
    mel_spec=mel_spec[...,np.newaxis]
    return mfcc,mel_spec

In [4]:
def load_dataset(dataset_path):
    x_mfcc,x_melspec,y=[],[],[]
    for i,class_name in enumerate(CLASSES):
        class_dir=os.path.join(dataset_path,class_name)
        for file_name in os.listdir(class_dir):
            if file_name.endswith(".wav"):
                file_path=os.path.join(class_dir,file_name)
                mfcc,mel_spec=extract_features(file_path)
                x_mfcc.append(mfcc)
                x_melspec.append(mel_spec)
                y.append(i)
    x_mfcc=np.array(x_mfcc)
    x_melspec=np.array(x_melspec)
    y=to_categorical(y,num_classes=len(CLASSES))
    return x_mfcc,x_melspec,y


In [5]:
X_mfcc,X_melspec,y=load_dataset('new_dataset')

In [6]:
X_mfcc.shape

(2252, 40, 174, 1)

In [7]:
X_mfcc_trainval, X_mfcc_test, X_melspec_trainval, X_melspec_test, y_trainval, y_test = train_test_split(
    X_mfcc, X_melspec, y, test_size=0.3, random_state=2
)

# Then, split the training+validation set into training and validation sets
X_mfcc_train, X_mfcc_val, X_melspec_train, X_melspec_val, y_train, y_val = train_test_split(
    X_mfcc_trainval, X_melspec_trainval, y_trainval, test_size=0.2, random_state=2
)

In [35]:
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, GlobalAveragePooling2D, Dense, Input, LSTM, Bidirectional, TimeDistributed
from tensorflow.keras.models import Model

def create_model(input_shape_mfcc, input_shape_melspec, num_classes=3):
    # MFCC Input and layers
    input_mfcc = Input(shape=(input_shape_mfcc[1], input_shape_mfcc[0], 1), name='mfcc_input')  # Shape: (timesteps, features, 1)
    print("MFCC Input shape:", input_mfcc.shape)

    x_mfcc = TimeDistributed(Conv2D(16, kernel_size=(3, 3), activation='relu'))(input_mfcc)
    x_mfcc = TimeDistributed(MaxPooling2D(pool_size=(2, 2)))(x_mfcc)
    x_mfcc = TimeDistributed(Dropout(0.3))(x_mfcc)

    x_mfcc = TimeDistributed(Conv2D(32, kernel_size=(3, 3), activation='relu'))(x_mfcc)
    x_mfcc = TimeDistributed(MaxPooling2D(pool_size=(2, 2)))(x_mfcc)
    x_mfcc = TimeDistributed(Dropout(0.3))(x_mfcc)

    x_mfcc = TimeDistributed(Conv2D(64, kernel_size=(3, 3), activation='relu'))(x_mfcc)
    x_mfcc = TimeDistributed(MaxPooling2D(pool_size=(1, 2)))(x_mfcc)
    x_mfcc = TimeDistributed(Dropout(0.3))(x_mfcc)

    x_mfcc = TimeDistributed(Conv2D(128, kernel_size=(3, 3), activation='relu'))(x_mfcc)
    x_mfcc = TimeDistributed(MaxPooling2D(pool_size=(2, 2)))(x_mfcc)
    x_mfcc = TimeDistributed(Dropout(0.3))(x_mfcc)

    x_mfcc = TimeDistributed(GlobalAveragePooling2D())(x_mfcc)
    print("MFCC TimeDistributed output shape:", x_mfcc.shape)

    # MelSpectrogram Input and layers
    input_melspec = Input(shape=(input_shape_melspec[1], input_shape_melspec[0], 1), name='melspec_input')  # Shape: (timesteps, features, 1)
    print("MelSpec Input shape:", input_melspec.shape)

    x_melspec = TimeDistributed(Conv2D(16, kernel_size=(3, 3), activation='relu'))(input_melspec)
    x_melspec = TimeDistributed(MaxPooling2D(pool_size=(2, 2)))(x_melspec)
    x_melspec = TimeDistributed(Dropout(0.3))(x_melspec)

    x_melspec = TimeDistributed(Conv2D(32, kernel_size=(3, 3), activation='relu'))(x_melspec)
    x_melspec = TimeDistributed(MaxPooling2D(pool_size=(2, 2)))(x_melspec)
    x_melspec = TimeDistributed(Dropout(0.3))(x_melspec)

    x_melspec = TimeDistributed(Conv2D(64, kernel_size=(3, 3), activation='relu'))(x_melspec)
    x_melspec = TimeDistributed(MaxPooling2D(pool_size=(2, 2)))(x_melspec)
    x_melspec = TimeDistributed(Dropout(0.3))(x_melspec)

    x_melspec = TimeDistributed(Conv2D(128, kernel_size=(3, 3), activation='relu'))(x_melspec)
    x_melspec = TimeDistributed(MaxPooling2D(pool_size=(2, 2)))(x_melspec)
    x_melspec = TimeDistributed(Dropout(0.3))(x_melspec)

    x_melspec = TimeDistributed(GlobalAveragePooling2D())(x_melspec)
    print("MelSpec TimeDistributed output shape:", x_melspec.shape)

    # Ensure both sequences are of the same length for concatenation
    if x_mfcc.shape[1] != x_melspec.shape[1]:
        min_length = min(x_mfcc.shape[1], x_melspec.shape[1])
        x_mfcc = x_mfcc[:, :min_length, :]
        x_melspec = x_melspec[:, :min_length, :]
    print("After length adjustment - MFCC:", x_mfcc.shape, ", MelSpec:", x_melspec.shape)

    # Concatenate the outputs of both branches
    concatenated = tf.keras.layers.concatenate([x_mfcc, x_melspec])
    print("Concatenated shape:", concatenated.shape)

    # BiLSTM Layer
    x = Bidirectional(LSTM(128, return_sequences=False))(concatenated)
    x = Dense(64, activation='relu')(x)
    x = Dropout(0.3)(x)
    output = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=[input_mfcc, input_melspec], outputs=output)
    return model

# Define the input shapes
input_shape_mfcc = (40, 174)   # Shape updated to reflect correct dimensions
input_shape_melspec = (128, 174)  # Similarly for MelSpectrogram

# Create the model
model = create_model(input_shape_mfcc, input_shape_melspec, num_classes=3)
model.summary()


MFCC Input shape: (None, 174, 40, 1)


ValueError: Kernel shape must have the same length as input, but received kernel of shape (3, 3, 1, 16) and input of shape (None, 40, 1).

In [32]:
#import tensorflow as tf
#from tensorflow.keras.layers import Conv2D,MaxPooling2D,Dropout,GlobalAveragePooling2D,Dense,Input,GlobalAveragePooling1D
#from tensorflow.keras.models import Model
#from tensorflow.keras.layers import TimeDistributed
#def create_model(input_shape_mfcc,input_shape_melspec,num_classes=3):
#    #input_mfcc=Input(shape=input_shape_mfcc,name='mfcc_input')
#    #x_mfcc =Reshape((*input_shape_mfcc, 1))(input_mfcc)
#    #x_mfcc = Reshape((input_shape_mfcc[0], input_shape_mfcc[1], 1))(input_mfcc)
#    input_mfcc=Input(shape=(*input_shape_mfcc,1),name='mfcc_input')
#
#    x_mfcc=TimeDistributed(Conv2D(16,kernel_size=(3,3),activation='relu'))(input_mfcc)
#    x_mfcc=TimeDistributed(MaxPooling2D(pool_size=(2,2)))(x_mfcc)
#    x_mfcc=TimeDistributed(Dropout(0.3))(x_mfcc)
#
#    x_mfcc=TimeDistributed(Conv2D(32,kernel_size=(3,3),activation='relu'))(x_mfcc)
#    x_mfcc=TimeDistributed(MaxPooling2D(pool_size=(2,2)))(x_mfcc)
#    x_mfcc=TimeDistributed(Dropout(0.3))(x_mfcc)
#
#    x_mfcc=TimeDistributed(Conv2D(64,kernel_size=(3,3),activation='relu'))(x_mfcc)
#    x_mfcc=TimeDistributed(MaxPooling2D(pool_size=(1,2)))(x_mfcc)
#    x_mfcc=TimeDistributed(Dropout(0.3))(x_mfcc)
#    
#    x_mfcc=TimeDistributed(Conv2D(128,kernel_size=(3,3),activation='relu'))(x_mfcc)
#    x_mfcc=TimeDistributed(MaxPooling2D(pool_size=(2,2)))(x_mfcc)
#    x_mfcc=TimeDistributed(Dropout(0.3))(x_mfcc)
#    
#    x_mfcc=TimeDistributed(GlobalAveragePooling2D())(x_mfcc)
#    #x_mfcc = Flatten()(x_mfcc)
#    #x_mfcc = Reshape((x_mfcc.shape[1] * x_mfcc.shape[2], 128))(x_mfcc)
#    #x_mfcc = TimeDistributed(Reshape((-1, x_mfcc.shape[-1])))(x_mfcc)
#
#    input_melspec=Input(shape=(*input_shape_melspec,1),name='melspec_input')
#    #x_melspec =Reshape((*input_shape_melspec, 1))(input_melspec)
#    #x_melspec = Reshape((input_shape_melspec[0], input_shape_melspec[1], 1))(input_melspec)
#     
#    
#    x_melspec=TimeDistributed(Conv2D(16,kernel_size=(3,3),activation='relu'))(input_melspec)
#    x_melspec=TimeDistributed(MaxPooling2D(pool_size=(2,2)))(x_melspec)
#    x_melspec=TimeDistributed(Dropout(0.3))(x_melspec)
#
#    x_melspec=TimeDistributed(Conv2D(32,kernel_size=(3,3),activation='relu'))(x_melspec)
#    x_melspec=TimeDistributed(MaxPooling2D(pool_size=(2,2)))(x_melspec)
#    x_melspec=TimeDistributed(Dropout(0.3))(x_melspec)
#
#    x_melspec=TimeDistributed(Conv2D(64,kernel_size=(3,3),activation='relu'))(x_melspec)
#    x_melspec=TimeDistributed(MaxPooling2D(pool_size=(2,2)))(x_melspec)
#    x_melspec=TimeDistributed(Dropout(0.3))(x_melspec)
#
#    x_melspec=TimeDistributed(Conv2D(128,kernel_size=(3,3),activation='relu'))(x_melspec)
#    x_melspec=TimeDistributed(MaxPooling2D(pool_size=(2,2)))(x_melspec)
#    x_melspec=TimeDistributed(Dropout(0.3))(x_melspec)
#
#    x_melspec = TimeDistributed(GlobalAveragePooling2D())(x_melspec)
#    #x_melspec =  Flatten()(x_melspec)
#    #x_melspec = Reshape((x_melspec.shape[1] * x_melspec.shape[2], 40))(x_melspec)
#    #x_melspec = TimeDistributed(Reshape((-1, x_melspec.shape[-1])))(x_melspec)
#
#    # Ensure both sequences are of the same length for concatenation
#    if x_mfcc.shape[1] != x_melspec.shape[1]:
#        min_length = min(x_mfcc.shape[1], x_melspec.shape[1])
#        x_mfcc = x_mfcc[:, :min_length, :]
#        x_melspec = x_melspec[:, :min_length, :]
#        
#    concatenated=tf.keras.layers.concatenate([x_mfcc,x_melspec])
#
#    x=Bidirectional(LSTM(128, return_sequences=False))(concatenated)
#    #x = GlobalAveragePooling1D()(x)
#    x=TimeDistributed(Dense(64,activation='relu'))(x)
#    x=TimeDistributed(Dropout(0.3))(x)
#    output=Dense(num_classes,activation='softmax')(x)
#
#    model=Model(inputs=[input_mfcc,input_melspec],outputs=output)
#    return model

In [33]:
input_shape_mfcc=(174,40)
input_shape_melspec=(174,128)
model=create_model(input_shape_mfcc,input_shape_melspec,num_classes=3)


MFCC Input shape: (None, 174, 40, 1)


ValueError: Kernel shape must have the same length as input, but received kernel of shape (3, 3, 1, 16) and input of shape (None, 40, 1).

In [None]:
from tensorflow.keras.metrics import AUC
model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy','Precision',AUC(),'Recall'])
model.summary()

In [None]:
#import matplotlib.pyplot as plt

checkpoint=tf.keras.callbacks.ModelCheckpoint('best_model.keras',monitor='val_accuracy',save_best_only=True,mode='max')
losscheck=tf.keras.callbacks.ModelCheckpoint('best_model.keras',monitor='val_loss',save_best_only=True,mode='min')
# Assuming `model` is your compiled model and you have train and validation data
history = model.fit([X_mfcc_train, X_melspec_train], y_train, 
                    validation_data=([X_mfcc_val, X_melspec_val], y_val),
                    epochs=50,  # Number of epochs you want to train
                    batch_size=32,callbacks=[checkpoint,losscheck])


results=model.evaluate([X_mfcc_test,X_melspec_test],y_test)
test_loss,test_accuracy=results[0],results[1]
print(f'Test Accuracy:{test_accuracy*100:.2f}%')
print(f'Test Loss:{test_loss*100:.2f}%')


#Plot training & validation accuracy values
plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')

# Adding labels and title
plt.title('Model Accuracy vs Epochs')
plt.ylabel('Accuracy')
plt.xlabel('Epochs')
plt.legend(loc='upper left')

epochs = len(history.history['accuracy'])
plt.xticks(np.arange(0, epochs+1, 10))  # Scale of 10 on x-axis


# Display the plot
plt.show()



plt.plot(history.history['loss'], label='Training loss')
plt.plot(history.history['val_loss'], label='Validation loss')

# Adding labels and title
plt.title('Model loss vs Epochs')
plt.ylabel('loss')
plt.xlabel('Epochs')
plt.legend(loc='upper left')

epochs = len(history.history['loss'])
plt.xticks(np.arange(0, epochs+1, 10))  # Scale of 10 on x-axis


# Display the plot
plt.show()



