In [3]:
from datetime import datetime
from os import listdir
from os.path import isfile, join
import librosa
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import networkx as nx
from sklearn.metrics.pairwise import cosine_similarity
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, GlobalAveragePooling2D, GlobalAveragePooling1D,BatchNormalization, Flatten, Dense, Multiply,multiply, Reshape, Concatenate
from tensorflow.keras.models import Model
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import random
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import classification_report,mean_squared_error,accuracy_score


In [1]:
# Define the function to extract MFCC features
max_pad_len=862
def extract_mfcc_features(file_name):
    try:
        audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast', duration=20) 
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
        pad_width = max_pad_len - mfccs.shape[1]
        mfccs = np.pad(mfccs, pad_width=((0, 0), (0, pad_width)), mode='constant')
        return mfccs
    except Exception as e:
        print("Error encountered while parsing file: ", file_name)
        return None


In [4]:
# Define the path to the audio files
#mypath = "/kaggle/input/respiratory-sound-database/Respiratory_Sound_Database/Respiratory_Sound_Database/audio_and_txt_files/"
mypath=r"E:\Lea!n\DataScience\Respiratory_Sound_Database\Respiratory_Sound_Database\audio_and_txt_files"
filenames = [file for file in listdir(mypath) if (isfile(join(mypath, file)) and file.endswith('.wav'))]

# Create empty lists to store features and labels
features = []
labels = []

FileNotFoundError: [WinError 3] The system cannot find the path specified: 'C:\\Users\\J.Abdullah\\Desktop\\Lea!n\\DataScience\\Respiratory_Sound_Database\\Respiratory_Sound_Database\\audio_and_txt_files'

In [4]:
# Iterate through each sound file and extract the features
for file_name in filenames:
    # Extract MFCC features and append to the features list
    mfccs = extract_mfcc_features(join(mypath, file_name))
    if mfccs is not None:
        features.append(mfccs)
        
        # Extract the label from the file name (assuming the label is in the file name format)
        label = file_name.split('_')[2]  # Assuming the label is the third element in the file name
        labels.append(label)


In [34]:
# Convert the labels into numerical format using Label Encoding
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)

# Convert the lists into numpy arrays
features = np.array(features)
labels_encoded = np.array(labels_encoded)

# Print the shape of the feature matrix and the label array
print("Shape of features: ", features.shape)
print("Shape of labels: ", labels_encoded.shape)

Shape of features:  (920, 40, 862)
Shape of labels:  (920,)


In [6]:
mfccs.shape

(40, 862)

In [35]:
# Build the Attention CNN model
def build_attention_cnn_model(input_shape, num_classes):
    inputs = Input(shape=input_shape)

    # Convolutional layers
    x = Conv2D(32, kernel_size=(3, 3), activation='relu')(inputs)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Conv2D(64, kernel_size=(3, 3), activation='relu')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

    # Attention layer
    attention_weights = Dense(1, activation='tanh')(x)
    attention_weights = Flatten()(attention_weights)
    attention_weights = Dense(x.shape[1] * x.shape[2], activation='softmax')(attention_weights)
    attention_weights = Reshape((x.shape[1], x.shape[2], 1))(attention_weights)
    x = multiply([x, attention_weights])

    # Global Average Pooling and Dense layers
    x = GlobalAveragePooling2D()(x)
    x = Dense(128, activation='relu')(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs)

    return model
'''
Changes made to fix the issue:

Replace Multiply() with multiply() from tensorflow.keras.layers.
Use Dense(x.shape[1] * x.shape[2], activation='softmax') to match the shape of the intermediate tensor for attention weights calculation.
Reshape the attention weights using Reshape((x.shape[1], x.shape[2], 1)) to obtain the same shape as the intermediate tensor.
'''

"\nChanges made to fix the issue:\n\nReplace Multiply() with multiply() from tensorflow.keras.layers.\nUse Dense(x.shape[1] * x.shape[2], activation='softmax') to match the shape of the intermediate tensor for attention weights calculation.\nReshape the attention weights using Reshape((x.shape[1], x.shape[2], 1)) to obtain the same shape as the intermediate tensor.\n"

In [36]:
# Define the input shape for both models
max_pad_len = max(mfcc.shape[1] for mfcc in features)
attention_cnn_input_shape = (40, max_pad_len, 1)  # MFCC features shape
num_classes = len(np.unique(labels_encoded))

# Build the Attention CNN model
attention_cnn_model = build_attention_cnn_model(attention_cnn_input_shape, num_classes)


In [37]:
max_pad_len

862

In [22]:
print('attention_cnn_model.ouput.shape=',attention_cnn_model.output.shape)
print('attention_cnn_model.input.shape=',attention_cnn_model.input.shape)
print('features.shape=',features.shape)
print('labels_encoded.shape=',labels_encoded.shape)

attention_cnn_model.ouput.shape= (None, 7)
attention_cnn_model.input.shape= (None, 40, 862, 1)
features.shape= (920, 40, 862)
labels_encoded.shape= (920,)


In [23]:
# Compile the hybrid model
attention_cnn_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Print the model summary
attention_cnn_model.summary()




Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_4 (InputLayer)           [(None, 40, 862, 1)  0           []                               
                                ]                                                                 
                                                                                                  
 conv2d_6 (Conv2D)              (None, 38, 860, 32)  320         ['input_4[0][0]']                
                                                                                                  
 max_pooling2d_6 (MaxPooling2D)  (None, 19, 430, 32)  0          ['conv2d_6[0][0]']               
                                                                                                  
 conv2d_7 (Conv2D)              (None, 17, 428, 64)  18496       ['max_pooling2d_6[0][0]']  

In [15]:
# Split the dataset into training, validation, and test sets
X_train, X_test, y_train, y_test = train_test_split(features, labels_encoded, test_size=0.3, random_state=42,shuffle=True)
# Convert labels to one-hot encoded format
y_train_one_hot = to_categorical(y_train, num_classes)
y_test_one_hot = to_categorical(y_test, num_classes)

In [16]:
from keras.callbacks import ModelCheckpoint, EarlyStopping
# Define callbacks for model training
checkpoint = ModelCheckpoint('attention_cnn_model.h5', monitor='val_accuracy', save_best_only=True, mode='max', verbose=1)
early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1)

In [30]:
history = attention_cnn_model.fit(
    X_train, y_train_one_hot,
    batch_size=32,
    epochs=50,
    verbose=1,
    validation_split=0.1
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [31]:

y_pred = attention_cnn_model.predict(X_test)
y_pred_labels = np.argmax(y_pred, axis=1)
print(y_pred[0])
print(y_pred_labels[0])
print(classification_report(y_test, y_pred_labels))

[0.10067964 0.14811072 0.08624099 0.06991067 0.4086174  0.1816766
 0.00476404]
4
              precision    recall  f1-score   support

           0       0.29      0.30      0.29        47
           1       0.29      0.48      0.36        48
           2       0.00      0.00      0.00        21
           3       0.29      0.13      0.18        39
           4       0.31      0.38      0.34        39
           5       0.31      0.28      0.29        40
           6       0.56      0.64      0.60        42

    accuracy                           0.34       276
   macro avg       0.29      0.32      0.30       276
weighted avg       0.32      0.34      0.32       276



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
score = attention_cnn_model.evaluate(X_test, y_test_one_hot, verbose=0)
print("Test loss:", score[0])
print("Test accuracy:", score[1])

In [33]:
# Calculate RMSE (Root Mean Squared Error)
rmse = np.sqrt(mean_squared_error(np.argmax(y_test_one_hot, axis=1), y_pred_labels))
print("RMSE:", rmse)

# Calculate accuracy
accuracy = accuracy_score(np.argmax(y_test_one_hot, axis=1), y_pred_labels)
print("Accuracy:", accuracy)

RMSE: 2.3583032465945832
Accuracy: 0.3442028985507246
