In [1]:
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import librosa
import wave
import pandas as pd
import soundfile as sf
import librosa.display
from PIL import Image
import os
import tensorflow as tf
import scipy.signal as signal
from tqdm import tqdm
import random

In [2]:
# Step 1: Denoising using a low pass filter
def apply_low_pass_filter(audio, sampling_rate, cutoff_freq):
    nyquist_freq = 0.5 * sampling_rate
    normalized_cutoff_freq = cutoff_freq / nyquist_freq
    b, a = signal.butter(4, normalized_cutoff_freq, btype='low', analog=False)
    denoised_audio = signal.lfilter(b, a, audio)
    return denoised_audio

In [3]:
# Downsampling audio
def downsample_audio(audio,original_sampling_rate,target_sampling_rate):
    resampled_audio = librosa.resample(audio, orig_sr=original_sampling_rate, target_sr=target_sampling_rate)
    return resampled_audio

In [4]:
# Split audio into fixed-length segments
def split_audio(audio, segment_length):
    num_segments = len(audio) // segment_length
    segments = [audio[i*segment_length:(i+1)*segment_length] for i in range(num_segments)]
    return segments

In [5]:
def norm_mfcc(audio_path):

    # Load audio file
    audio, sr = librosa.load(audio_path, sr=None)

    # Compute MFCC features
    mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=48)

    # Normalize MFCC values
    normalized_mfcc = (mfcc - np.mean(mfcc)) / np.std(mfcc)
    return normalized_mfcc

In [6]:
def time_warp(mfcc, time_warping_range):
    num_frames = mfcc.shape[1]
    t = np.arange(num_frames)
    random_shift = np.random.randint(-time_warping_range, time_warping_range)
    warped_t = np.clip(t + random_shift, 0, num_frames - 1)
    warped_mfcc = mfcc[:, warped_t]
    return warped_mfcc[:, :48]

In [7]:
def frequency_mask(mfcc, frequency_masking_para, mask_max_frames):
    num_mfcc = mfcc.shape[0]
    num_frames = mfcc.shape[1]
    f = np.random.randint(0, frequency_masking_para)
    f0 = random.randint(0, num_mfcc - f)
    mfcc[f0:f0 + f, :] = 0.0
    return mfcc[:, :48]

In [65]:
input_shape = (39,32)  # Example input shape for MobileNetV2

In [66]:
MFCC_Dataset=[]

In [67]:
# Print the file names
def data(root_directory,final_class_labels):
    for folder_name, subfolders, filenames in os.walk(root_directory):
        # Iterate through audio files in the current folder
        for filename in filenames:
            if filename.lower().endswith(('.wav')):
#                 Construct the full path to the audio file
                audio_path = os.path.join(folder_name, filename)
                mfcc_array=norm_mfcc(audio_path)
                # Reshape MFCC array
                mfcc_reshaped = np.reshape(mfcc_array,input_shape)
                MFCC_Dataset.append([mfcc_reshaped,final_class_labels])

In [68]:
# Print the file names
def normal_data(root_directory,final_class_labels):
    for folder_name, subfolders, file_names in os.walk(root_directory):
        for file_name in file_names:
            if file_name.lower().endswith(('.wav')):
                audio_path = os.path.join(folder_name, file_name)
                mfcc_array=norm_mfcc(audio_path)
                # Reshape MFCC array
                mfcc_reshaped = np.reshape(mfcc_array, input_shape)
                MFCC_Dataset.append([mfcc_reshaped,final_class_labels])

                # Set time warping range (adjust according to your needs)
                time_warping_range = 5

                warped_mfcc = time_warp(mfcc_array, time_warping_range)
                # Reshape MFCC array
                mfcc_reshaped = np.reshape(warped_mfcc,input_shape)
                MFCC_Dataset.append([mfcc_reshaped,final_class_labels])

                # Set frequency masking parameters (adjust according to your needs)
                frequency_masking_para = 5
                mask_max_frames = 5

                masked_mfcc = frequency_mask(mfcc_array, frequency_masking_para, mask_max_frames)
                masked_mfcc2 = frequency_mask(mfcc_array, frequency_masking_para, mask_max_frames)
                # Reshape MFCC array
                mfcc_reshaped = np.reshape(masked_mfcc, input_shape)
                MFCC_Dataset.append([mfcc_reshaped,final_class_labels])
                mfcc_reshaped = np.reshape(masked_mfcc2, input_shape)
                MFCC_Dataset.append([mfcc_reshaped,final_class_labels])

In [69]:
data('/Users/rishikabethi/Desktop/Heart/Dataset_A/Augmented-A/Atraining_artifact','artifact')
normal_data('/Users/rishikabethi/Desktop/Heart/Dataset_A/Split-A/Atraining_artifact','artifact')

In [70]:
data('/Users/rishikabethi/Desktop/Heart/Dataset_A/Augmented-A/Atraining_extrahls','extrahls')
normal_data('/Users/rishikabethi/Desktop/Heart/Dataset_A/Split-A/Atraining_extrahls','extrahls')

In [98]:
data('/Users/rishikabethi/Desktop/Heart/Dataset_A/Augmented-A/Atraining_murmur','murmur')
normal_data('/Users/rishikabethi/Desktop/Heart/Dataset_A/Split-A/Atraining_murmur','murmur')

In [99]:
data('/Users/rishikabethi/Desktop/Heart/Dataset_A/Augmented-A/Atraining_normal','normal')
normal_data('/Users/rishikabethi/Desktop/Heart/Dataset_A/Split-A/Atraining_normal','normal')

In [73]:
len(MFCC_Dataset)

1222

In [74]:
MFCC_Df=pd.DataFrame(MFCC_Dataset, columns=['MFCC', 'Class'])

In [75]:
MFCC_Df

Unnamed: 0,MFCC,Class
0,"[[-3.1142056, -3.6126595, -6.192634, -6.212791...",artifact
1,"[[-6.322169, -6.40755, -6.569402, -6.644149, -...",artifact
2,"[[-4.500682, -4.984742, -6.77323, -6.7310724, ...",artifact
3,"[[-5.4014797, -5.7350993, -6.7023306, -6.67766...",artifact
4,"[[-4.9807153, -5.289154, -6.4026146, -6.405455...",artifact
...,...,...
1217,"[[-3.2540019, -3.8806489, -6.5898547, -6.68495...",normal
1218,"[[-5.724358, -6.1948705, -6.557403, -6.5594387...",normal
1219,"[[-5.724358, -5.724358, -5.724358, -5.724358, ...",normal
1220,"[[-5.724358, -6.1948705, -6.557403, -6.5594387...",normal


In [76]:
MFCC_Df['Class'].value_counts()

Class
artifact    720
extrahls    222
normal      148
murmur      132
Name: count, dtype: int64

In [77]:
for i in MFCC_Df["MFCC"]:
    print(i.shape)

(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(

In [78]:
# Define the column value to filter
column_value = 'Artifact'

# Filter the DataFrame based on the column value
filtered_df = MFCC_Df[MFCC_Df['Class'] == column_value]

# Calculate the number of rows to drop (half of the filtered rows)
rows_to_drop = len(filtered_df) // 2

# Randomly select rows to drop
rows = np.random.choice(filtered_df.index, rows_to_drop, replace=False)

# Drop the selected rows
MFCC_Final = MFCC_Df.drop(rows)

# Print the updated DataFrame
print(MFCC_Final)

                                                   MFCC     Class
0     [[-3.1142056, -3.6126595, -6.192634, -6.212791...  artifact
1     [[-6.322169, -6.40755, -6.569402, -6.644149, -...  artifact
2     [[-4.500682, -4.984742, -6.77323, -6.7310724, ...  artifact
3     [[-5.4014797, -5.7350993, -6.7023306, -6.67766...  artifact
4     [[-4.9807153, -5.289154, -6.4026146, -6.405455...  artifact
...                                                 ...       ...
1217  [[-3.2540019, -3.8806489, -6.5898547, -6.68495...    normal
1218  [[-5.724358, -6.1948705, -6.557403, -6.5594387...    normal
1219  [[-5.724358, -5.724358, -5.724358, -5.724358, ...    normal
1220  [[-5.724358, -6.1948705, -6.557403, -6.5594387...    normal
1221  [[-5.724358, -6.1948705, -6.557403, -6.5594387...    normal

[1222 rows x 2 columns]


In [79]:
X=np.array(MFCC_Final["MFCC"].tolist())
y=np.array(MFCC_Final["Class"].tolist())

In [80]:
X.shape

(1222, 39, 32)

In [81]:
input_shape = (39, 32, 3)
X = np.repeat(X[..., np.newaxis], 3, axis=-1)

In [82]:
X.shape

(1222, 39, 32, 3)

In [83]:
np.unique(y)

array(['artifact', 'extrahls', 'murmur', 'normal'], dtype='<U8')

In [84]:
y=np.array(pd.get_dummies(y))

In [85]:
y

array([[ True, False, False, False],
       [ True, False, False, False],
       [ True, False, False, False],
       ...,
       [False, False, False,  True],
       [False, False, False,  True],
       [False, False, False,  True]])

In [86]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

In [87]:
X_train.shape

(977, 39, 32, 3)

In [88]:
X_train,X_valid,y_train,y_valid=train_test_split(X_train,y_train,test_size=0.15,random_state=42)

In [89]:
X_train.shape

(830, 39, 32, 3)

In [90]:
import tensorflow as tf
from tensorflow.keras.layers import Input, LSTM, Dense, Conv2D,MaxPooling2D
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras import layers
from tensorflow.keras.layers import Conv2D, Flatten, Dense, Dropout, MaxPooling2D, GlobalAveragePooling2D, BatchNormalization

In [91]:
from tensorflow import keras
base_model=keras.applications.MobileNetV2(
    input_shape=(39,32,3),
    include_top=False,
    weights='imagenet'
)

base_model.trainable = False

model_1 = keras.Sequential([
    base_model,
    keras.layers.Flatten(),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(4, activation='softmax')
])

model_1.summary()

Metal device set to: Apple M1
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 mobilenetv2_1.00_224 (Funct  (None, 2, 1, 1280)       2257984   
 ional)                                                          
                                                                 
 flatten (Flatten)           (None, 2560)              0         
                                                                 
 dense (Dense)               (None, 64)                163904    
                                                                 
 dropout (Dropout)           (None, 64)                0         
                                                                 
 dense_1 (Dense)             (None, 4)                 260    

In [92]:
model_1.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [93]:
checkpoint = ModelCheckpoint('heart_mobilenet_mfcc.hdf5', save_best_only=True, save_weights_only=False, monitor='val_accuracy', mode='max')
history = model_1.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_valid, y_valid), callbacks=[checkpoint])

Epoch 1/50


2023-07-14 12:43:58.397536: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [94]:
test_accuracy=model_1.evaluate(X_test,y_test,verbose=0)
print(test_accuracy[1])

0.9061224460601807


In [95]:
def pred(audio_path, model_path):
    audio, sampling_rate = librosa.load(audio_path, sr=None)
    # Denoising
    cutoff_frequency = 195
    denoised_audio = apply_low_pass_filter(audio, sampling_rate, cutoff_frequency)

    # Downsampling
    target_sampling_rate = sampling_rate // 10
    downsampled_audio = downsample_audio(denoised_audio, sampling_rate, target_sampling_rate)

    # Splitting audio
    segment_length = target_sampling_rate * 3
    segments = split_audio(downsampled_audio, segment_length)
    
    model = keras.models.load_model(model_path)
    i=0
    
    for segment in segments:
        # Compute MFCC features
        mfcc = librosa.feature.mfcc(y=segment, sr=target_sampling_rate, n_mfcc=48)

        # Normalize MFCC values
        normalized_mfcc = (mfcc - np.mean(mfcc)) / np.std(mfcc)
        mfcc_reshaped = np.reshape(normalized_mfcc,(1,39,32))
        
        input_shape = (39, 32, 3)
        mfcc_final = np.repeat(mfcc_reshaped[..., np.newaxis], 3, axis=-1)
        
        prediction = model.predict(mfcc_final)
        x=np.argmax(prediction)
        confidence = prediction[0, x]
        i=i+1

#         prediction = model.predict(img_arr)
#         x=np.argmax(prediction)

        classes={0:'Artifact', 1:'Extrasystole', 2:'Murmur', 3:'Normal'}
        print(classes[x],confidence)
#         print(mfcc_final.shape)

In [96]:
audio_path="/Users/rishikabethi/Desktop/Heart/Dataset_A/Atraining_extrahls/201101160808.wav"
model_path="/Users/rishikabethi/Desktop/Heart/heart_mobilenet_mfcc.hdf5"
pred(audio_path,model_path)

Extrasystole 0.92877805
Extrasystole 0.970611


In [97]:
audio_path="/Users/rishikabethi/Desktop/Heart/Dataset_A/Atraining_extrahls/201101160808.wav"
model_path="/Users/rishikabethi/Desktop/Heart/heart_mobilenet_mfcc.hdf5"
pred(audio_path,model_path)

Extrasystole 0.92877805
Extrasystole 0.970611


In [64]:
audio_path="/Users/rishikabethi/Desktop/Heart/Dataset_A/Atraining_extrahls/murmur_1.wav"
model_path="/Users/rishikabethi/Documents/GitHub/Heart-Sound-Classification/Heart/Notebooks/heart_mobilenet_mfcc.hdf5"
pred(audio_path,model_path)

Artifact 0.999977
Artifact 0.99228865
Murmur 0.9982432
Murmur 0.99995935
Normal 0.865291
Murmur 0.9908213
Murmur 0.9873416
Artifact 0.961495
Murmur 0.9454602
Murmur 0.8226908
Murmur 0.73364216
Murmur 0.94140005
Normal 0.9934191
Artifact 0.7615139
Artifact 0.8350817
Murmur 0.6350628
Murmur 0.9993212
Murmur 0.65904564
Normal 0.4804638
Artifact 0.9987429
Murmur 0.5849357
Artifact 0.9568558
