In [1]:
import matplotlib.pyplot as plt
import numpy as np
import librosa
import wave
import pandas as pd
import soundfile as sf
import librosa.display
import IPython.display as ipd
from PIL import Image
import os
import scipy.signal as signal
from tqdm import tqdm
import random

In [2]:
# Step 1: denoise the signal with a low-pass filter
def apply_low_pass_filter(audio, sampling_rate, cutoff_freq):
    """Low-pass filter ``audio`` with a 4th-order digital Butterworth filter.

    Parameters
    ----------
    audio : array-like
        Samples to filter.
    sampling_rate : number
        Sampling rate of ``audio``.
    cutoff_freq : number
        Cutoff frequency, in the same unit as ``sampling_rate``.

    Returns
    -------
    The filtered samples as returned by ``scipy.signal.lfilter``.
    """
    # butter() takes the cutoff as a fraction of the Nyquist frequency
    # (half the sampling rate) when no ``fs`` argument is given.
    relative_cutoff = cutoff_freq / (0.5 * sampling_rate)
    numerator, denominator = signal.butter(4, relative_cutoff, btype='low', analog=False)
    # lfilter applies the filter in a single forward pass.
    return signal.lfilter(numerator, denominator, audio)

In [3]:
# Resample audio to a different sampling rate
def downsample_audio(audio,original_sampling_rate,target_sampling_rate):
    """Resample ``audio`` from ``original_sampling_rate`` to ``target_sampling_rate``.

    Thin wrapper around ``librosa.resample``; returns whatever that call returns.
    """
    return librosa.resample(
        audio,
        orig_sr=original_sampling_rate,
        target_sr=target_sampling_rate,
    )

In [4]:
# Cut audio into consecutive, non-overlapping fixed-length segments
def split_audio(audio, segment_length):
    """Return the full-length segments of ``audio``, in order.

    Only complete segments are returned: trailing samples that do not fill a
    whole segment are dropped, and an input shorter than ``segment_length``
    yields an empty list.
    """
    # Length of the prefix that divides evenly into whole segments.
    usable_length = (len(audio) // segment_length) * segment_length
    return [
        audio[start:start + segment_length]
        for start in range(0, usable_length, segment_length)
    ]

In [5]:
def norm_mfcc(audio_path, n_mfcc=48):
    """Load an audio file and return its globally standardised MFCC matrix.

    Parameters
    ----------
    audio_path : str or path-like
        File to load; passed straight to ``librosa.load``.
    n_mfcc : int, optional
        Number of MFCC coefficients to compute. Defaults to 48, the value the
        rest of this notebook is built around.

    Returns
    -------
    The matrix returned by ``librosa.feature.mfcc`` after subtracting the mean
    of all its entries and dividing by the standard deviation of all its
    entries (one mean / one deviation for the whole matrix, not per row).
    If every entry is identical the deviation is 0; the centred (all-zero)
    matrix is then returned instead of dividing by zero.
    """
    # sr=None keeps the file's own sampling rate instead of resampling on load.
    audio, sr = librosa.load(audio_path, sr=None)

    mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)

    centred = mfcc - np.mean(mfcc)
    spread = np.std(mfcc)
    if spread == 0:
        # A constant matrix would otherwise turn into NaN (0 / 0) and silently
        # poison everything computed from it downstream.
        return centred
    return centred / spread

In [6]:
def time_warp(mfcc, time_warping_range):
    """Shift all frames of ``mfcc`` by one random offset, clamping at the edges.

    Parameters
    ----------
    mfcc : 2-D array
        Feature matrix; axis 1 is treated as the frame (time) axis.
    time_warping_range : int
        Largest shift, in frames. One offset is drawn uniformly from
        ``[-time_warping_range, +time_warping_range]`` (both ends included)
        using NumPy's global random state.

    Returns
    -------
    A new array in which output frame ``t`` is input frame ``t + offset``,
    with indices outside the matrix clamped to the first / last frame (so an
    edge frame is repeated). At most the first 48 frames are returned.
    The input array is not modified.
    """
    num_frames = mfcc.shape[1]
    # randint excludes its upper bound; the +1 makes the range symmetric and
    # keeps time_warping_range == 0 valid (it previously raised ValueError).
    random_shift = np.random.randint(-time_warping_range, time_warping_range + 1)
    warped_t = np.clip(np.arange(num_frames) + random_shift, 0, num_frames - 1)
    # Indexing with an index array produces a copy, so the caller's data is untouched.
    warped_mfcc = mfcc[:, warped_t]
    return warped_mfcc[:, :48]

In [7]:
def frequency_mask(mfcc, frequency_masking_para, mask_max_frames):
    """Return a copy of ``mfcc`` with one random band of rows set to zero.

    Parameters
    ----------
    mfcc : 2-D array
        Feature matrix; axis 0 is the coefficient axis that gets masked.
    frequency_masking_para : int
        Exclusive upper bound on the band height: the number of zeroed rows is
        drawn uniformly from ``[0, frequency_masking_para)``, so a height of 0
        (no masking) is possible. A value <= 0 disables masking.
    mask_max_frames : int
        Unused; kept so existing callers keep working.

    Returns
    -------
    A new array (at most the first 48 columns) with rows ``f0 .. f0 + f - 1``
    zeroed, where the start row ``f0`` is drawn uniformly so the band fits.
    Both draws use NumPy's global random state.
    """
    num_mfcc = mfcc.shape[0]
    # Mask a private copy. Writing into the caller's array would also change
    # every view of it (e.g. a reshaped copy already stored elsewhere) and make
    # repeated calls on the same input return the same, cumulatively masked data.
    masked = np.array(mfcc, copy=True)

    if frequency_masking_para > 0:
        f = np.random.randint(0, frequency_masking_para)
    else:
        f = 0
    # Never ask for a band taller than the matrix itself.
    f = min(f, num_mfcc)
    # +1 because randint excludes its upper bound; f0 == num_mfcc - f is valid.
    f0 = np.random.randint(0, num_mfcc - f + 1)
    masked[f0:f0 + f, :] = 0.0
    return masked[:, :48]

In [8]:
# Label table with one row per recording (columns include File, Class and Folder).
# NOTE(review): absolute, machine-specific path; the notebook only runs where this file exists.
labels_csv_path = r"C:\Users\bharg\Desktop\Heart_Sounds\DatasetA\Dataframes\Heart_Audio_Default_Folders_A.csv"
Df_A = pd.read_csv(labels_csv_path)

In [9]:
# Display the label table to check that it loaded as expected.
Df_A

Unnamed: 0.1,Unnamed: 0,File,Class,Folder
0,0,201012172012.wav,Artifact,Atraining_artifact
1,1,201105040918.wav,Artifact,Atraining_artifact
2,2,201105041959.wav,Artifact,Atraining_artifact
3,3,201105051017.wav,Artifact,Atraining_artifact
4,4,201105060108.wav,Artifact,Atraining_artifact
...,...,...,...,...
119,119,201103200218.wav,Extrasystole,Atraining_extrahls
120,120,201104021355.wav,Extrasystole,Atraining_extrahls
121,121,201104140118.wav,Extrasystole,Atraining_extrahls
122,122,201104270458.wav,Extrasystole,Atraining_extrahls


In [10]:
# 2-D shape every MFCC matrix is reshaped to before it is stored.
# 39 * 32 = 1,248 = 48 * 26, so np.reshape only succeeds when the matrix holds
# exactly 26 frames of the 48 coefficients norm_mfcc computes.
# NOTE(review): the reshape re-wraps the values in row-major order; it does not
# keep the coefficient x frame layout. The 3-channel axis is added in a later cell.
input_shape = (39,32)

In [11]:
# Accumulates [feature_matrix, class_label] pairs; later cells append to the same list.
MFCC_Dataset = []
folder_path = r"C:\Users\bharg\Desktop\Heart_Sounds\DatasetA\Time_Shifted_Audio"

# Every entry of the folder is treated as an audio file.
file_names = os.listdir(folder_path)
folder_abs_path = os.path.abspath(folder_path)

# Extract normalised MFCC features for each time-shifted recording.
for file_name in file_names:
    # The first 12 characters of the file name identify the source recording in Df_A.
    source_file = str(file_name[:12]) + ".wav"
    matching_rows = Df_A["File"] == source_file
    mfcc_class = Df_A.loc[matching_rows, "Class"].iloc[0]

    audio_path = os.path.join(folder_abs_path, file_name)
    mfcc_array = norm_mfcc(audio_path)

    MFCC_Dataset.append([mfcc_array.reshape(input_shape), mfcc_class])

In [12]:
# Number of samples collected so far (time-shifted recordings only).
len(MFCC_Dataset)

290

In [13]:
folder_path = r"C:\Users\bharg\Desktop\Heart_Sounds\DatasetA\Pitch_Shifted_Audio"

# Every entry of the folder is treated as an audio file.
file_names = os.listdir(folder_path)
folder_abs_path = os.path.abspath(folder_path)

# Extract normalised MFCC features for each pitch-shifted recording.
for file_name in file_names:
    # The first 12 characters of the file name identify the source recording in Df_A.
    label_lookup = Df_A.loc[Df_A["File"] == (str(file_name[:12]) + ".wav"), "Class"]
    mfcc_class = label_lookup.iloc[0]

    audio_path = os.path.join(folder_abs_path, file_name)
    mfcc_array = norm_mfcc(audio_path)

    sample = [mfcc_array.reshape(input_shape), mfcc_class]
    MFCC_Dataset.append(sample)

In [14]:
# Number of samples collected so far (time-shifted + pitch-shifted recordings).
len(MFCC_Dataset)

580

In [15]:
folder_path = r"C:\Users\bharg\Desktop\Heart_Sounds\DatasetA\Normalized_Audio"

# Every entry of the folder is treated as an audio file.
file_names = os.listdir(folder_path)

# Augmentation settings (adjust according to your needs); constant for the whole loop.
time_warping_range = 5
frequency_masking_para = 5
mask_max_frames = 5

# For each normalised recording store four samples: the plain MFCC matrix, one
# time-shifted variant and two frequency-masked variants.
for file_name in file_names:
    # The first 12 characters of the file name identify the source recording in Df_A.
    mfcc_class=Df_A.loc[Df_A["File"]==(str(file_name[:12])+".wav"),"Class"].iloc[0]
    audio_path = os.path.join(os.path.abspath(folder_path),file_name)
    mfcc_array=norm_mfcc(audio_path)

    # 1) Un-augmented sample. np.reshape can return a view of mfcc_array, so
    #    store a copy: nothing done to mfcc_array afterwards may leak into it.
    mfcc_reshaped = np.reshape(mfcc_array, input_shape).copy()
    MFCC_Dataset.append([mfcc_reshaped,mfcc_class])

    # 2) Time-shifted sample.
    warped_mfcc = time_warp(mfcc_array, time_warping_range)
    mfcc_reshaped = np.reshape(warped_mfcc,input_shape)
    MFCC_Dataset.append([mfcc_reshaped,mfcc_class])

    # 3) + 4) Two frequency-masked samples. Each call gets its own copy so the
    #    two masks are independent even if frequency_mask modifies its argument
    #    in place; sharing one array made both samples (and the un-augmented
    #    one) identical.
    masked_mfcc = frequency_mask(mfcc_array.copy(), frequency_masking_para, mask_max_frames)
    masked_mfcc2 = frequency_mask(mfcc_array.copy(), frequency_masking_para, mask_max_frames)
    mfcc_reshaped = np.reshape(masked_mfcc, input_shape)
    MFCC_Dataset.append([mfcc_reshaped,mfcc_class])
    mfcc_reshaped = np.reshape(masked_mfcc2, input_shape)
    MFCC_Dataset.append([mfcc_reshaped,mfcc_class])

In [16]:
# Total number of samples after adding the normalised recordings and their augmentations.
len(MFCC_Dataset)

1740

In [17]:
# One row per sample: the reshaped MFCC matrix and its class label.
MFCC_Df = pd.DataFrame(
    data=MFCC_Dataset,
    columns=['MFCC', 'Class'],
)

In [18]:
# Display the assembled feature table.
MFCC_Df

Unnamed: 0,MFCC,Class
0,"[[-7.1183896, -7.124328, -7.135321, -6.925219,...",Artifact
1,"[[-7.2328024, -7.1927686, -7.1960464, -7.21481...",Artifact
2,"[[-7.127256, -6.878575, -6.8865304, -6.8151107...",Artifact
3,"[[-6.463061, -6.445672, -6.377971, -6.424771, ...",Murmur
4,"[[-5.1188455, -5.630413, -6.7886634, -6.723926...",Murmur
...,...,...
1735,"[[-6.6211963, -6.4577603, -6.398823, -6.444192...",Murmur
1736,"[[-6.3066964, -6.2473, -6.4322557, -6.5906267,...",Murmur
1737,"[[-6.3066964, -6.3066964, -6.3066964, -6.2473,...",Murmur
1738,"[[-6.3066964, -6.2473, -6.4322557, -6.5906267,...",Murmur


In [19]:
# Samples per class, used to judge how balanced the dataset is.
MFCC_Df['Class'].value_counts()

Class
Artifact        720
Normal          408
Murmur          390
Extrasystole    222
Name: count, dtype: int64

In [37]:
# Print the shape of every stored feature matrix (one line per sample) to
# confirm they all match.
for sample_shape in map(np.shape, MFCC_Df["MFCC"]):
    print(sample_shape)

(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(

In [38]:
# Reduce the largest class by discarding a random half of its rows.
column_value = 'Artifact'

# Rows belonging to that class
filtered_df = MFCC_Df[MFCC_Df['Class'] == column_value]

# Number of rows to drop (half of the filtered rows, rounded down)
rows_to_drop = len(filtered_df) // 2

# Pick the rows with a dedicated, seeded generator so that Restart & Run All
# always removes the same rows; the train/test splits further down are seeded
# too and would otherwise still see a different dataset on every run.
drop_rng = np.random.default_rng(42)
rows = drop_rng.choice(filtered_df.index.to_numpy(), rows_to_drop, replace=False)

# Drop the selected rows (MFCC_Df itself is left untouched)
MFCC_Final = MFCC_Df.drop(rows)

# Print the updated DataFrame
print(MFCC_Final)

                                                   MFCC     Class
1     [[-7.2328024, -7.1927686, -7.1960464, -7.21481...  Artifact
2     [[-7.127256, -6.878575, -6.8865304, -6.8151107...  Artifact
3     [[-6.463061, -6.445672, -6.377971, -6.424771, ...    Murmur
4     [[-5.1188455, -5.630413, -6.7886634, -6.723926...    Murmur
5     [[-6.5309367, -6.5444617, -6.552616, -6.467783...    Murmur
...                                                 ...       ...
1735  [[-6.6211963, -6.4577603, -6.398823, -6.444192...    Murmur
1736  [[-6.3066964, -6.2473, -6.4322557, -6.5906267,...    Murmur
1737  [[-6.3066964, -6.3066964, -6.3066964, -6.2473,...    Murmur
1738  [[-6.3066964, -6.2473, -6.4322557, -6.5906267,...    Murmur
1739  [[-6.3066964, -6.2473, -6.4322557, -6.5906267,...    Murmur

[1380 rows x 2 columns]


In [39]:
# Stack the per-sample matrices into one feature array and collect the labels.
X = np.array(list(MFCC_Final["MFCC"]))
y = np.array(list(MFCC_Final["Class"]))

In [40]:
# Expect (n_samples, 39, 32) at this point.
X.shape

(1380, 39, 32)

In [41]:
# The MobileNetV2 base used below is built for 3-channel input, so the single
# feature plane is replicated three times along a new last axis.
input_shape = (39, 32, 3)
# Only expand while X is still (n_samples, 39, 32); without the guard a second
# run of this cell would append another axis and produce a 5-D array.
if X.ndim == 3:
    X = np.repeat(X[..., np.newaxis], 3, axis=-1)

In [42]:
# Expect (n_samples, 39, 32, 3) after the channel replication.
X.shape

(1380, 39, 32, 3)

In [43]:
# Distinct class labels, in sorted order.
np.unique(y)

array(['Artifact', 'Extrasystole', 'Murmur', 'Normal'], dtype='<U12')

In [44]:
# One-hot encode the string labels, one column per class.
# NOTE(review): the column order must match the index -> label table in pred();
# the displayed rows are consistent with Artifact, Extrasystole, Murmur, Normal.
# Only encode while y is still the 1-D label vector, so that re-running this
# cell does not try to encode the already encoded matrix.
if y.ndim == 1:
    y = np.array(pd.get_dummies(y))

In [45]:
# Inspect the one-hot encoded label matrix.
y

array([[ True, False, False, False],
       [ True, False, False, False],
       [False, False,  True, False],
       ...,
       [False, False,  True, False],
       [False, False,  True, False],
       [False, False,  True, False]])

In [46]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples as the test set; the fixed seed makes the split repeatable.
# NOTE(review): rows are split at random, and the dataset holds several
# augmented variants of each source recording, so variants of one recording can
# end up on both sides of the split. Consider splitting by source file.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [47]:
# Size of the training pool after holding out the test set.
X_train.shape

(1104, 39, 32, 3)

In [48]:
# Carve a 15% validation set out of the training pool, again with a fixed seed.
# NOTE(review): X_train / y_train are overwritten, so running this cell twice
# shrinks the training set a second time.
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train,
    y_train,
    test_size=0.15,
    random_state=42,
)

In [49]:
# Size of the training set after the validation split.
X_train.shape

(938, 39, 32, 3)

In [50]:
import tensorflow as tf
from tensorflow.keras.layers import Input, LSTM, Dense, Conv2D,MaxPooling2D
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras import layers
from tensorflow.keras.layers import Conv2D, Flatten, Dense, Dropout, MaxPooling2D, GlobalAveragePooling2D, BatchNormalization

In [51]:
from tensorflow import keras

# MobileNetV2 with ImageNet weights and without its classification head,
# frozen so that only the layers added below are trained.
base_model = keras.applications.MobileNetV2(
    input_shape=(39, 32, 3), include_top=False, weights='imagenet'
)
base_model.trainable = False

# Trainable head: flatten -> 64-unit ReLU -> dropout -> 4-way softmax
# (one output per class).
classifier_head = [
    keras.layers.Flatten(),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(4, activation='softmax'),
]
model_1 = keras.Sequential([base_model, *classifier_head])

model_1.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 mobilenetv2_1.00_224 (Funct  (None, 2, 1, 1280)       2257984   
 ional)                                                          
                                                                 
 flatten_1 (Flatten)         (None, 2560)              0         
                                                                 
 dense_2 (Dense)             (None, 64)                163904    
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                                 
 dense_3 (Dense)             (None, 4)                 260       
                                                                 
Total params: 2,422,148
Trainable params: 164,164
Non-trainable params: 2,257,984
______________________________________

In [52]:
# Multi-class setup: one-hot targets with categorical cross-entropy, Adam
# optimiser, accuracy reported as the metric.
model_1.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [53]:
# Save the full model to disk each time validation accuracy reaches a new maximum.
checkpoint = ModelCheckpoint(
    'heart_mobilenet_mfcc_1.hdf5',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=False,
)

# Train for 50 epochs, validating on the held-out validation set after each one.
history = model_1.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    epochs=50,
    batch_size=32,
    callbacks=[checkpoint],
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [54]:
# Evaluate on the held-out test set. Despite its name, test_accuracy holds all
# values returned by evaluate(); with the compile() call above index 0 is the
# loss and index 1 the accuracy, which is what gets printed.
# NOTE(review): this scores model_1 as it is after the last epoch, not the best
# checkpoint that was written to disk during training.
test_accuracy=model_1.evaluate(X_test,y_test,verbose=0)
print(test_accuracy[1])

0.8804348111152649


In [62]:
def pred(audio_path, model_path):
    """Classify a recording segment by segment and print one line per segment.

    Pipeline: load the file at its own sampling rate, low-pass filter it at
    195 Hz, resample it to a tenth of the original rate, cut it into
    consecutive 3-second segments (an incomplete last segment is dropped),
    and for every segment compute 48 MFCCs, standardise them, reshape them to
    the (1, 39, 32, 3) input the model expects and print the predicted class
    with its score.

    Parameters
    ----------
    audio_path : str
        Audio file to classify.
    model_path : str
        Saved Keras model to load with ``keras.models.load_model``.

    Returns
    -------
    None. Results are printed as ``<class name> <score>``.

    Raises
    ------
    ValueError
        If a segment's MFCC matrix does not contain exactly 39 * 32 values and
        therefore cannot be reshaped to the model input.
    """
    # Output index -> class name. NOTE(review): must match the column order of
    # the one-hot labels the model was trained on.
    classes = {0:'Artifact', 1:'Extrasystole', 2:'Murmur', 3:'Normal'}
    expected_size = 39 * 32

    audio, sampling_rate = librosa.load(audio_path, sr=None)

    # Denoising
    cutoff_frequency = 195
    denoised_audio = apply_low_pass_filter(audio, sampling_rate, cutoff_frequency)

    # Downsampling to a tenth of the original rate
    target_sampling_rate = sampling_rate // 10
    downsampled_audio = downsample_audio(denoised_audio, sampling_rate, target_sampling_rate)

    # Splitting into 3-second segments
    segment_length = target_sampling_rate * 3
    segments = split_audio(downsampled_audio, segment_length)

    model = keras.models.load_model(model_path)

    for segment in segments:
        # Compute MFCC features
        mfcc = librosa.feature.mfcc(y=segment, sr=target_sampling_rate, n_mfcc=48)

        # Fail with an explanation instead of NumPy's bare reshape error: the
        # number of frames depends on the file's sampling rate.
        if mfcc.size != expected_size:
            raise ValueError(
                f"MFCC matrix of shape {mfcc.shape} has {mfcc.size} values, "
                f"but the model input needs {expected_size} (39 x 32); "
                f"check the sampling rate of {audio_path!r}"
            )

        # Standardise with the mean / deviation of the whole matrix
        normalized_mfcc = (mfcc - np.mean(mfcc)) / np.std(mfcc)
        mfcc_reshaped = np.reshape(normalized_mfcc,(1,39,32))

        # Replicate the single plane into 3 channels -> (1, 39, 32, 3)
        mfcc_final = np.repeat(mfcc_reshaped[..., np.newaxis], 3, axis=-1)

        prediction = model.predict(mfcc_final)
        x=np.argmax(prediction)
        confidence = prediction[0, x]

        print(classes[x],confidence)

In [172]:
# Score a recording from the Atraining_extrahls folder with the "mfcc_8" model.
# NOTE(review): this model file is not the one written by the training cell in
# this notebook (heart_mobilenet_mfcc_1.hdf5).
audio_path = r"C:\Users\bharg\Desktop\Heart_Sounds\DatasetA\Atraining_extrahls\201101160808.wav"
model_path = r"C:\Users\bharg\Desktop\Heart_Sounds\DatasetA\Models\heart_mobilenet_mfcc_8.hdf5"
pred(audio_path=audio_path, model_path=model_path)

Extrasystole 0.9999813
Extrasystole 0.99992406


In [63]:
# Score the same Atraining_extrahls recording with the "mfcc_8_downsampled" model.
audio_path = r"C:\Users\bharg\Desktop\Heart_Sounds\DatasetA\Atraining_extrahls\201101160808.wav"
model_path = r"C:\Users\bharg\Desktop\Heart_Sounds\DatasetA\Models\heart_mobilenet_mfcc_8_downsampled.hdf5"
pred(audio_path=audio_path, model_path=model_path)

Extrasystole 0.9996804
Normal 0.9463461


In [64]:
# Score a recording stored outside the DatasetA folder (murmur_1.wav) with the
# "mfcc_8_downsampled" model.
audio_path = r"C:\Users\bharg\Desktop\Heart_Sounds\murmur_1.wav"
model_path = r"C:\Users\bharg\Desktop\Heart_Sounds\DatasetA\Models\heart_mobilenet_mfcc_8_downsampled.hdf5"
pred(audio_path=audio_path, model_path=model_path)

Artifact 0.999977
Artifact 0.99228865
Murmur 0.9982432
Murmur 0.99995935
Normal 0.865291
Murmur 0.9908213
Murmur 0.9873416
Artifact 0.961495
Murmur 0.9454602
Murmur 0.8226908
Murmur 0.73364216
Murmur 0.94140005
Normal 0.9934191
Artifact 0.7615139
Artifact 0.8350817
Murmur 0.6350628
Murmur 0.9993212
Murmur 0.65904564
Normal 0.4804638
Artifact 0.9987429
Murmur 0.5849357
Artifact 0.9568558


In [68]:
# Score a clip from the Downloads folder with the "mfcc_8_downsampled" model.
audio_path = r"C:\Users\bharg\Downloads\mixkit-inside-a-beating-heart-496.wav"
model_path = r"C:\Users\bharg\Desktop\Heart_Sounds\DatasetA\Models\heart_mobilenet_mfcc_8_downsampled.hdf5"
pred(audio_path=audio_path, model_path=model_path)

Normal 0.997938
Murmur 0.99999857
Normal 0.9838365
Murmur 0.99289894
Normal 0.613056
Normal 0.9926103
