In [1]:
import matplotlib.pyplot as plt
import numpy as np
import librosa
import wave
import pandas as pd
import soundfile as sf
import librosa.display
import IPython.display as ipd
from PIL import Image
import os
import scipy.signal as signal
from tqdm import tqdm
import random

In [2]:
# Step 1: Denoising using a low pass filter
def apply_low_pass_filter(audio, sampling_rate, cutoff_freq):
    nyquist_freq = 0.5 * sampling_rate
    normalized_cutoff_freq = cutoff_freq / nyquist_freq
    b, a = signal.butter(4, normalized_cutoff_freq, btype='low', analog=False)
    denoised_audio = signal.lfilter(b, a, audio)
    return denoised_audio

In [3]:
# Downsampling audio
def downsample_audio(audio,original_sampling_rate,target_sampling_rate):
    resampled_audio = librosa.resample(audio, orig_sr=original_sampling_rate, target_sr=target_sampling_rate)
    return resampled_audio

In [4]:
# Split audio into fixed-length segments
def split_audio(audio, segment_length):
    num_segments = len(audio) // segment_length
    segments = [audio[i*segment_length:(i+1)*segment_length] for i in range(num_segments)]
    return segments

In [5]:
def norm_mfcc(audio_path):

    # Load audio file
    audio, sr = librosa.load(audio_path, sr=None)

    # Compute MFCC features
    mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=48)

    # Normalize MFCC values
    normalized_mfcc = (mfcc - np.mean(mfcc)) / np.std(mfcc)
    return normalized_mfcc

In [6]:
def time_warp(mfcc, time_warping_range):
    num_frames = mfcc.shape[1]
    t = np.arange(num_frames)
    random_shift = np.random.randint(-time_warping_range, time_warping_range)
    warped_t = np.clip(t + random_shift, 0, num_frames - 1)
    warped_mfcc = mfcc[:, warped_t]
    return warped_mfcc[:, :48]

In [7]:
def frequency_mask(mfcc, frequency_masking_para, mask_max_frames):
    num_mfcc = mfcc.shape[0]
    num_frames = mfcc.shape[1]
    f = np.random.randint(0, frequency_masking_para)
    f0 = random.randint(0, num_mfcc - f)
    mfcc[f0:f0 + f, :] = 0.0
    return mfcc[:, :48]

In [8]:
Df_A=pd.read_csv("../dataframes/Heart_Audio_Default_Folders_A.csv")

In [9]:
Df_A

Unnamed: 0,File,Class,Folder
0,201012172012.wav,Artifact,Atraining_artifact
1,201105040918.wav,Artifact,Atraining_artifact
2,201105041959.wav,Artifact,Atraining_artifact
3,201105051017.wav,Artifact,Atraining_artifact
4,201105060108.wav,Artifact,Atraining_artifact
...,...,...,...
119,201103200218.wav,Extrasystole,Atraining_extrahls
120,201104021355.wav,Extrasystole,Atraining_extrahls
121,201104140118.wav,Extrasystole,Atraining_extrahls
122,201104270458.wav,Extrasystole,Atraining_extrahls


In [10]:
input_shape = (39,32)  # Example input shape for MobileNetV2

In [12]:
MFCC_Dataset=[]
folder_path ="../Datasets/A_data/time_shifted_audio"

# Get the file names from the folder
file_names = os.listdir(folder_path)

# Print the file names
for file_name in file_names:
    mfcc_class=Df_A.loc[Df_A["File"]==(str(file_name[:12])+".wav"),"Class"].iloc[0]
    audio_path = os.path.join(os.path.abspath(folder_path),file_name)
    mfcc_array=norm_mfcc(audio_path)
    # Reshape MFCC array
    mfcc_reshaped = np.reshape(mfcc_array,input_shape)
    MFCC_Dataset.append([mfcc_reshaped,mfcc_class])

In [13]:
len(MFCC_Dataset)

291

In [14]:
folder_path = "../Datasets/A_data/pitch_shifted_audio"

# Get the file names from the folder
file_names = os.listdir(folder_path)

# Print the file names
for file_name in file_names:
    mfcc_class=Df_A.loc[Df_A["File"]==(str(file_name[:12])+".wav"),"Class"].iloc[0]
    audio_path = os.path.join(os.path.abspath(folder_path),file_name)
    mfcc_array=norm_mfcc(audio_path)
    # Reshape MFCC array
    mfcc_reshaped = np.reshape(mfcc_array,input_shape)
    MFCC_Dataset.append([mfcc_reshaped,mfcc_class])

In [15]:
len(MFCC_Dataset)

582

In [16]:
folder_path = "../Datasets/A_data/normalised_audio"

# Get the file names from the folder
file_names = os.listdir(folder_path)

# Print the file names
for file_name in file_names:
    mfcc_class=Df_A.loc[Df_A["File"]==(str(file_name[:12])+".wav"),"Class"].iloc[0]
    audio_path = os.path.join(os.path.abspath(folder_path),file_name)
    mfcc_array=norm_mfcc(audio_path)
    # Reshape MFCC array
    mfcc_reshaped = np.reshape(mfcc_array, input_shape)
    MFCC_Dataset.append([mfcc_reshaped,mfcc_class])
    
    # Set time warping range (adjust according to your needs)
    time_warping_range = 5

    warped_mfcc = time_warp(mfcc_array, time_warping_range)
    # Reshape MFCC array
    mfcc_reshaped = np.reshape(warped_mfcc,input_shape)
    MFCC_Dataset.append([mfcc_reshaped,mfcc_class])
    
    # Set frequency masking parameters (adjust according to your needs)
    frequency_masking_para = 5
    mask_max_frames = 5

    masked_mfcc = frequency_mask(mfcc_array, frequency_masking_para, mask_max_frames)
    masked_mfcc2 = frequency_mask(mfcc_array, frequency_masking_para, mask_max_frames)
    # Reshape MFCC array
    mfcc_reshaped = np.reshape(masked_mfcc, input_shape)
    MFCC_Dataset.append([mfcc_reshaped,mfcc_class])
    mfcc_reshaped = np.reshape(masked_mfcc2, input_shape)
    MFCC_Dataset.append([mfcc_reshaped,mfcc_class])

In [17]:
len(MFCC_Dataset)

1746

In [18]:
MFCC_Df=pd.DataFrame(MFCC_Dataset, columns=['MFCC', 'Class'])

In [19]:
MFCC_Df

Unnamed: 0,MFCC,Class
0,"[[-5.269797, -5.763967, -6.6692266, -6.3825703...",Extrasystole
1,"[[-5.6569643, -6.1762, -6.7199655, -6.6153536,...",Extrasystole
2,"[[-3.1142056, -3.6126595, -6.192634, -6.212791...",Artifact
3,"[[-3.2704666, -3.6076217, -5.9305277, -6.04547...",Artifact
4,"[[-6.0625944, -6.4801135, -7.0279455, -6.63700...",Normal
...,...,...
1741,"[[-6.264359, -6.0892177, -6.101441, -6.2145867...",Murmur
1742,"[[-7.60592, -7.627124, -7.6543207, -7.6105256,...",Artifact
1743,"[[-7.60592, -7.60592, -7.60592, -7.627124, -7....",Artifact
1744,"[[-7.60592, -7.627124, -7.6543207, -7.6105256,...",Artifact


In [20]:
MFCC_Df['Class'].value_counts()

Class
Artifact        720
Normal          402
Murmur          402
Extrasystole    222
Name: count, dtype: int64

In [21]:
for i in MFCC_Df["MFCC"]:
    print(i.shape)

(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(39, 32)
(

In [22]:
# Define the column value to filter
column_value = 'Artifact'

# Filter the DataFrame based on the column value
filtered_df = MFCC_Df[MFCC_Df['Class'] == column_value]

# Calculate the number of rows to drop (half of the filtered rows)
rows_to_drop = len(filtered_df) // 2

# Randomly select rows to drop
rows = np.random.choice(filtered_df.index, rows_to_drop, replace=False)

# Drop the selected rows
MFCC_Final = MFCC_Df.drop(rows)

# Print the updated DataFrame
print(MFCC_Final)

                                                   MFCC         Class
0     [[-5.269797, -5.763967, -6.6692266, -6.3825703...  Extrasystole
1     [[-5.6569643, -6.1762, -6.7199655, -6.6153536,...  Extrasystole
2     [[-3.1142056, -3.6126595, -6.192634, -6.212791...      Artifact
3     [[-3.2704666, -3.6076217, -5.9305277, -6.04547...      Artifact
4     [[-6.0625944, -6.4801135, -7.0279455, -6.63700...        Normal
...                                                 ...           ...
1739  [[-6.412322, -6.758966, -6.60008, -6.3091345, ...        Murmur
1740  [[-6.264359, -6.0892177, -6.101441, -6.2145867...        Murmur
1741  [[-6.264359, -6.0892177, -6.101441, -6.2145867...        Murmur
1743  [[-7.60592, -7.60592, -7.60592, -7.627124, -7....      Artifact
1744  [[-7.60592, -7.627124, -7.6543207, -7.6105256,...      Artifact

[1386 rows x 2 columns]


In [23]:
X=np.array(MFCC_Final["MFCC"].tolist())
y=np.array(MFCC_Final["Class"].tolist())

In [24]:
X.shape

(1386, 39, 32)

In [25]:
input_shape = (39, 32, 3)
X = np.repeat(X[..., np.newaxis], 3, axis=-1)

In [26]:
X.shape

(1386, 39, 32, 3)

In [27]:
np.unique(y)

array(['Artifact', 'Extrasystole', 'Murmur', 'Normal'], dtype='<U12')

In [28]:
y=np.array(pd.get_dummies(y))

In [29]:
y

array([[False,  True, False, False],
       [False,  True, False, False],
       [ True, False, False, False],
       ...,
       [False, False,  True, False],
       [ True, False, False, False],
       [ True, False, False, False]])

In [30]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=42)

In [31]:
X_train.shape

(1108, 39, 32, 3)

In [32]:
X_train,X_valid,y_train,y_valid=train_test_split(X_train,y_train,test_size=0.15,random_state=42)

In [33]:
X_train.shape

(941, 39, 32, 3)

In [34]:
import tensorflow as tf
from tensorflow.keras.layers import Input, LSTM, Dense, Conv2D,MaxPooling2D
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras import layers
from tensorflow.keras.layers import Conv2D, Flatten, Dense, Dropout, MaxPooling2D, GlobalAveragePooling2D, BatchNormalization
from tensorflow import keras

In [69]:
base_model=keras.applications.MobileNetV2(
    input_shape=(39,32,3),
    include_top=False,
    weights='imagenet'
)

base_model.trainable = False

model = keras.Sequential([
    base_model,
    keras.layers.Flatten(),
    keras.layers.Dense(256, activation='relu'),
    keras.layers.Dropout(0.35),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dropout(0.3),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dropout(0.25),
    keras.layers.Dense(4, activation='softmax')
])

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 mobilenetv2_1.00_224 (Func  (None, 2, 1, 1280)        2257984   
 tional)                                                         
                                                                 
 flatten (Flatten)           (None, 2560)              0         
                                                                 
 dense (Dense)               (None, 256)               655616    
                                                                 
 dropout (Dropout)           (None, 256)               0         
                                                                 
 dense_1 (Dense)             (None, 128)               32896     
                                                                 
 dropout_1 (Dropout)         (None, 128)               0         
                                                        

In [70]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [71]:
checkpoint = ModelCheckpoint('heart_mobilenet_mfcc.hdf5', save_best_only=True, save_weights_only=False, monitor='val_loss', mode='max')
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_valid, y_valid), callbacks=[checkpoint],verbose=1)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [72]:
test_accuracy=model.evaluate(X_test,y_test,verbose=0)
print(test_accuracy[1])

0.8812949657440186


In [73]:
def pred(audio_path, model_path):
    audio, sampling_rate = librosa.load(audio_path, sr=None)
    # Denoising
    cutoff_frequency = 195
    denoised_audio = apply_low_pass_filter(audio, sampling_rate, cutoff_frequency)

    # Downsampling
    target_sampling_rate = sampling_rate // 10
    downsampled_audio = downsample_audio(denoised_audio, sampling_rate, target_sampling_rate)

    # Splitting audio
    segment_length = target_sampling_rate * 3
    segments = split_audio(downsampled_audio, segment_length)
    
    model = keras.models.load_model(model_path)
    i=0
    
    for segment in segments:
        # Compute MFCC features
        mfcc = librosa.feature.mfcc(y=segment, sr=target_sampling_rate, n_mfcc=48)

        # Normalize MFCC values
        normalized_mfcc = (mfcc - np.mean(mfcc)) / np.std(mfcc)
        mfcc_reshaped = np.reshape(normalized_mfcc,(1,39,32))
        
        input_shape = (39, 32, 3)
        mfcc_final = np.repeat(mfcc_reshaped[..., np.newaxis], 3, axis=-1)
        
        prediction = model.predict(mfcc_final)
        x=np.argmax(prediction)
        confidence = prediction[0, x]
        i=i+1

#         prediction = model.predict(img_arr)
#         x=np.argmax(prediction)

        classes={0:'Artifact', 1:'Extrasystole', 2:'Murmur', 3:'Normal'}
        print(classes[x],confidence)
#         print(mfcc_final.shape)

In [67]:
import tensorflow.keras.backend as K

In [68]:
K.clear_session()