In [2]:
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import librosa
import wave
import pandas as pd
import soundfile as sf
import librosa.display
from PIL import Image
import os
import tensorflow as tf
import scipy.signal as signal
import cv2

In [3]:
import tensorflow as tf
print(tf.__version__)

2.12.0


In [3]:
def apply_low_pass_filter(audio, sampling_rate, cutoff_freq):
    nyquist_freq = 0.5 * sampling_rate
    normalized_cutoff_freq = cutoff_freq / nyquist_freq
    b, a = signal.butter(4, normalized_cutoff_freq, btype='low', analog=False)
    denoised_audio = signal.lfilter(b, a, audio)
    return denoised_audio

In [4]:
def downsample_audio(audio,original_sampling_rate,target_sampling_rate):
    resampled_audio = librosa.resample(audio, orig_sr=original_sampling_rate, target_sr=target_sampling_rate)
    return resampled_audio

In [5]:
def split_audio(audio, segment_length):
    num_segments = len(audio) // segment_length
    segments = [audio[i*segment_length:(i+1)*segment_length] for i in range(num_segments)]
    return segments

In [6]:
time_shift_factor = 1
pitch_shift_steps = -0.5

def apply_pitch_shift(audio, target_sampling_rate, pitch_shift):
    audio_float = audio.astype(np.float32)  # Remove the normalization step
    shifted_audio = librosa.effects.pitch_shift(audio_float, sr=target_sampling_rate, n_steps=pitch_shift)
    return shifted_audio

def apply_time_shift(audio, time_shift):
    shifted_audio = np.roll(audio, int(time_shift))  # Convert the time shift to an integer
    return shifted_audio

In [19]:
from PIL import Image
spec_data=[]

In [20]:
def data(root_directory,final_class_labels):
    for folder_name, subfolders, filenames in os.walk(root_directory):
        # Iterate through audio files in the current folder
        for filename in filenames:
            if filename.lower().endswith(('.png')):
#                 Construct the full path to the audio file
                audio_path = os.path.join(folder_name, filename)
                img=Image.open(audio_path).convert('RGB')
                img=img.resize((128,128))
                img_array = np.asarray(img)
                img_array = img_array / 255
                spec_data.append([img_array,final_class_labels])   
#                 spec_data.append([img_array.tolist(),final_class_labels])   

In [21]:
data('/Users/rishikabethi/Desktop/Heart/Dataset_A/Spec-A/Atraining_artifact','artifact')
data('/Users/rishikabethi/Desktop/Heart/Dataset_A/SpecAugment-A/Atraining_artifact','artifact')

In [22]:
data('/Users/rishikabethi/Desktop/Heart/Dataset_A/Spec-A/Atraining_extrahls','extrahls')
data('/Users/rishikabethi/Desktop/Heart/Dataset_A/SpecAugment-A/Atraining_extrahls','extrahls')

In [23]:
data('/Users/rishikabethi/Desktop/Heart/Dataset_A/Spec-A/Atraining_murmur','murmur')
data('/Users/rishikabethi/Desktop/Heart/Dataset_A/SpecAugment-A/Atraining_murmur','murmur')

In [24]:
data('/Users/rishikabethi/Desktop/Heart/Dataset_A/Spec-A/Atraining_normal','normal')
data('/Users/rishikabethi/Desktop/Heart/Dataset_A/SpecAugment-A/Atraining_normal','normal')

In [26]:
import pandas as pd
spec_data=pd.DataFrame(spec_data,columns=['feature','class'])
spec_data.head()

Unnamed: 0,feature,class
0,"[[[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0,...",artifact
1,"[[[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0,...",artifact
2,"[[[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0,...",artifact
3,"[[[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0,...",artifact
4,"[[[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0,...",artifact


In [28]:
spec_data['class'].value_counts()

class
artifact    720
normal      408
murmur      396
extrahls    222
Name: count, dtype: int64

In [29]:
spec_data['feature'].shape

(1746,)

In [32]:
X=np.array(spec_data['feature'].tolist())
y=np.array(spec_data['class'].tolist())

In [33]:
X.shape

(1746, 128, 128, 3)

In [138]:
y.shape

(1746,)

In [39]:
#downsampling artifact in dataset-A (drop 50%)

import numpy as np

# Identify the class label you want to downsample
class_label = 'artifact'  # Update with your desired class label

# Separate the data into eliminated and remaining subsets
eliminated_subset_X = X[y == class_label]
eliminated_subset_y = y[y == class_label]
remaining_subset_X = X[y != class_label]
remaining_subset_y = y[y != class_label]

# Randomly select 50% of the data from the remaining subset
num_eliminated_samples = len(eliminated_subset_X)
num_samples_to_select = int(num_eliminated_samples * 0.5)

random_indices = np.random.choice(num_eliminated_samples, size=num_samples_to_select, replace=False)
randomly_selected_X = eliminated_subset_X[random_indices]
randomly_selected_y = eliminated_subset_y[random_indices]

# Combine the eliminated subset and the randomly selected subset
downsampled_X = np.concatenate((remaining_subset_X, randomly_selected_X), axis=0)
downsampled_y = np.concatenate((remaining_subset_y, randomly_selected_y), axis=0)


In [40]:
X=downsampled_X
y=np.array(pd.get_dummies(downsampled_y))

In [41]:
X=np.concatenate((X, X), axis=0)
y=np.concatenate((y, y), axis=0)

In [42]:
X.shape

(2772, 128, 128, 3)

In [43]:
y.shape

(2772, 4)

In [44]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

In [45]:
X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.15, random_state=42)

In [46]:
X_train.shape

(1884, 128, 128, 3)

In [47]:
from tensorflow.keras.applications import ResNet152V2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Flatten, Dense, Dropout

# Load the pre-trained ResNet152V2 model
resnet152v2 = ResNet152V2(weights='imagenet', include_top=False, input_shape=(128, 128, 3))

# Freeze the pre-trained layers
for layer in resnet152v2.layers:
    layer.trainable = False

# Add custom layers on top of the pre-trained model
flatten_layer = Flatten()(resnet152v2.output)
dense_layer = Dense(128, activation='relu')(flatten_layer)
dropout_layer = Dropout(0.35)(dense_layer)
classification_layer = Dense(4, activation='softmax')(dropout_layer)  # Replace 'num_classes' with the number of classes in your problem

# Create the final model
model = Model(inputs=resnet152v2.input, outputs=classification_layer)

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Print the model summary
model.summary()


Metal device set to: Apple M1
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 128, 128, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, 134, 134, 3)  0           ['input_1[0][0]']                
                                                                                                  
 conv1_conv (Conv2D)            (None, 64, 64, 64)   9472        ['conv1_pad[0][0]']              
                                                                                                  
 pool1_pad (ZeroPadding2D)      (None, 66, 66, 64)   0          

 vation)                                                                                          
                                                                                                  
 conv2_block3_1_conv (Conv2D)   (None, 32, 32, 64)   16384       ['conv2_block3_preact_relu[0][0]'
                                                                 ]                                
                                                                                                  
 conv2_block3_1_bn (BatchNormal  (None, 32, 32, 64)  256         ['conv2_block3_1_conv[0][0]']    
 ization)                                                                                         
                                                                                                  
 conv2_block3_1_relu (Activatio  (None, 32, 32, 64)  0           ['conv2_block3_1_bn[0][0]']      
 n)                                                                                               
          

 conv3_block2_2_bn (BatchNormal  (None, 16, 16, 128)  512        ['conv3_block2_2_conv[0][0]']    
 ization)                                                                                         
                                                                                                  
 conv3_block2_2_relu (Activatio  (None, 16, 16, 128)  0          ['conv3_block2_2_bn[0][0]']      
 n)                                                                                               
                                                                                                  
 conv3_block2_3_conv (Conv2D)   (None, 16, 16, 512)  66048       ['conv3_block2_2_relu[0][0]']    
                                                                                                  
 conv3_block2_out (Add)         (None, 16, 16, 512)  0           ['conv3_block1_out[0][0]',       
                                                                  'conv3_block2_3_conv[0][0]']    
          

 ization)                                                                                         
                                                                                                  
 conv3_block5_1_relu (Activatio  (None, 16, 16, 128)  0          ['conv3_block5_1_bn[0][0]']      
 n)                                                                                               
                                                                                                  
 conv3_block5_2_pad (ZeroPaddin  (None, 18, 18, 128)  0          ['conv3_block5_1_relu[0][0]']    
 g2D)                                                                                             
                                                                                                  
 conv3_block5_2_conv (Conv2D)   (None, 16, 16, 128)  147456      ['conv3_block5_2_pad[0][0]']     
                                                                                                  
 conv3_blo

 conv3_block8_preact_bn (BatchN  (None, 16, 16, 512)  2048       ['conv3_block7_out[0][0]']       
 ormalization)                                                                                    
                                                                                                  
 conv3_block8_preact_relu (Acti  (None, 16, 16, 512)  0          ['conv3_block8_preact_bn[0][0]'] 
 vation)                                                                                          
                                                                                                  
 conv3_block8_1_conv (Conv2D)   (None, 16, 16, 128)  65536       ['conv3_block8_preact_relu[0][0]'
                                                                 ]                                
                                                                                                  
 conv3_block8_1_bn (BatchNormal  (None, 16, 16, 128)  512        ['conv3_block8_1_conv[0][0]']    
 ization) 

 g2D)                                                                                             
                                                                                                  
 conv4_block2_2_conv (Conv2D)   (None, 8, 8, 256)    589824      ['conv4_block2_2_pad[0][0]']     
                                                                                                  
 conv4_block2_2_bn (BatchNormal  (None, 8, 8, 256)   1024        ['conv4_block2_2_conv[0][0]']    
 ization)                                                                                         
                                                                                                  
 conv4_block2_2_relu (Activatio  (None, 8, 8, 256)   0           ['conv4_block2_2_bn[0][0]']      
 n)                                                                                               
                                                                                                  
 conv4_blo

 conv4_block5_1_conv (Conv2D)   (None, 8, 8, 256)    262144      ['conv4_block5_preact_relu[0][0]'
                                                                 ]                                
                                                                                                  
 conv4_block5_1_bn (BatchNormal  (None, 8, 8, 256)   1024        ['conv4_block5_1_conv[0][0]']    
 ization)                                                                                         
                                                                                                  
 conv4_block5_1_relu (Activatio  (None, 8, 8, 256)   0           ['conv4_block5_1_bn[0][0]']      
 n)                                                                                               
                                                                                                  
 conv4_block5_2_pad (ZeroPaddin  (None, 10, 10, 256)  0          ['conv4_block5_1_relu[0][0]']    
 g2D)     

                                                                                                  
 conv4_block7_out (Add)         (None, 8, 8, 1024)   0           ['conv4_block6_out[0][0]',       
                                                                  'conv4_block7_3_conv[0][0]']    
                                                                                                  
 conv4_block8_preact_bn (BatchN  (None, 8, 8, 1024)  4096        ['conv4_block7_out[0][0]']       
 ormalization)                                                                                    
                                                                                                  
 conv4_block8_preact_relu (Acti  (None, 8, 8, 1024)  0           ['conv4_block8_preact_bn[0][0]'] 
 vation)                                                                                          
                                                                                                  
 conv4_blo

                                                                                                  
 conv4_block10_2_conv (Conv2D)  (None, 8, 8, 256)    589824      ['conv4_block10_2_pad[0][0]']    
                                                                                                  
 conv4_block10_2_bn (BatchNorma  (None, 8, 8, 256)   1024        ['conv4_block10_2_conv[0][0]']   
 lization)                                                                                        
                                                                                                  
 conv4_block10_2_relu (Activati  (None, 8, 8, 256)   0           ['conv4_block10_2_bn[0][0]']     
 on)                                                                                              
                                                                                                  
 conv4_block10_3_conv (Conv2D)  (None, 8, 8, 1024)   263168      ['conv4_block10_2_relu[0][0]']   
          

                                                                 ']                               
                                                                                                  
 conv4_block13_1_bn (BatchNorma  (None, 8, 8, 256)   1024        ['conv4_block13_1_conv[0][0]']   
 lization)                                                                                        
                                                                                                  
 conv4_block13_1_relu (Activati  (None, 8, 8, 256)   0           ['conv4_block13_1_bn[0][0]']     
 on)                                                                                              
                                                                                                  
 conv4_block13_2_pad (ZeroPaddi  (None, 10, 10, 256)  0          ['conv4_block13_1_relu[0][0]']   
 ng2D)                                                                                            
          

 conv4_block15_out (Add)        (None, 8, 8, 1024)   0           ['conv4_block14_out[0][0]',      
                                                                  'conv4_block15_3_conv[0][0]']   
                                                                                                  
 conv4_block16_preact_bn (Batch  (None, 8, 8, 1024)  4096        ['conv4_block15_out[0][0]']      
 Normalization)                                                                                   
                                                                                                  
 conv4_block16_preact_relu (Act  (None, 8, 8, 1024)  0           ['conv4_block16_preact_bn[0][0]']
 ivation)                                                                                         
                                                                                                  
 conv4_block16_1_conv (Conv2D)  (None, 8, 8, 256)    262144      ['conv4_block16_preact_relu[0][0]
          

 conv4_block18_2_conv (Conv2D)  (None, 8, 8, 256)    589824      ['conv4_block18_2_pad[0][0]']    
                                                                                                  
 conv4_block18_2_bn (BatchNorma  (None, 8, 8, 256)   1024        ['conv4_block18_2_conv[0][0]']   
 lization)                                                                                        
                                                                                                  
 conv4_block18_2_relu (Activati  (None, 8, 8, 256)   0           ['conv4_block18_2_bn[0][0]']     
 on)                                                                                              
                                                                                                  
 conv4_block18_3_conv (Conv2D)  (None, 8, 8, 1024)   263168      ['conv4_block18_2_relu[0][0]']   
                                                                                                  
 conv4_blo

                                                                                                  
 conv4_block21_1_bn (BatchNorma  (None, 8, 8, 256)   1024        ['conv4_block21_1_conv[0][0]']   
 lization)                                                                                        
                                                                                                  
 conv4_block21_1_relu (Activati  (None, 8, 8, 256)   0           ['conv4_block21_1_bn[0][0]']     
 on)                                                                                              
                                                                                                  
 conv4_block21_2_pad (ZeroPaddi  (None, 10, 10, 256)  0          ['conv4_block21_1_relu[0][0]']   
 ng2D)                                                                                            
                                                                                                  
 conv4_blo

                                                                  'conv4_block23_3_conv[0][0]']   
                                                                                                  
 conv4_block24_preact_bn (Batch  (None, 8, 8, 1024)  4096        ['conv4_block23_out[0][0]']      
 Normalization)                                                                                   
                                                                                                  
 conv4_block24_preact_relu (Act  (None, 8, 8, 1024)  0           ['conv4_block24_preact_bn[0][0]']
 ivation)                                                                                         
                                                                                                  
 conv4_block24_1_conv (Conv2D)  (None, 8, 8, 256)    262144      ['conv4_block24_preact_relu[0][0]
                                                                 ']                               
          

                                                                                                  
 conv4_block26_2_bn (BatchNorma  (None, 8, 8, 256)   1024        ['conv4_block26_2_conv[0][0]']   
 lization)                                                                                        
                                                                                                  
 conv4_block26_2_relu (Activati  (None, 8, 8, 256)   0           ['conv4_block26_2_bn[0][0]']     
 on)                                                                                              
                                                                                                  
 conv4_block26_3_conv (Conv2D)  (None, 8, 8, 1024)   263168      ['conv4_block26_2_relu[0][0]']   
                                                                                                  
 conv4_block26_out (Add)        (None, 8, 8, 1024)   0           ['conv4_block25_out[0][0]',      
          

 conv4_block29_1_bn (BatchNorma  (None, 8, 8, 256)   1024        ['conv4_block29_1_conv[0][0]']   
 lization)                                                                                        
                                                                                                  
 conv4_block29_1_relu (Activati  (None, 8, 8, 256)   0           ['conv4_block29_1_bn[0][0]']     
 on)                                                                                              
                                                                                                  
 conv4_block29_2_pad (ZeroPaddi  (None, 10, 10, 256)  0          ['conv4_block29_1_relu[0][0]']   
 ng2D)                                                                                            
                                                                                                  
 conv4_block29_2_conv (Conv2D)  (None, 8, 8, 256)    589824      ['conv4_block29_2_pad[0][0]']    
          

                                                                                                  
 conv4_block32_preact_bn (Batch  (None, 8, 8, 1024)  4096        ['conv4_block31_out[0][0]']      
 Normalization)                                                                                   
                                                                                                  
 conv4_block32_preact_relu (Act  (None, 8, 8, 1024)  0           ['conv4_block32_preact_bn[0][0]']
 ivation)                                                                                         
                                                                                                  
 conv4_block32_1_conv (Conv2D)  (None, 8, 8, 256)    262144      ['conv4_block32_preact_relu[0][0]
                                                                 ']                               
                                                                                                  
 conv4_blo

 conv4_block34_2_bn (BatchNorma  (None, 8, 8, 256)   1024        ['conv4_block34_2_conv[0][0]']   
 lization)                                                                                        
                                                                                                  
 conv4_block34_2_relu (Activati  (None, 8, 8, 256)   0           ['conv4_block34_2_bn[0][0]']     
 on)                                                                                              
                                                                                                  
 conv4_block34_3_conv (Conv2D)  (None, 8, 8, 1024)   263168      ['conv4_block34_2_relu[0][0]']   
                                                                                                  
 conv4_block34_out (Add)        (None, 8, 8, 1024)   0           ['conv4_block33_out[0][0]',      
                                                                  'conv4_block34_3_conv[0][0]']   
          

                                                                                                  
 conv5_block1_1_bn (BatchNormal  (None, 4, 4, 512)   2048        ['conv5_block1_1_conv[0][0]']    
 ization)                                                                                         
                                                                                                  
 conv5_block1_1_relu (Activatio  (None, 4, 4, 512)   0           ['conv5_block1_1_bn[0][0]']      
 n)                                                                                               
                                                                                                  
 conv5_block1_2_pad (ZeroPaddin  (None, 6, 6, 512)   0           ['conv5_block1_1_relu[0][0]']    
 g2D)                                                                                             
                                                                                                  
 conv5_blo

 conv5_block3_3_conv (Conv2D)   (None, 4, 4, 2048)   1050624     ['conv5_block3_2_relu[0][0]']    
                                                                                                  
 conv5_block3_out (Add)         (None, 4, 4, 2048)   0           ['conv5_block2_out[0][0]',       
                                                                  'conv5_block3_3_conv[0][0]']    
                                                                                                  
 post_bn (BatchNormalization)   (None, 4, 4, 2048)   8192        ['conv5_block3_out[0][0]']       
                                                                                                  
 post_relu (Activation)         (None, 4, 4, 2048)   0           ['post_bn[0][0]']                
                                                                                                  
 flatten (Flatten)              (None, 32768)        0           ['post_relu[0][0]']              
          

In [48]:
from tensorflow.keras.callbacks import ModelCheckpoint
checkpointer = ModelCheckpoint('heart_resnet_valid.hdf5',save_best_only=True,save_weights_only=False,monitor='val_accuracy',model='max')

In [50]:
start_time= time.time()

In [51]:
#dense-128 and x*2(x_valid)
num_epochs = 20
num_batch_size = 32
model.fit(X_train, y_train, batch_size=num_batch_size, epochs=num_epochs, validation_data=(X_valid, y_valid), callbacks=[checkpointer], verbose=1)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x5a66e9e10>

In [52]:
total_time=time.time() - start_time
total_time

748.9017159938812

In [63]:
y_pred=model.predict(X_test)



In [62]:
score = model.evaluate(X_test, y_test, verbose=0)
print("Testing Accuracy: ", score[1])

Testing Accuracy:  0.9855855703353882


In [70]:
c_acc = tf.keras.metrics.CategoricalAccuracy()
c_acc.update_state(y_test,y_pred)
c_acc.result().numpy()

0.9855856

In [71]:
auc = tf.keras.metrics.AUC()
auc.update_state(y_test,y_pred)
auc.result().numpy()

0.99971646

In [72]:
recall = tf.keras.metrics.Recall()
recall.update_state(y_test,y_pred)
recall.result().numpy()

0.9855856

In [73]:
precision = tf.keras.metrics.Precision()
precision.update_state(y_test,y_pred)
precision.result().numpy()

0.9855856

In [55]:
d = {0: 'Artifact', 1: 'Extrasystole', 2: 'Murmur', 3: 'Normal'}

def pred(audio_path):
    heart_class=[]
    heart_confidence=[]

    audio, sampling_rate = librosa.load(audio_path, sr=None)

    # Denoising
    cutoff_frequency = 195
    denoised_audio = apply_low_pass_filter(audio, sampling_rate, cutoff_frequency)

    # Downsampling
    target_sampling_rate = sampling_rate // 10
    downsampled_audio = downsample_audio(denoised_audio, sampling_rate, target_sampling_rate)

    # Splitting audio
    segment_length = target_sampling_rate * 3
    segments = split_audio(downsampled_audio, segment_length)
    
    for segment in segments:

        spectrogram = librosa.feature.melspectrogram(y=segment, sr=target_sampling_rate)

        # Convert to decibels
        spectrogram_db = librosa.power_to_db(spectrogram, ref=np.max)

        # Plot spectrogram
        plt.figure(figsize=(1.28,1.28))
        librosa.display.specshow(spectrogram_db, sr=target_sampling_rate)
        plt.savefig('spectrogram.png',transparent=True)
        plt.close()

        img=Image.open('spectrogram.png').convert('RGB')
        img_arr=np.asarray(img)
        img_arr=img_arr/255

        img_arr = img_arr.reshape(1, 128, 128, 3)
        
        prediction = model.predict(img_arr)
        x=np.argmax(prediction)
        confidence = prediction[0, x]
        heart_class.append(d[x])
        heart_confidence.append(confidence)
        os.remove('spectrogram.png')
    print(heart_class[heart_confidence.index(max(heart_confidence))])

In [56]:
pred('/Users/rishikabethi/Downloads/Artifact.wav')

Artifact


In [111]:
audio_path='/Users/rishikabethi/Desktop/Heart/Dataset_A/Atraining_murmur/201101051108.wav'
pred(audio_path)

Murmur


In [113]:
audio_path='/Users/rishikabethi/Desktop/Heart/Dataset_A/Atraining_normal/201103221214.wav'
pred(audio_path)

Normal


In [100]:
audio_path='/Users/rishikabethi/Desktop/Heart/Dataset_A/Atraining_extrahls/201104270458.wav'
pred(audio_path)

Extrasystole
