In [None]:
import numpy as np
import pandas as pd
import librosa
import os

In [None]:
# Directory containing the training .wav clips (machine-specific absolute path).
data_path = "C:\\Users\\user\\Downloads\\train"
# Index of the training clips; read from train.csv in the current working directory.
set_a=pd.read_csv("train.csv")

In [None]:
# Preview the first rows of the clip index loaded from train.csv.
set_a.head()

Unnamed: 0,fname,label
0,healthy00001.wav,healthy
1,healthy00002.wav,healthy
2,healthy00003.wav,healthy
3,healthy00004.wav,healthy
4,healthy00005.wav,healthy


In [None]:
def features_extractor(file, n_mfcc=40):
    """Return a fixed-length MFCC summary vector for one audio file.

    Parameters
    ----------
    file : str or path-like
        Audio file to load; passed unchanged to ``librosa.load``.
    n_mfcc : int, default 40
        Number of MFCC coefficients to compute.

    Returns
    -------
    numpy.ndarray
        1-D array of length ``n_mfcc`` holding each coefficient averaged
        over all frames of the clip.
    """
    # Load the file given as the argument. The previous version read a
    # notebook-level ``file_name`` variable and silently ignored ``file``.
    audio, sample_rate = librosa.load(file, res_type='kaiser_fast')
    mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)
    # mfcc returns (n_mfcc, n_frames); average over frames so clips of any
    # duration map to a vector of the same length.
    mfccs_scaled_features = np.mean(mfccs_features.T, axis=0)

    return mfccs_scaled_features

In [None]:
from tqdm import tqdm
### Iterate over every audio file listed in set_a and extract its
### Mel-Frequency Cepstral Coefficient (MFCC) feature vector.
# Resolve the data directory once instead of on every iteration.
train_dir = os.path.abspath(data_path)
extracted_features=[]
# total= lets tqdm draw a real progress bar with an ETA; iterrows() is a
# generator, so tqdm cannot infer its length on its own.
for index_num,row in tqdm(set_a.iterrows(), total=len(set_a)):
    file_name = os.path.join(train_dir, str(row["fname"]))
    final_class_labels=row["label"]
    data=features_extractor(file_name)
    # Each entry pairs the feature vector with its class label.
    extracted_features.append([data,final_class_labels])

2964it [19:15,  2.57it/s]


In [None]:
### Convert the list of [feature_vector, label] pairs into a two-column DataFrame
extracted_features_df=pd.DataFrame(extracted_features,columns=['feature','label'])
extracted_features_df.head()

Unnamed: 0,feature,label
0,"[-464.35178, 42.00815, 35.443245, 27.206812, 1...",healthy
1,"[-537.3792, 100.824684, 79.680916, 53.019573, ...",healthy
2,"[-473.14993, 115.7308, 88.082375, 54.047775, 2...",healthy
3,"[-463.59232, 112.05316, 87.31859, 56.56482, 29...",healthy
4,"[-483.0581, 115.37701, 82.38545, 42.255928, 9....",healthy


In [None]:
### Split the dataset into independent (X) and dependent (y) arrays
# X stacks the per-clip feature vectors into one 2-D array; y holds the label strings.
X=np.array(extracted_features_df['feature'].tolist())
y=np.array(extracted_features_df['label'].tolist())

In [None]:
# Sanity check: one row per audio clip, one column per MFCC coefficient.
X.shape

(2964, 40)

In [None]:
# Sanity check: one label per audio clip.
y.shape

(2964,)

In [None]:
### Train Test Split
# Hold out 20% of the clips for testing; random_state pins the shuffle so the split is repeatable.
# NOTE(review): the split is not stratified — consider stratify=y if the classes are imbalanced.
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2, random_state=2022)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,confusion_matrix
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
# Logistic-regression baseline.
# The raw MFCC features are on very different scales, which kept the default
# solver from converging within its iteration limit (ConvergenceWarning).
# Standardising inside a pipeline fixes that and ensures the scaler is fitted
# on the training split only; max_iter is raised as an extra safety margin.
logi_reg=make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000))
logi_reg.fit(X_train,y_train)
logi_pred=logi_reg.predict(X_test)
# Fraction of test clips classified correctly, printed as a percentage.
acc_logi=accuracy_score(y_test, logi_pred)
print(acc_logi*100)

88.1956155143339


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [None]:
from sklearn.ensemble import RandomForestClassifier
# Random-forest baseline. The forest is randomised (bootstrap samples and
# feature subsets), so random_state is fixed — using the same seed as the
# train/test split — to make the reported accuracy reproducible.
mod_RF=RandomForestClassifier(random_state=2022)
mod_RF.fit(X_train,y_train)
pred_RF=mod_RF.predict(X_test)
# Test-set accuracy, printed as a percentage.
acc_RF=accuracy_score(y_test,pred_RF)
print(acc_RF*100)

94.09780775716695


In [None]:
from sklearn.ensemble import AdaBoostClassifier
# AdaBoost baseline. random_state is fixed (same seed as the train/test
# split) so repeated runs report the same accuracy.
mod_ada=AdaBoostClassifier(random_state=2022)
mod_ada.fit(X_train,y_train)
pred_ada=mod_ada.predict(X_test)
# Test-set accuracy, printed as a percentage.
acc_ada=accuracy_score(y_test, pred_ada)
print(acc_ada*100)

91.90556492411467


In [None]:
from sklearn.svm import SVC
# Linear-kernel SVM baseline; probability=True enables predict_proba on the fitted model.
mod_svc=SVC(kernel='linear',probability = True)
mod_svc.fit(X_train,y_train)
pred_svc=mod_svc.predict(X_test)
# Test-set accuracy, printed as a percentage.
acc_svc=accuracy_score(y_test,pred_svc)
print(acc_svc*100)

91.73693086003372


In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
# Decision-tree and k-nearest-neighbours baselines.
# The tree breaks ties between equally good splits at random, so its
# random_state is fixed (same seed as the train/test split) for
# reproducible results. KNN with 6 neighbours is left unchanged.
mod_tree = DecisionTreeClassifier(random_state=2022)
mod_k = KNeighborsClassifier(n_neighbors=6)
mod_tree.fit(X_train, y_train)
mod_k.fit(X_train, y_train)
# Test-set predictions, scored in the following cells.
k_pred = mod_k.predict(X_test)
tree_pred = mod_tree.predict(X_test)

In [None]:
# Test-set accuracy of the decision tree, displayed as a percentage.
acc_tree = accuracy_score(y_test, tree_pred)
acc_tree*100

91.2310286677909

In [None]:
# Test-set accuracy of the k-nearest-neighbours model, displayed as a percentage.
acc_knn = accuracy_score(y_test, k_pred)
acc_knn*100

91.56829679595279

In [None]:
# Comparison table: one row per classifier with its test accuracy in percent.
ACC = pd.DataFrame(
    [
        ('Logistic Regression', acc_logi*100),
        ('Random Forest', acc_RF*100),
        ('Adaboost', acc_ada*100),
        ('SVM', acc_svc*100),
        ('Decision Tree Classifier', acc_tree*100),
        ('KNN', acc_knn*100),
    ],
    columns=['Algorithm', 'Accuracy'],
)

In [None]:
# Display the accuracy comparison table.
ACC

Unnamed: 0,Algorithm,Accuracy
0,Logistic Regression,88.195616
1,Random Forest,94.097808
2,Adaboost,91.905565
3,SVM,91.736931
4,Decision Tree Classifier,91.231029
5,KNN,91.568297


In [None]:
from sklearn.metrics import f1_score, matthews_corrcoef
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression

# Base learners for the ensemble: the five classifiers fitted in the earlier cells.
estimator_list = [
    ('knn', mod_k),
    ('svm', mod_svc),
    ('dtree', mod_tree),
    ('rf', mod_RF),
    ("ada", mod_ada),
]

# Stack the base learners under a logistic-regression meta-learner and fit it.
stack_model = StackingClassifier(
    estimators=estimator_list,
    final_estimator=LogisticRegression(),
)
stack_model.fit(X_train, y_train)

# Predictions on both splits.
y_train_pred = stack_model.predict(X_train)
y_test_pred = stack_model.predict(X_test)


def _stack_scores(y_true, y_pred):
    """Return (accuracy, MCC, weighted F1) for one set of predictions."""
    return (
        accuracy_score(y_true, y_pred),
        matthews_corrcoef(y_true, y_pred),
        f1_score(y_true, y_pred, average='weighted'),
    )


# Training-set and test-set performance.
(stack_model_train_accuracy,
 stack_model_train_mcc,
 stack_model_train_f1) = _stack_scores(y_train, y_train_pred)
(stack_model_test_accuracy,
 stack_model_test_mcc,
 stack_model_test_f1) = _stack_scores(y_test, y_test_pred)

# Report both splits in the same three-line layout.
print('Model performance for Training set')
for _metric, _score in (('Accuracy', stack_model_train_accuracy),
                        ('MCC', stack_model_train_mcc),
                        ('F1 score', stack_model_train_f1)):
    print('- %s: %s' % (_metric, _score))
print('----------------------------------')
print('Model performance for Test set')
for _metric, _score in (('Accuracy', stack_model_test_accuracy),
                        ('MCC', stack_model_test_mcc),
                        ('F1 score', stack_model_test_f1)):
    print('- %s: %s' % (_metric, _score))

Model performance for Training set
- Accuracy: 0.9949388443694643
- MCC: 0.9821334639785817
- F1 score: 0.9949588183162906
----------------------------------
Model performance for Test set
- Accuracy: 0.9460370994940978
- MCC: 0.8140679998429007
- F1 score: 0.9441263749621783


In [None]:
from sklearn.metrics import confusion_matrix
# Confusion matrix of the stacked model on the test split (rows: true class, columns: predicted class).
confusion_matrix(y_test, y_test_pred)

array([[476,   7],
       [ 25,  85]], dtype=int64)

In [None]:
from sklearn.metrics import classification_report
# Per-class precision, recall and F1 of the stacked model on the test split.
print(classification_report(y_test, y_test_pred))

              precision    recall  f1-score   support

     healthy       0.95      0.99      0.97       483
   unhealthy       0.92      0.77      0.84       110

    accuracy                           0.95       593
   macro avg       0.94      0.88      0.90       593
weighted avg       0.95      0.95      0.94       593



In [None]:
# Deep learning: dense neural network trained on the same MFCC features

In [None]:
### Label Encoding
# One-hot encode the string labels for the neural network (one column per class).
# Encode from the DataFrame's label column rather than from `y` itself, so
# re-running this cell never tries to one-hot encode an already-encoded array.
# dtype is pinned because the default get_dummies dtype differs between pandas versions.
y=np.array(pd.get_dummies(extracted_features_df['label'], dtype='uint8'))


In [None]:
### Train Test Split
# Re-split using the one-hot encoded y. This rebinds X_train/X_test/y_train/y_test,
# with the same test_size and random_state as the earlier split.
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2, random_state=2022)

In [None]:
# One-hot label matrix: one row per clip, one column per class.
y.shape

(2964, 2)

In [None]:
# Training features: 80% of the clips.
X_train.shape

(2371, 40)

In [None]:
# Held-out test features: the remaining 20% of the clips.
X_test.shape

(593, 40)

In [None]:
# One-hot training labels; row count should match X_train.
y_train.shape

(2371, 2)

In [None]:
# One-hot test labels; row count should match X_test.
y_test.shape

(593, 2)

In [None]:
# Inspect the training feature matrix (no scaling has been applied to it).
X_train

array([[-5.20704041e+02,  1.23896202e+02,  9.05382538e+01, ...,
        -1.60558140e+00, -2.68588603e-01,  7.51897991e-01],
       [-6.35821472e+02,  1.28705429e+02,  9.39685440e+01, ...,
         1.54518116e+00,  2.77337527e+00,  2.63617563e+00],
       [-4.92968201e+02,  1.65156815e+02,  1.18307076e+02, ...,
         2.47329974e+00,  3.15899014e+00,  2.24857044e+00],
       ...,
       [-4.95041931e+02,  8.05405121e+01,  6.50043106e+01, ...,
        -3.58712584e-01, -3.92459363e-01,  5.79564311e-02],
       [-4.26509094e+02,  1.25598045e+02,  8.93572769e+01, ...,
         1.47553325e+00,  2.53331661e+00,  2.25540209e+00],
       [-4.24604156e+02,  1.30097977e+02,  9.23661499e+01, ...,
         8.74363363e-01,  1.86571050e+00,  1.52698898e+00]], dtype=float32)

In [None]:
# Inspect the one-hot encoded labels.
y

array([[1, 0],
       [1, 0],
       [1, 0],
       ...,
       [0, 1],
       [0, 1],
       [0, 1]], dtype=uint8)

In [None]:
import tensorflow as tf
# Record the TensorFlow version this notebook was run with.
print(tf.__version__)

2.13.0


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout,Activation,Flatten
from tensorflow.keras.optimizers import Adam
from sklearn import metrics

In [None]:
### No of classes
# Number of classes = number of one-hot columns in y; sizes the network's output layer.
num_labels=y.shape[1]

In [None]:
# Fully connected classifier over the 40-value feature vectors:
# three ReLU hidden layers (100 -> 200 -> 100 units), each followed by
# 50% dropout, then a softmax output with one unit per class.
model = Sequential([
    # first hidden layer (also declares the input shape)
    Dense(100, input_shape=(40,)),
    Activation('relu'),
    Dropout(0.5),
    # second hidden layer
    Dense(200),
    Activation('relu'),
    Dropout(0.5),
    # third hidden layer
    Dense(100),
    Activation('relu'),
    Dropout(0.5),
    # output layer
    Dense(num_labels),
    Activation('softmax'),
])

In [None]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 100)               4100      
                                                                 
 activation (Activation)     (None, 100)               0         
                                                                 
 dropout (Dropout)           (None, 100)               0         
                                                                 
 dense_1 (Dense)             (None, 200)               20200     
                                                                 
 activation_1 (Activation)   (None, 200)               0         
                                                                 
 dropout_1 (Dropout)         (None, 200)               0         
                                                                 
 dense_2 (Dense)             (None, 100)               2

In [None]:
# The targets are one-hot vectors and the output layer is a softmax over
# num_labels units, so categorical cross-entropy is the matching loss.
# With exactly two classes it yields the same value as the binary loss used
# before, but it also stays correct if more classes are added, and it makes
# the 'accuracy' metric resolve to categorical accuracy.
model.compile(loss='categorical_crossentropy',metrics=['accuracy'],optimizer='adam')

In [None]:
## Train the model, checkpointing whenever the validation loss improves
from datetime import datetime
from tensorflow.keras.callbacks import ModelCheckpoint

num_epochs, num_batch_size = 200, 32

# save_best_only=True rewrites the file only on epochs where the monitored
# value improves (val_loss, per the training log).
checkpointer = ModelCheckpoint(
    filepath='saved_models/heart_sound_classification.hdf5',
    save_best_only=True,
    verbose=1,
)

# NOTE(review): the test split doubles as validation data here, so the
# checkpoint is selected on the same data later used for evaluation.
start = datetime.now()
model.fit(
    X_train,
    y_train,
    epochs=num_epochs,
    batch_size=num_batch_size,
    callbacks=[checkpointer],
    validation_data=(X_test, y_test),
    verbose=1,
)
duration = datetime.now() - start

print("Training completed in time: ", duration)

Epoch 1/200
Epoch 1: val_loss improved from inf to 0.82946, saving model to saved_models\heart_sound_classification.hdf5
Epoch 2/200
11/75 [===>..........................] - ETA: 0s - loss: 3.5921 - accuracy: 0.7670

  saving_api.save_model(


Epoch 2: val_loss improved from 0.82946 to 0.61890, saving model to saved_models\heart_sound_classification.hdf5
Epoch 3/200
Epoch 3: val_loss did not improve from 0.61890
Epoch 4/200
Epoch 4: val_loss did not improve from 0.61890
Epoch 5/200
Epoch 5: val_loss improved from 0.61890 to 0.59271, saving model to saved_models\heart_sound_classification.hdf5
Epoch 6/200
Epoch 6: val_loss improved from 0.59271 to 0.55891, saving model to saved_models\heart_sound_classification.hdf5
Epoch 7/200
Epoch 7: val_loss improved from 0.55891 to 0.52218, saving model to saved_models\heart_sound_classification.hdf5
Epoch 8/200
Epoch 8: val_loss did not improve from 0.52218
Epoch 9/200
Epoch 9: val_loss improved from 0.52218 to 0.48349, saving model to saved_models\heart_sound_classification.hdf5
Epoch 10/200
Epoch 10: val_loss did not improve from 0.48349
Epoch 11/200
Epoch 11: val_loss improved from 0.48349 to 0.47113, saving model to saved_models\heart_sound_classification.hdf5
Epoch 12/200
Epoch 12:

Epoch 28: val_loss did not improve from 0.30938
Epoch 29/200
Epoch 29: val_loss improved from 0.30938 to 0.30518, saving model to saved_models\heart_sound_classification.hdf5
Epoch 30/200
Epoch 30: val_loss improved from 0.30518 to 0.28956, saving model to saved_models\heart_sound_classification.hdf5
Epoch 31/200
Epoch 31: val_loss did not improve from 0.28956
Epoch 32/200
Epoch 32: val_loss did not improve from 0.28956
Epoch 33/200
Epoch 33: val_loss improved from 0.28956 to 0.28774, saving model to saved_models\heart_sound_classification.hdf5
Epoch 34/200
Epoch 34: val_loss did not improve from 0.28774
Epoch 35/200
Epoch 35: val_loss improved from 0.28774 to 0.27111, saving model to saved_models\heart_sound_classification.hdf5
Epoch 36/200
Epoch 36: val_loss did not improve from 0.27111
Epoch 37/200
Epoch 37: val_loss did not improve from 0.27111
Epoch 38/200
Epoch 38: val_loss improved from 0.27111 to 0.25953, saving model to saved_models\heart_sound_classification.hdf5
Epoch 39/200

Epoch 56/200
Epoch 56: val_loss did not improve from 0.21567
Epoch 57/200
Epoch 57: val_loss did not improve from 0.21567
Epoch 58/200
Epoch 58: val_loss did not improve from 0.21567
Epoch 59/200
Epoch 59: val_loss did not improve from 0.21567
Epoch 60/200
Epoch 60: val_loss did not improve from 0.21567
Epoch 61/200
Epoch 61: val_loss did not improve from 0.21567
Epoch 62/200
Epoch 62: val_loss improved from 0.21567 to 0.21532, saving model to saved_models\heart_sound_classification.hdf5
Epoch 63/200
Epoch 63: val_loss did not improve from 0.21532
Epoch 64/200
Epoch 64: val_loss did not improve from 0.21532
Epoch 65/200
Epoch 65: val_loss did not improve from 0.21532
Epoch 66/200
Epoch 66: val_loss improved from 0.21532 to 0.20589, saving model to saved_models\heart_sound_classification.hdf5
Epoch 67/200
Epoch 67: val_loss did not improve from 0.20589
Epoch 68/200
Epoch 68: val_loss did not improve from 0.20589
Epoch 69/200
Epoch 69: val_loss did not improve from 0.20589
Epoch 70/200
E

Epoch 85: val_loss did not improve from 0.20589
Epoch 86/200
Epoch 86: val_loss did not improve from 0.20589
Epoch 87/200
Epoch 87: val_loss did not improve from 0.20589
Epoch 88/200
Epoch 88: val_loss improved from 0.20589 to 0.20340, saving model to saved_models\heart_sound_classification.hdf5
Epoch 89/200
Epoch 89: val_loss did not improve from 0.20340
Epoch 90/200
Epoch 90: val_loss did not improve from 0.20340
Epoch 91/200
Epoch 91: val_loss did not improve from 0.20340
Epoch 92/200
Epoch 92: val_loss did not improve from 0.20340
Epoch 93/200
Epoch 93: val_loss did not improve from 0.20340
Epoch 94/200
Epoch 94: val_loss did not improve from 0.20340
Epoch 95/200
Epoch 95: val_loss improved from 0.20340 to 0.19117, saving model to saved_models\heart_sound_classification.hdf5
Epoch 96/200
Epoch 96: val_loss did not improve from 0.19117
Epoch 97/200
Epoch 97: val_loss did not improve from 0.19117
Epoch 98/200
Epoch 98: val_loss did not improve from 0.19117
Epoch 99/200
Epoch 99: val_

Epoch 114/200
Epoch 114: val_loss did not improve from 0.18248
Epoch 115/200
Epoch 115: val_loss did not improve from 0.18248
Epoch 116/200
Epoch 116: val_loss did not improve from 0.18248
Epoch 117/200
Epoch 117: val_loss did not improve from 0.18248
Epoch 118/200
Epoch 118: val_loss did not improve from 0.18248
Epoch 119/200
Epoch 119: val_loss did not improve from 0.18248
Epoch 120/200
Epoch 120: val_loss did not improve from 0.18248
Epoch 121/200
Epoch 121: val_loss did not improve from 0.18248
Epoch 122/200
Epoch 122: val_loss did not improve from 0.18248
Epoch 123/200
Epoch 123: val_loss did not improve from 0.18248
Epoch 124/200
Epoch 124: val_loss did not improve from 0.18248
Epoch 125/200
Epoch 125: val_loss did not improve from 0.18248
Epoch 126/200
Epoch 126: val_loss did not improve from 0.18248
Epoch 127/200
Epoch 127: val_loss did not improve from 0.18248
Epoch 128/200
Epoch 128: val_loss did not improve from 0.18248
Epoch 129/200
Epoch 129: val_loss did not improve from 

Epoch 144/200
Epoch 144: val_loss did not improve from 0.18248
Epoch 145/200
Epoch 145: val_loss did not improve from 0.18248
Epoch 146/200
Epoch 146: val_loss did not improve from 0.18248
Epoch 147/200
Epoch 147: val_loss did not improve from 0.18248
Epoch 148/200
Epoch 148: val_loss did not improve from 0.18248
Epoch 149/200
Epoch 149: val_loss did not improve from 0.18248
Epoch 150/200
Epoch 150: val_loss improved from 0.18248 to 0.17324, saving model to saved_models\heart_sound_classification.hdf5
Epoch 151/200
Epoch 151: val_loss did not improve from 0.17324
Epoch 152/200
Epoch 152: val_loss did not improve from 0.17324
Epoch 153/200
Epoch 153: val_loss did not improve from 0.17324
Epoch 154/200
Epoch 154: val_loss did not improve from 0.17324
Epoch 155/200
Epoch 155: val_loss did not improve from 0.17324
Epoch 156/200
Epoch 156: val_loss did not improve from 0.17324
Epoch 157/200
Epoch 157: val_loss did not improve from 0.17324
Epoch 158/200
Epoch 158: val_loss did not improve fr

Epoch 174/200
Epoch 174: val_loss did not improve from 0.17324
Epoch 175/200
Epoch 175: val_loss did not improve from 0.17324
Epoch 176/200
Epoch 176: val_loss did not improve from 0.17324
Epoch 177/200
Epoch 177: val_loss did not improve from 0.17324
Epoch 178/200
Epoch 178: val_loss did not improve from 0.17324
Epoch 179/200
Epoch 179: val_loss did not improve from 0.17324
Epoch 180/200
Epoch 180: val_loss did not improve from 0.17324
Epoch 181/200
Epoch 181: val_loss did not improve from 0.17324
Epoch 182/200
Epoch 182: val_loss did not improve from 0.17324
Epoch 183/200
Epoch 183: val_loss did not improve from 0.17324
Epoch 184/200
Epoch 184: val_loss did not improve from 0.17324
Epoch 185/200
Epoch 185: val_loss improved from 0.17324 to 0.17281, saving model to saved_models\heart_sound_classification.hdf5
Epoch 186/200
Epoch 186: val_loss did not improve from 0.17281
Epoch 187/200
Epoch 187: val_loss did not improve from 0.17281
Epoch 188/200
Epoch 188: val_loss did not improve fr

In [None]:
# evaluate() returns [loss, accuracy] for this model (one compiled metric), so index 1 is the accuracy.
# This scores `model` as it stands in memory; the checkpoint file written during training is not loaded here.
test_accuracy=model.evaluate(X_test,y_test,verbose=0)
print(test_accuracy[1])

0.9409780502319336
