In [1]:
import os
import pandas as pd
import numpy as np
import librosa
from os import listdir
from os.path import isfile, join, normpath
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import f1_score, confusion_matrix, classification_report
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

In [2]:
RANDOM_STATE = 26
TEST_SIZE = 0.3
N_MFCC = 60
LS_CSV_PATH = "../data/HLS-CMDS/LS.csv"
LS_AUDIO_PATH = "../data/HLS-CMDS/LS/LS"

In [3]:
df = pd.read_csv(LS_CSV_PATH)
print(df.head())

  Gender Lung Sound Type Location Lung Sound ID
0      M          Normal      RUA       M_N_RUA
1      F          Normal      LUA       F_N_LUA
2      F          Normal      RMA       F_N_RMA
3      F          Normal      LMA       F_N_LMA
4      M          Normal      RLA       M_N_RLA


In [4]:
df.columns

Index(['Gender', 'Lung Sound Type', 'Location', 'Lung Sound ID'], dtype='object')

In [5]:
df['audio_path'] = df['Lung Sound ID'].apply(lambda x: normpath(join(LS_AUDIO_PATH, f"{str(x).strip()}.wav").replace('\\', '/')))

In [6]:
df['audio_path'] = df['audio_path'].str.replace('_C_', '_FC_').str.replace('_G_', '_CC_')

In [7]:
def extract_mfcc(filename, n_mfcc=N_MFCC):
    y, sr = librosa.load(filename, sr=44100)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    mfcc_mean = np.mean(mfcc, axis=1)
    mfcc_std = np.std(mfcc, axis=1)
    return np.concatenate([mfcc_mean, mfcc_std])

def flatten_mfcc(mfcc):
    return np.array(mfcc).flatten()

In [8]:
mfcc_list = []
labels = []

for _, row in df.iterrows():
    try:
        mfcc = extract_mfcc(row['audio_path'])
        # mfcc_flat = flatten_mfcc(mfcc)
        mfcc_list.append(mfcc)
        labels.append(row['Lung Sound Type'])
    except Exception as e:
        print(f"[WARN] Failed: {row['audio_path']} -> {e}")
    
X = pd.DataFrame(mfcc_list)
y = pd.Series(labels)

Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE, stratify=y)

In [12]:
print(Xtrain)

           0           1           2          3          4          5    \
13 -764.382507  109.993454   86.915489  58.161190  32.065441  13.917945   
42 -804.508118  135.689194   97.989151  58.641506  31.542353  17.019617   
47 -737.755554   97.976143   79.802406  55.386112  30.890879  11.520988   
25 -899.696899  187.645477  100.031960  25.027435  -3.099097   1.393674   
19 -895.848694  174.154938   97.037033  31.387096   7.115599  10.852146   
36 -769.144226  138.034225  101.726128  59.372559  24.407873   2.191145   
7  -746.756958   99.175041   81.944016  59.211891  36.745846  19.026667   
43 -727.604492  118.306320   91.268234  56.343464  23.310570  -0.230543   
15 -835.634277  151.359619   92.503860  36.197353   5.591797  -3.016563   
48 -830.565613  157.586548   98.892769  41.858944   9.410712  -0.891013   
30 -719.526306  113.287270   90.218506  59.967068  30.633524   8.895968   
46 -697.834473  134.732422  101.699532  58.715763  18.222704 -10.119292   
34 -756.913635  110.20207

In [13]:
print(Xtest)

           0           1           2          3          4          5    \
10 -791.853333  148.223541  102.579453  53.560711  18.770983   0.448915   
18 -907.649963  182.376648   98.156548  27.341349   2.342365   7.316676   
29 -882.322083  171.389389   97.712906  33.567753   7.279616   7.227210   
3  -724.659546  144.850159  107.068138  59.700844  17.388952 -10.037862   
44 -774.176941  127.300331   94.564056  56.416130  25.146349   5.862497   
38 -810.261536  124.774887   88.446899  50.298771  23.795691   9.764002   
27 -767.353638  114.779922   86.489746  52.320290  22.978497   4.408137   
5  -633.667542   99.962654   78.737015  50.426052  22.790291   2.304846   
33 -843.659363  143.714569   90.734245  40.483559  13.734840   6.574737   
1  -772.347656  154.753281  105.169724  50.193199   9.124076 -13.198630   
49 -823.011597  149.773865   97.758720  46.184132  14.970344   2.451811   
39 -781.929016  155.836044  107.175743  54.182816  15.739351  -4.715007   
17 -821.996704  153.76763

In [9]:
pipe_model = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('svc', SVC(kernel='rbf', random_state=RANDOM_STATE, class_weight='balanced'))
])

param_grid = {
    'svc__C': [0.1, 1, 10, 100],
    'svc__gamma': [0.001, 0.01, 0.1, 1],
    'svc__kernel': ['rbf', 'linear', 'poly']
}

model_grid = GridSearchCV(
    estimator=pipe_model, 
    param_grid=param_grid, 
    scoring='f1_macro', 
    cv=5, 
    n_jobs=-1, 
    verbose=2
)

In [10]:
model_grid.fit(Xtrain, ytrain)
model = model_grid.best_estimator_
print(f"Best parameters: {model_grid.best_params_}")

ypred = model.predict(Xtest)
f1 = f1_score(ytest, ypred, average='macro')
print(f"LS: Lung Sound Type -> macro-F1: {f1}")

Fitting 5 folds for each of 48 candidates, totalling 240 fits




Best parameters: {'svc__C': 0.1, 'svc__gamma': 0.001, 'svc__kernel': 'linear'}
LS: Lung Sound Type -> macro-F1: 1.0


In [11]:
cm = confusion_matrix(ytest, ypred)
print("Confusion Matrix:")
print(cm)

print("\nClassification Report:")
print(classification_report(ytest, ypred))

Confusion Matrix:
[[3 0 0 0 0 0]
 [0 1 0 0 0 0]
 [0 0 4 0 0 0]
 [0 0 0 3 0 0]
 [0 0 0 0 2 0]
 [0 0 0 0 0 2]]

Classification Report:
                 precision    recall  f1-score   support

Coarse Crackles       1.00      1.00      1.00         3
  Fine Crackles       1.00      1.00      1.00         1
         Normal       1.00      1.00      1.00         4
    Pleural Rub       1.00      1.00      1.00         3
        Rhonchi       1.00      1.00      1.00         2
       Wheezing       1.00      1.00      1.00         2

       accuracy                           1.00        15
      macro avg       1.00      1.00      1.00        15
   weighted avg       1.00      1.00      1.00        15

