
 Pneumonia Detection from Breath & Cough Sounds

 
 Using MFCCs + LSTM


#  Import Libraries

In [1]:
import os
import numpy as np
import pandas as pd
import librosa
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score
import tensorflow as tf

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Masking
from tensorflow.keras.callbacks import EarlyStopping

#  Load Dataset

In [2]:
df = pd.read_csv("final_dataset.csv")  

In [3]:
df.head()

Unnamed: 0,sex_choice,age_choice,current_city,symptoms_status_choice,medical_condition_choice,insomnia_status_choice,smoke_status_choice,cov19_status_choice,hospital_choice,cough_noise,device_model,file_cough,label,cough_duration,nose_duration,mouth_duration,mouth_path,nose_path
0,Male,26,Ho Chi Minh,"['sorethroat', 'wetcough', 'stuffynose', 'sniv...",['No'],Onceper2Weeks,never,last14,No,True,Laptop/Desktop,cough/bad_cough_2021-09-16T23:17:05.234Z,1,16.64,18.944,19.626667,breathe_mouth\bad_breathe_mouth_2021-09-16T23_...,breathe_nose\bad_breathe_nose_2021-09-16T23_17...
1,Male,18,kiên giang,['No'],['No'],No,never,never,No,True,iPhone 8,cough/good_cough_2021-09-15T03:31:04.266Z,0,25.429333,27.989333,21.077333,breathe_mouth\good_breathe_mouth_2021-09-15T03...,breathe_nose\good_breathe_nose_2021-09-15T03_3...
2,Female,27,Ho Chi Minh,"['fever', 'drycough', 'wetcough', 'stuffynose'...",['otherHeart'],No,never,over14,No,True,Laptop/Desktop,cough/bad_cough_2021-09-11T05:16:40.570Z,1,29.610667,29.610667,29.781333,breathe_mouth\bad_breathe_mouth_2021-09-11T05_...,breathe_nose\bad_breathe_nose_2021-09-11T05_16...
3,Male,18,Ha Noi,['No'],['No'],No,never,never,No,True,samsung SM-J730G,cough/good_cough_2021-08-22T09:56:06.509Z,0,18.517333,19.882667,19.968,breathe_mouth\good_breathe_mouth_2021-08-22T09...,breathe_nose\good_breathe_nose_2021-08-22T09_5...
4,Female,18,long an,['No'],['No'],No,never,never,No,True,iPhone XR,cough/good_cough_2021-08-08T14:11:13.810Z,0,27.648,26.453333,27.562667,breathe_mouth\good_breathe_mouth_2021-08-08T14...,breathe_nose\good_breathe_nose_2021-08-08T14_1...


In [4]:

audio_columns = ['nose_path', 'label']
df = df[audio_columns]

df.head()

Unnamed: 0,nose_path,label
0,breathe_nose\bad_breathe_nose_2021-09-16T23_17...,1
1,breathe_nose\good_breathe_nose_2021-09-15T03_3...,0
2,breathe_nose\bad_breathe_nose_2021-09-11T05_16...,1
3,breathe_nose\good_breathe_nose_2021-08-22T09_5...,0
4,breathe_nose\good_breathe_nose_2021-08-08T14_1...,0


In [5]:
# print("NaN in mouth_path:", df['mouth_path'].isna().sum())


In [6]:
print("NaN in nose_path:", df['nose_path'].isna().sum())


NaN in nose_path: 0


In [7]:
# # استبدال : بـ _ في عمود file_cough
# df['file_cough'] = df['file_cough'].str.replace(":", "_", regex=False)


In [8]:
# def fix_cough_path(path):
#     if not path.endswith(".wav"):
#         path = path + ".wav"
#     return path

# df['file_cough'] = df['file_cough'].apply(fix_cough_path)


In [9]:
df.head()

Unnamed: 0,nose_path,label
0,breathe_nose\bad_breathe_nose_2021-09-16T23_17...,1
1,breathe_nose\good_breathe_nose_2021-09-15T03_3...,0
2,breathe_nose\bad_breathe_nose_2021-09-11T05_16...,1
3,breathe_nose\good_breathe_nose_2021-08-22T09_5...,0
4,breathe_nose\good_breathe_nose_2021-08-08T14_1...,0


#  Audio Parameters

MAX_LEN: اختاري طول sequence مناسب بناءً على أطول صوت عندك، لو أصغر أو أكبر ممكن تعدلي.

MFCC_N: 40 عادة ممتازة، ممكن تزودي أو تنقصي حسب التجربة.

In [11]:
SR = 22050  # Sample rate
MFCC_N = 40  # عدد MFCCs لكل frame
MAX_LEN = 500  # أقصى طول sequence بعد padding/trimming

#  Function to Load Audio & Extract MFCC

In [12]:

# def load_audio_mfcc(file_path, sr=SR, n_mfcc=MFCC_N, max_len=MAX_LEN):
#     """
#     Load an audio file, convert to MFCC, pad/truncate to max_len
#     """
#     y, _ = librosa.load(file_path, sr=sr)
#     mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
#     mfcc = mfcc.T  # shape: (time_steps, n_mfcc)
    
#     # Padding or truncating to max_len
#     if mfcc.shape[0] < max_len:
#         pad_width = max_len - mfcc.shape[0]
#         mfcc = np.pad(mfcc, ((0, pad_width), (0,0)), mode='constant')
#     else:
#         mfcc = mfcc[:max_len, :]
        
#     return mfcc


In [13]:
def load_nose_mfcc(file_path, sr=SR, n_mfcc=MFCC_N, max_len=MAX_LEN):
    """
    Load nose breathing audio only,
    convert to MFCC, pad/truncate to max_len
    """
    
    # Load audio
    y, _ = librosa.load(file_path, sr=sr)
    
    # Extract MFCC
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    mfcc = mfcc.T  # shape: (time_steps, n_mfcc)
    
    # Padding or truncating
    if mfcc.shape[0] < max_len:
        pad_width = max_len - mfcc.shape[0]
        mfcc = np.pad(mfcc, ((0, pad_width), (0, 0)), mode='constant')
    else:
        mfcc = mfcc[:max_len, :]
        
    return mfcc


#  Prepare Dataset

In [14]:

# X = []
# y = []

# for idx, row in df.iterrows():
#     # Load MFCCs for mouth, nose, cough
#     mfcc_mouth = load_audio_mfcc(row['mouth_path'])
#     mfcc_nose = load_audio_mfcc(row['nose_path'])
#     mfcc_cough = load_audio_mfcc(row['file_cough'])
    
#     # Concatenate along feature axis
#     # final shape: (time_steps, n_mfcc*3)
#     mfcc_concat = np.concatenate([mfcc_mouth, mfcc_nose, mfcc_cough], axis=1)
    
#     X.append(mfcc_concat)
#     y.append(row['label'])

# X = np.array(X)
# y = np.array(y)

# print("Dataset shape:", X.shape, y.shape)
# # expected shape: (num_samples, MAX_LEN, MFCC_N*3)

In [15]:
X = []
y = []

for idx, row in df.iterrows():
    mfcc_nose = load_nose_mfcc(row['nose_path'])
    X.append(mfcc_nose)
    y.append(row['label'])

X = np.array(X)
y = np.array(y)

print("Dataset shape:", X.shape, y.shape)


Dataset shape: (1310, 500, 40) (1310,)


# Split into Train/Test

In [16]:

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


#  Build LSTM Model

In [17]:

# model = Sequential()
# model.add(Masking(mask_value=0., input_shape=(MAX_LEN, MFCC_N*3)))  # ignore padded zeros
# model.add(LSTM(128, return_sequences=False))
# model.add(Dropout(0.3))
# model.add(Dense(64, activation='relu'))
# model.add(Dense(1, activation='sigmoid'))  # Binary classification

# model.compile(
#     loss='binary_crossentropy',
#     optimizer='adam',
#     metrics=['accuracy']
# )

# model.summary()

In [18]:
model = Sequential()

# بدل MFCC_N*3 نخليه MFCC_N بس
model.add(Masking(mask_value=0., input_shape=(MAX_LEN, MFCC_N)))

model.add(LSTM(128, return_sequences=False))
model.add(Dropout(0.3))
model.add(Dense(64, activation='relu'))
model.add(Dense(1, activation='sigmoid'))  # Binary classification

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

model.summary()


  super().__init__(**kwargs)


#  Train Model

In [19]:

# early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# history = model.fit(
#     X_train, y_train,
#     validation_split=0.2,
#     epochs=50,
#     batch_size=16,
#     callbacks=[early_stop]
# )

In [20]:
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
    verbose=1
)


history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=16,
    callbacks=[early_stop],
    shuffle=True
)

Epoch 1/50
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 159ms/step - accuracy: 0.6212 - loss: 0.6676 - val_accuracy: 0.6527 - val_loss: 0.6329
Epoch 2/50
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 148ms/step - accuracy: 0.6555 - loss: 0.6348 - val_accuracy: 0.6603 - val_loss: 0.6261
Epoch 3/50
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 144ms/step - accuracy: 0.6441 - loss: 0.6253 - val_accuracy: 0.6565 - val_loss: 0.6323
Epoch 4/50
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 145ms/step - accuracy: 0.6422 - loss: 0.6163 - val_accuracy: 0.6565 - val_loss: 0.6191
Epoch 5/50
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 143ms/step - accuracy: 0.6527 - loss: 0.6212 - val_accuracy: 0.6565 - val_loss: 0.6285
Epoch 6/50
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 161ms/step - accuracy: 0.6632 - loss: 0.5988 - val_accuracy: 0.6603 - val_loss: 0.6282
Epoch 7/50
[1m66/66[0

#  Evaluate Model

In [21]:

y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype(int)

acc = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f"Test Accuracy: {acc:.4f}")
print(f"Test F1-Score: {f1:.4f}")

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 111ms/step
Test Accuracy: 0.6565
Test F1-Score: 0.1667


#  Save Model

In [23]:
model.save("sound_model.keras")
print("Model saved as sound_model.h5")

Model saved as sound_model.h5


# tensorflow lite

In [8]:
import tensorflow as tf

# تحميل الموديل الأصلي
model = tf.keras.models.load_model("sound_model.keras")

# حفظه بصيغة Keras native (.keras)
model.save("sound_model_no_optimizer.keras", include_optimizer=False)

print("تم حفظ الموديل بصيغة .keras بدون optimizer ✅")


تم حفظ الموديل بصيغة .keras بدون optimizer ✅
