In [73]:
print(df.columns)


Index(['Patient ID', 'Locations', 'Age', 'Sex', 'Height', 'Weight',
       'Pregnancy status', 'Murmur', 'Murmur locations',
       'Most audible location', 'Systolic murmur timing',
       'Systolic murmur shape', 'Systolic murmur grading',
       'Systolic murmur pitch', 'Systolic murmur quality',
       'Diastolic murmur timing', 'Diastolic murmur shape',
       'Diastolic murmur grading', 'Diastolic murmur pitch',
       'Diastolic murmur quality', 'Campaign', 'Additional ID',
       'valid_file_paths', 'features'],
      dtype='object')


In [107]:
import os
import librosa
import librosa.display
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences

def load_and_preprocess_audio(file_path, sr=22050, n_mfcc=13, max_length=200):
    y, sr = librosa.load(file_path, sr=sr)
    y = librosa.util.normalize(y)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    mfccs = mfccs.T  # Transpose to shape (time_steps, n_mfcc)
    mfccs = pad_sequences([mfccs], maxlen=max_length, dtype='float32', padding='post', truncating='post')[0]
    return mfccs

def process_dataset(audio_folder, csv_file, max_length=200):
    df = pd.read_csv(csv_file)
    X, y = [], []

    for _, row in df.iterrows():
        patient_id = str(row['Patient ID'])
        murmur = row['Murmur']
        murmur_loc = row['Murmur locations']
        systolic_timing = row['Systolic murmur timing']
        diastolic_timing = row['Diastolic murmur timing']

        patient_folder = os.path.join(audio_folder, patient_id)
        if os.path.exists(patient_folder):
            for file in os.listdir(patient_folder):
                if file.endswith(('.wav', '.mp3')):
                    file_path = os.path.join(patient_folder, file)
                    mfccs = load_and_preprocess_audio(file_path, max_length=max_length)
                    X.append(mfccs)
                    y.append([murmur, murmur_loc, systolic_timing, diastolic_timing])

    return np.array(X), y

def build_model(input_shape, num_classes):
    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Paths (Edit these based on your dataset)
audio_folder = "/content/Organized_Patients"
csv_file = "/content/drive/MyDrive/training_data.csv"

# Load dataset
X, y = process_dataset(audio_folder, csv_file)
X = np.expand_dims(X, axis=-1)  # Add channel dimension

# Encode labels
unique_labels = list(set([tuple(label) for label in y]))
label_to_index = {label: idx for idx, label in enumerate(unique_labels)}
y_encoded = np.array([label_to_index[tuple(label)] for label in y])
y_categorical = to_categorical(y_encoded, num_classes=len(unique_labels))

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y_categorical, test_size=0.2, random_state=42)

# Build and train model
input_shape = X_train.shape[1:]
num_classes = len(unique_labels)
model = build_model(input_shape, num_classes)
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

# Evaluate model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 91ms/step - accuracy: 0.5879 - loss: 18.5697 - val_accuracy: 0.7299 - val_loss: 1.5294
Epoch 2/20
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 64ms/step - accuracy: 0.7583 - loss: 1.5257 - val_accuracy: 0.7299 - val_loss: 1.6394
Epoch 3/20
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 90ms/step - accuracy: 0.7521 - loss: 1.5428 - val_accuracy: 0.7299 - val_loss: 1.3674
Epoch 4/20
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 69ms/step - accuracy: 0.7467 - loss: 1.4176 - val_accuracy: 0.7299 - val_loss: 1.3350
Epoch 5/20
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 65ms/step - accuracy: 0.7603 - loss: 1.2844 - val_accuracy: 0.7299 - val_loss: 1.3530
Epoch 6/20
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 93ms/step - accuracy: 0.7636 - loss: 1.2857 - val_accuracy: 0.7299 - val_loss: 1.4250
Epoch 7/20
[1m80/80[0m [32m━━━

In [114]:
import librosa
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences

def classify_heart_sound(model, file_path, max_length=200, sr=22050, n_mfcc=13):
    # Load and preprocess the input audio
    y, sr = librosa.load(file_path, sr=sr)
    y = librosa.util.normalize(y)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc).T

    # Pad or truncate to ensure consistent shape
    mfccs = pad_sequences([mfccs], maxlen=max_length, dtype='float32', padding='post', truncating='post')[0]
    mfccs = np.expand_dims(mfccs, axis=-1)  # Add channel dimension
    mfccs = np.expand_dims(mfccs, axis=0)   # Add batch dimension

    # Predict
    predictions = model.predict(mfccs)
    predicted_label_index = np.argmax(predictions)

    # Decode label
    index_to_label = {idx: label for label, idx in label_to_index.items()}
    predicted_label = index_to_label[predicted_label_index]

    print(f"Predicted Classification: {predicted_label}")
    return predicted_label

# Example usage (edit the file path accordingly):
file_path = "/content/drive/MyDrive/murmur__197_1308141235553_C.wav"
predicted_class = classify_heart_sound(model, file_path)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
Predicted Classification: ('Present', 'AV+MV+PV+TV', 'Holosystolic', nan)


In [94]:
import os
import shutil

# Define the directory containing the audio files and the target directory
source_directory = "/content/drive/MyDrive/Severity_training_data"  # Replace with the actual path to your audio files
target_directory = "Organized_Patients"

# Ensure the target directory exists
os.makedirs(target_directory, exist_ok=True)

# Iterate through all files in the source directory
for filename in os.listdir(source_directory):
    if filename.endswith(".wav"):  # Process only .wav files
        # Extract the patient ID and location from the filename
        patient_id, location = filename.split("_")[0], filename.split("_")[-1]
        location = location.split(".")[0]

        # Create a directory for the patient if it doesn't exist
        patient_folder = os.path.join(target_directory, patient_id)
        os.makedirs(patient_folder, exist_ok=True)

        # Move or copy the audio file to the patient's folder
        source_file_path = os.path.join(source_directory, filename)
        target_file_path = os.path.join(patient_folder, filename)
        shutil.move(source_file_path, target_file_path)  # Use shutil.copy if you want to copy instead

print("Audio files have been organized successfully.")


Audio files have been organized successfully.


In [96]:
import os
import shutil

# Define the directory containing the audio files and the target directory
source_directory = "/content/drive/MyDrive/Severity_training_data"  # Replace with the actual path to your audio files
target_directory = "Organized_Patients22222222222"

# Ensure the target directory exists
os.makedirs(target_directory, exist_ok=True)

# Define the possible labels
labels = ['_AV', '_TV', '_PV', '_MV']

# Create folders for each label within the target directory
for label in labels:
    label_folder = os.path.join(target_directory, label)
    os.makedirs(label_folder, exist_ok=True)

# Iterate through all files in the source directory
for filename in os.listdir(source_directory):
    if filename.endswith(".wav"):  # Process only .wav files
        # Extract the patient ID and location from the filename
        patient_id, location = filename.split("_")[0], filename.split("_")[-1]
        location = location.split(".")[0]

        # Create a dictionary to store which labels are present for this patient
        patient_labels = {label: False for label in labels}

        # Check if each label is present in the filename
        for label in labels:
            if label in filename:
                patient_labels[label] = True

        # Now move the file into the corresponding label folder for the patient
        for label, is_present in patient_labels.items():
            if is_present:  # If the label is present in the filename
                # Create a directory for the patient within the label folder if it doesn't exist
                patient_folder = os.path.join(target_directory, label, patient_id)
                os.makedirs(patient_folder, exist_ok=True)

                # Move the audio file to the appropriate folder
                source_file_path = os.path.join(source_directory, filename)
                target_file_path = os.path.join(patient_folder, filename)
                shutil.move(source_file_path, target_file_path)  # Use shutil.copy to copy instead of move

print("Audio files have been organized successfully.")


Audio files have been organized successfully.
