<a href="https://colab.research.google.com/github/DiptaDhar2020/Heart-Sound-Classifier-Thesis/blob/main/proposed.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import pickle

import librosa
import librosa.feature
import numpy as np
import soundfile as sf
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier

# Directory containing the labelled heart-sound recordings
audio_directory = "/path/to/audio/directory"


def _mfcc_vector(signal, sample_rate):
    """Return a fixed-length MFCC feature vector for one audio signal.

    ``librosa.feature.mfcc`` produces an ``(n_mfcc, n_frames)`` matrix whose
    frame count depends on the clip duration. scikit-learn estimators need
    one equally sized 1-D vector per sample, so the coefficients are averaged
    over the time axis. The same function is used for training and for
    inference so both see identically shaped features.
    """
    # The signal must be passed as the keyword ``y``; recent librosa releases
    # reject it as a positional argument.
    mfcc = librosa.feature.mfcc(y=signal, sr=sample_rate)
    return np.mean(mfcc, axis=1)


# os.listdir returns entries in arbitrary order; sorting keeps the sample
# order (and therefore the train/test split below) reproducible.
audio_files = sorted(os.listdir(audio_directory))

# One feature vector and one integer label per recording
audio_data = []
labels = []

for audio_file in audio_files:
    audio_path = os.path.join(audio_directory, audio_file)

    # Skip sub-directories (e.g. notebook checkpoint folders); librosa.load
    # can only read regular files.
    if not os.path.isfile(audio_path):
        continue

    audio, sample_rate = librosa.load(audio_path)
    audio_data.append(_mfcc_vector(audio, sample_rate))

    # 1 = healthy, 0 = anything else. A prefix match is used because an exact
    # match on the stem could label at most one file per extension as healthy.
    # NOTE(review): assumes names such as "healthy_001.wav" -- confirm against
    # the dataset's actual naming scheme.
    label = audio_file.split(".")[0]
    labels.append(1 if label.startswith("healthy") else 0)

if not audio_data:
    raise ValueError(f"No audio files found in {audio_directory!r}")

feature_matrix = np.vstack(audio_data)
label_array = np.asarray(labels)

# Hold out part of the data: accuracy measured on the samples a model was
# fitted on is optimistically biased and says little about unseen recordings.
train_features, test_features, train_labels, test_labels = train_test_split(
    feature_matrix,
    label_array,
    test_size=0.2,
    random_state=42,
    stratify=label_array,
)

# Train a KNN model
knn_model = KNeighborsClassifier(n_neighbors=5)
knn_model.fit(train_features, train_labels)

# Train a random forest model
random_forest_model = RandomForestClassifier(n_estimators=100)
random_forest_model.fit(train_features, train_labels)

# Train a naive Bayes model
naive_bayes_model = GaussianNB()
naive_bayes_model.fit(train_features, train_labels)

# Evaluate the models on the held-out recordings
knn_predictions = knn_model.predict(test_features)
random_forest_predictions = random_forest_model.predict(test_features)
naive_bayes_predictions = naive_bayes_model.predict(test_features)

# Calculate the accuracy of the models
knn_accuracy = accuracy_score(test_labels, knn_predictions)
random_forest_accuracy = accuracy_score(test_labels, random_forest_predictions)
naive_bayes_accuracy = accuracy_score(test_labels, naive_bayes_predictions)

# Print the accuracies
print("KNN accuracy:", knn_accuracy)
print("Random forest accuracy:", random_forest_accuracy)
print("Naive Bayes accuracy:", naive_bayes_accuracy)

# Classify a new heart sound
new_heart_sound, new_sample_rate = librosa.load("new_heart_sound.wav")
# predict() expects a 2-D array of shape (n_samples, n_features), so the
# single feature vector is reshaped into one row.
features = _mfcc_vector(new_heart_sound, new_sample_rate).reshape(1, -1)
prediction = knn_model.predict(features)

# Print the prediction (predict returns an array; take its only element)
if prediction[0] == 1:
    print("The prediction is: healthy")
else:
    print("The prediction is: unhealthy")