In [14]:
import os
import numpy as np
import librosa
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pickle


In [15]:
# Function to extract features from audio file
def extract_features(file_path, mfcc=True, chroma=True, mel=True):
    """Load an audio file and return its averaged spectral features as a flat list.

    The file is opened in binary mode and the open handle is passed to
    ``librosa.load``. For every enabled feature family the matrix returned by
    librosa is transposed and averaged along axis 0, and the resulting values
    are appended to the output in the fixed order MFCC, chroma, mel
    spectrogram.

    Args:
        file_path: Path of the audio file to open.
        mfcc: When true, include the averages of ``librosa.feature.mfcc``
            computed with ``n_mfcc=13``.
        chroma: When true, include the averages of
            ``librosa.feature.chroma_stft``.
        mel: When true, include the averages of
            ``librosa.feature.melspectrogram``.

    Returns:
        list: The concatenated averaged values; empty when all three flags
        are false.
    """
    with open(file_path, "rb") as audio_handle:
        signal, sample_rate = librosa.load(audio_handle)

    def _time_average(feature_matrix):
        # Collapse the matrix to one value per feature row.
        return np.mean(feature_matrix.T, axis=0)

    feature_vector = []
    if mfcc:
        feature_vector.extend(
            _time_average(librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=13))
        )
    if chroma:
        feature_vector.extend(
            _time_average(librosa.feature.chroma_stft(y=signal, sr=sample_rate))
        )
    if mel:
        feature_vector.extend(
            _time_average(librosa.feature.melspectrogram(y=signal, sr=sample_rate))
        )

    return feature_vector


In [16]:
# Function to train and save a classifier and return the trained model
def train_and_save_classifier(X_train, y_train, model_path, classifier):
    """Fit ``classifier``, pickle it to ``model_path`` and hand it back.

    Args:
        X_train: Training inputs, forwarded unchanged to ``classifier.fit``.
        y_train: Training targets, forwarded unchanged to ``classifier.fit``.
        model_path: Destination file; opened in binary write mode, so an
            existing file is overwritten.
        classifier: Object exposing ``fit(X, y)`` that can be pickled.

    Returns:
        The same ``classifier`` object, after ``fit`` has been called on it.
    """
    classifier.fit(X_train, y_train)

    # Persist the model only after fitting so the file holds the trained state.
    with open(model_path, "wb") as model_file:
        pickle.dump(classifier, model_file)

    return classifier

# Function to load classifier
def load_classifier(model_path):
    """Return the object unpickled from ``model_path``.

    Args:
        model_path: Path of a pickle file, opened in binary read mode.

    Returns:
        Whatever object the pickle stream contains.

    Note:
        Unpickling can execute arbitrary code, so only point this at model
        files you created yourself or otherwise trust.
    """
    with open(model_path, "rb") as model_file:
        return pickle.load(model_file)

# Function to classify audio file and provide reason
def classify_audio(audio_file, classifier):
    """Predict the cry class of one audio file and return a readable reason.

    Features are computed with ``extract_features`` and passed to
    ``classifier.predict`` as a single-row batch. The first predicted label id
    is mapped to a class name and then to a reason string.

    Args:
        audio_file: Path of the audio file to classify.
        classifier: Fitted model exposing ``predict``; it is expected to
            return the integer label ids 0-4 listed below.

    Returns:
        str: The reason text for the predicted class. Classes without a
        dedicated reason (currently only "burping") fall back to the class
        name itself.

    Raises:
        KeyError: If the predicted label id is not one of 0-4.
    """
    features = extract_features(audio_file)
    prediction = classifier.predict([features])[0]

    # Label id -> class name. This is the inverse of the module-level
    # class_mapping used to label the training data, so the two must agree.
    id_to_class = {0: "tired", 1: "burping", 2: "hungry", 3: "discomfort", 4: "belly_pain"}
    predicted_class = id_to_class[prediction]

    # Reason text returned to the caller (spelling of the "discomfort" and
    # "tired" entries corrected).
    reason_mapping = {
        "belly_pain": "Colic or Belly pain",
        "hungry": "Exhaustion or hunger",
        "discomfort": "Lack of Affection and Attention or discomfort",
        "tired": "tiredness or lack of sleep"
    }

    return reason_mapping.get(predicted_class, predicted_class)

# Root directory of the audio corpus; it is joined with each class name below
# to locate that class's folder.
audio_dir = "C:/Users/hp/Downloads/flaskdep/donateacry_corpus_cleaned_and_updated_data"

# Class (sub-folder) name -> integer label id used as the training target.
class_mapping = dict(
    tired=0,
    burping=1,
    hungry=2,
    discomfort=3,
    belly_pain=4,
)

# Accumulators filled by process_folder: one feature vector per audio file and
# the label id of that file at the same index.
feature_files, labels = [], []

In [17]:
def process_folder(folder_path, label_id):
    """Extract features for every ``.wav`` file directly inside one folder.

    Each matching file is passed to ``extract_features``; the resulting
    feature vector is appended to the module-level ``feature_files`` list and
    ``label_id`` is appended to ``labels`` at the same position.

    Args:
        folder_path: Directory to scan (not recursive).
        label_id: Label recorded for every file found in the folder.

    Returns:
        None. The results are accumulated in ``feature_files`` and ``labels``.
    """
    # os.listdir returns entries in arbitrary order. Sorting fixes the sample
    # order so that a seeded train/test split is reproducible across machines.
    wav_names = sorted(name for name in os.listdir(folder_path) if name.endswith(".wav"))
    for filename in wav_names:
        file_path = os.path.join(folder_path, filename)
        feature_files.append(extract_features(file_path))
        labels.append(label_id)

# Process each folder
# The accumulators are created in an earlier cell, so re-running this cell
# would otherwise append every sample a second time and let duplicates land on
# both sides of the train/test split. Empty them first to keep the cell
# idempotent.
feature_files.clear()
labels.clear()
for folder, label_id in class_mapping.items():
    folder_path = os.path.join(audio_dir, folder)
    process_folder(folder_path, label_id)

# Convert lists to numpy arrays
X = np.array(feature_files)
y = np.array(labels)

# Split the dataset into training and testing sets (20% held out, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train and save classifiers
# Every model is fitted on the same training split and pickled to its own file.
rf_model_path = "C:/Users/hp/Downloads/flaskdep/rf_model.pkl"
rf_classifier = train_and_save_classifier(
    X_train,
    y_train,
    model_path=rf_model_path,
    classifier=RandomForestClassifier(n_estimators=100, random_state=42),
)
print("Random Forest Model trained and saved successfully!")

svm_model_path = "C:/Users/hp/Downloads/flaskdep/svm_model.pkl"
svm_classifier = train_and_save_classifier(
    X_train,
    y_train,
    model_path=svm_model_path,
    classifier=SVC(kernel='linear', random_state=42),
)
print("SVM Model trained and saved successfully!")

gb_model_path = "C:/Users/hp/Downloads/flaskdep/gb_model.pkl"
gb_classifier = train_and_save_classifier(
    X_train,
    y_train,
    model_path=gb_model_path,
    classifier=GradientBoostingClassifier(n_estimators=100, random_state=42),
)
print("Gradient Boosting Model trained and saved successfully!")

knn_model_path = "C:/Users/hp/Downloads/flaskdep/knn_model.pkl"
knn_classifier = train_and_save_classifier(
    X_train,
    y_train,
    model_path=knn_model_path,
    classifier=KNeighborsClassifier(n_neighbors=5),
)
print("KNN Model trained and saved successfully!")

Random Forest Model trained and saved successfully!
SVM Model trained and saved successfully!
Gradient Boosting Model trained and saved successfully!
KNN Model trained and saved successfully!


In [18]:
# Train and save classifiers
# NOTE(review): this cell repeats the training performed at the end of the
# previous cell (same data, same hyper-parameters, same output files); one of
# the two copies can probably be removed.
rf_model_path = "C:/Users/hp/Downloads/flaskdep/rf_model.pkl"
rf_classifier = train_and_save_classifier(
    X_train,
    y_train,
    model_path=rf_model_path,
    classifier=RandomForestClassifier(n_estimators=100, random_state=42),
)
print("Random Forest Model trained and saved successfully!")

svm_model_path = "C:/Users/hp/Downloads/flaskdep/svm_model.pkl"
svm_classifier = train_and_save_classifier(
    X_train,
    y_train,
    model_path=svm_model_path,
    classifier=SVC(kernel='linear', random_state=42),
)
print("SVM Model trained and saved successfully!")

gb_model_path = "C:/Users/hp/Downloads/flaskdep/gb_model.pkl"
gb_classifier = train_and_save_classifier(
    X_train,
    y_train,
    model_path=gb_model_path,
    classifier=GradientBoostingClassifier(n_estimators=100, random_state=42),
)
print("Gradient Boosting Model trained and saved successfully!")

knn_model_path = "C:/Users/hp/Downloads/flaskdep/knn_model.pkl"
knn_classifier = train_and_save_classifier(
    X_train,
    y_train,
    model_path=knn_model_path,
    classifier=KNeighborsClassifier(n_neighbors=5),
)
print("KNN Model trained and saved successfully!")


Random Forest Model trained and saved successfully!
SVM Model trained and saved successfully!
Gradient Boosting Model trained and saved successfully!
KNN Model trained and saved successfully!


In [19]:
# Evaluate the classifiers
# Predict the test split with every fitted model. The order is the same
# throughout: random forest, SVM, gradient boosting, KNN.
rf_predictions, svm_predictions, gb_predictions, knn_predictions = [
    fitted_model.predict(X_test)
    for fitted_model in (rf_classifier, svm_classifier, gb_classifier, knn_classifier)
]

# Score each set of predictions against the true test labels.
rf_accuracy, svm_accuracy, gb_accuracy, knn_accuracy = [
    accuracy_score(y_test, predicted)
    for predicted in (rf_predictions, svm_predictions, gb_predictions, knn_predictions)
]

print(f"Random Forest Accuracy: {rf_accuracy}")
print(f"SVM Accuracy: {svm_accuracy}")
print(f"Gradient Boosting Accuracy: {gb_accuracy}")
print(f"KNN Accuracy: {knn_accuracy}")

# Function to classify an input audio file using the trained models
def classify_input_audio(audio_file_path, model_type):
    """Classify one audio file with the saved model selected by ``model_type``.

    The model is unpickled from disk on every call via ``load_classifier`` and
    the prediction is delegated to ``classify_audio``.

    Args:
        audio_file_path: Path of the audio file to classify.
        model_type: One of 'random_forest', 'svm', 'gradient_boosting', 'knn'.

    Returns:
        The value returned by ``classify_audio`` for the file.

    Raises:
        ValueError: If ``model_type`` is not one of the four supported names.
    """
    # Model name -> pickle path, taken from the module-level *_model_path variables.
    model_paths = {
        "random_forest": rf_model_path,
        "svm": svm_model_path,
        "gradient_boosting": gb_model_path,
        "knn": knn_model_path
    }

    if model_type in model_paths:
        selected_model = load_classifier(model_paths[model_type])
        return classify_audio(audio_file_path, selected_model)

    raise ValueError("Invalid model type. Choose from 'random_forest', 'svm', 'gradient_boosting', or 'knn'.")

# Example usage: Classify an audio file using Random Forest model
# The sample path is built from audio_dir (the corpus root used for training)
# instead of a path relative to the current working directory, so the example
# runs regardless of where the notebook was started.
# NOTE(review): assumes audio_dir is the same corpus folder the former relative
# path pointed at - confirm. The sample also comes from the training corpus, so
# this is a smoke test rather than an evaluation.
audio_file_path = os.path.join(
    audio_dir,
    "hungry",
    "0a983cd2-0078-4698-a048-99ac01eb167a-1433917038889-1.7-f-04-hu.wav",
)
model_type = "random_forest"  # Choose from 'random_forest', 'svm', 'gradient_boosting', 'knn'
predicted_reason = classify_input_audio(audio_file_path, model_type)
print(f"The predicted reason for the audio file is: {predicted_reason}")

Random Forest Accuracy: 0.7934782608695652
SVM Accuracy: 0.6304347826086957
Gradient Boosting Accuracy: 0.7608695652173914
KNN Accuracy: 0.7608695652173914
The predicted reason for the audio file is: Exhaustion or hunger
