In [1]:
import os
import numpy as np
import librosa
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.pipeline import make_pipeline
from imblearn.over_sampling import SMOTE

# Paths
dataset_path = os.path.normpath(r'D:/OneDrive - uem.edu.in/Backup/Project/Baby Cey interpreter/archive/donateacry_corpus')  # Update this path

# Define categories
categories = ['belly_pain', 'burping', 'discomfort', 'hungry', 'tired']

# Feature extraction function
def extract_features(file_path):
    y, sr = librosa.load(file_path, sr=None)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    
    # Adjust Chroma and Spectral Contrast parameters
    chroma = librosa.feature.chroma_stft(y=y, sr=sr, n_chroma=12)
    zcr = librosa.feature.zero_crossing_rate(y)
    spec_contrast = librosa.feature.spectral_contrast(y=y, sr=sr, n_bands=6, fmin=50)
    
    # Aggregate features
    mfccs_mean = np.mean(mfccs.T, axis=0)
    chroma_mean = np.mean(chroma.T, axis=0)
    zcr_mean = np.mean(zcr.T, axis=0)
    spec_contrast_mean = np.mean(spec_contrast.T, axis=0)
    
    return np.concatenate((mfccs_mean, chroma_mean, zcr_mean, spec_contrast_mean))

# Prepare data
def prepare_data():
    features = []
    labels = []
    for category in categories:
        folder_path = os.path.join(dataset_path, category)
        for file_name in os.listdir(folder_path):
            if file_name.endswith('.wav'):
                file_path = os.path.join(folder_path, file_name)
                try:
                    feature = extract_features(file_path)
                    features.append(feature)
                    labels.append(category)
                except Exception as e:
                    print(f"Error processing file {file_path}: {e}")
    return np.array(features), np.array(labels)

# Load data
X, y = prepare_data()

# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Handle imbalanced data
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y_encoded)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# Create a pipeline for scaling and training
clf = make_pipeline(StandardScaler(), RandomForestClassifier(n_estimators=200, random_state=42, max_depth=15, class_weight='balanced'))
clf.fit(X_train, y_train)

# Predict and evaluate
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')
print(classification_report(y_test, y_pred, target_names=categories))

# Function to predict
def predict_audio(file_path):
    feature = extract_features(file_path)
    feature = feature.reshape(1, -1)  # Reshape for a single sample
    prediction = clf.predict(feature)
    return label_encoder.inverse_transform(prediction)[0]

# User-defined input for file path
test_file = input("Please enter the path of the audio file you want to predict: ").strip('\"')
print(f'The reason behind the baby cry is: {predict_audio(test_file)}')

Accuracy: 98.17%
              precision    recall  f1-score   support

  belly_pain       0.99      1.00      0.99        73
     burping       0.95      1.00      0.97        72
  discomfort       0.99      1.00      0.99        80
      hungry       1.00      0.92      0.96        84
       tired       0.99      1.00      0.99        73

    accuracy                           0.98       382
   macro avg       0.98      0.98      0.98       382
weighted avg       0.98      0.98      0.98       382

The reason behind the baby cry is: burping
