## Importing the modules

In [41]:
import librosa.display
import numpy as np
import random
import pickle
import audiomentations as A
import soundfile as sf
import os
import librosa
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

## Data augmentation, Data pre-processing & Data Normalization

In [42]:
# Load X and y from pickle files
with open('x.pkl', 'rb') as file:
    X = pickle.load(file)

with open('y.pkl', 'rb') as file:
    y = pickle.load(file)

## Data sampling

In [43]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.1, random_state=42)

## Model training

## Decision tree

In [44]:
dt = DecisionTreeClassifier(max_depth=5)
dt.fit(X_train,y_train)
dt_pred = dt.predict(X_test)
print("Accuracy score of Decision tree:",accuracy_score(y_test,dt_pred)*100,"%")

Accuracy score of Decision tree: 65.85365853658537 %


## Random forest classifier

In [45]:
rfc = RandomForestClassifier(random_state=32)
rfc.fit(X_train,y_train)
rfc_pred = rfc.predict(X_test)
print("Accuracy score of Random Forest:",accuracy_score(y_test,rfc_pred)*100,"%")

Accuracy score of Random Forest: 82.92682926829268 %


## Gradient Boosting

In [46]:
gb = GradientBoostingClassifier()
gb.fit(X_train,y_train)
gb_pred = gb.predict(X_test)
print("Accuracy score of Gradient Boosting:",accuracy_score(y_test,gb_pred)*100,"%")

Accuracy score of Gradient Boosting: 80.48780487804879 %


## Support vector machine

In [47]:
svm = SVC()
svm.fit(X_train,y_train)
svm_pred = svm.predict(X_test)
print("Accuracy score of SVC:",accuracy_score(y_test,svm_pred)*100,"%")

Accuracy score of SVC: 39.02439024390244 %


## Saving the best model

In [48]:
with open('GBmodel_pkl' , 'wb') as f:
    pickle.dump(gb,f)

## Loading saved model

In [49]:
with open('GBmodel_pkl' , 'rb') as f:
    GB = pickle.load(f)

## Preprocessing of testing audio file

In [50]:
def Test_preprocess_audio(audio_file):
        num_mfcc_coefficients = 13
        
        Test_preprocess_data = []
        original_audio, sr = sf.read(audio_file)

        # Apply augmentation to create augmented audio
        augment1 = A.AddGaussianNoise(p=0.2)
        augment2 = A.TimeStretch(p=0.2)
        augment3 = A.PitchShift(p=0.2)
        augment4 = A.Shift(p=0.2)
        augment5 = A.TimeMask(p=0.2)

        augmented_audio1 = augment1(samples=original_audio, sample_rate=sr)
        augmented_audio2 = augment2(samples=original_audio, sample_rate=sr)
        augmented_audio3 = augment3(samples=original_audio, sample_rate=sr)
        augmented_audio4 = augment4(samples=original_audio, sample_rate=sr)
        augmented_audio5 = augment5(samples=original_audio, sample_rate=sr)

        # Perform feature extraction (e.g., MFCCs) on original and augmented audio
        for audio in [original_audio, augmented_audio1, augmented_audio2, augmented_audio3, augmented_audio4, augmented_audio5]:
            mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=num_mfcc_coefficients)

            # Normalize the MFCCs (optional but recommended)
            # mfccs = (mfccs - np.mean(mfccs)) / np.std(mfccs)

            # # Reshape or pad the MFCCs to match the desired input shape
            # num_frames = mfccs.shape[1]
            # if num_frames < desired_shape[0]:
            #     mfccs = np.pad(mfccs, ((0, 0), (0, desired_shape[0] - num_frames)), mode='constant')
            # elif num_frames > desired_shape[0]:
            #     mfccs = mfccs[:, :desired_shape[0]]

            # Append the preprocessed data and label
            Test_preprocess_data.append(mfccs.T)  # Transpose the data
            
        # Stack the preprocessed data into a 3D array
        X = np.array(Test_preprocess_data).reshape(-1,13)

        return X

## Predicting the audio label

In [51]:
##['belly_pain', 'burping', 'discomfort', 'hungry', 'tired']
def Predict_Label(audio_file):
    processed_data = (Test_preprocess_audio(audio_file))
    y_pred=GB.predict(processed_data)
    y_pred = np.argmax(y_pred)
    print(y_pred)
    # y_pred = int(np.median(y_pred))
    # print(y_pred)
    if y_pred == 0:
        print('belly_pain')
    if y_pred == 1:
        print('burping')
    if y_pred == 2:
        print('discomfort')
    if y_pred == 3:
        print('hungry')
    if y_pred == 4:
        print('tired')

In [52]:
Predict_Label("donateacry/tired/7A22229D-06C2-4AAA-9674-DE5DF1906B3A-1436891957-1.1-m-72-ti.wav")

40




In [53]:
Predict_Label("donateacry/hungry/0D1AD73E-4C5E-45F3-85C4-9A3CB71E8856-1430742197-1.0-m-04-hu.wav")

0
belly_pain




In [54]:
Predict_Label("donateacry/belly_pain/643D64AD-B711-469A-AF69-55C0D5D3E30F-1430138506-1.0-m-72-bp.wav")

0
belly_pain


