In [2]:
 %pip install librosa
 %pip install scikit-learn
 %pip install pydub
 %pip install pandas



Collecting pydub
  Using cached pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Using cached pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [4]:
import os
import pandas as pd
import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import pickle
from collections import Counter
from pydub import AudioSegment
from io import BytesIO
import wave
import math
import uuid

In [5]:
raw_audio = {}

directories = ['hungry', 'belly_pain', 'burping', 'discomfort', 'tired']
for directory in directories:
    path = r"./data/" + directory
    for filename in os.listdir(path):
        if filename.endswith(".wav"):
            raw_audio[os.path.join(path, filename)] = directory


In [6]:
def extract_mfcc(audio_file, max_length=100):
    audiofile, sr = librosa.load(audio_file)
    fingerprint = librosa.feature.mfcc(y=audiofile, sr=sr, n_mfcc=20)
    if fingerprint.shape[1] < max_length:
        pad_width = max_length - fingerprint.shape[1]
        fingerprint_padded = np.pad(fingerprint, pad_width=((0, 0), (0, pad_width)), mode='constant')
        return fingerprint_padded.T
    elif fingerprint.shape[1] > max_length:
        return fingerprint[:, :max_length].T
    else:
        return fingerprint.T


X = []
y = []
max_length = 100

for i, (audio_file, label) in enumerate(raw_audio.items()):
    mfcc_features = extract_mfcc(audio_file, max_length=max_length)
    X.append(mfcc_features.flatten())
    y.append(label)

df = pd.DataFrame(X)
df = df.fillna(0)
df['label'] = y
df.to_csv('audio_dataset.csv', index=False)



In [7]:
X = np.array(X)
y = np.array(y)

X_flat = X.reshape(X.shape[0], -1)
y_flat = y

X_train, X_test, y_train, y_test = train_test_split(X_flat, y_flat, test_size=0.2, random_state=42)


In [9]:

# Train and evaluate models
models = [
    ('Random Forest', RandomForestClassifier(n_estimators=25, max_features=5)),
    ('Logistic Regression', LogisticRegression()),
    ('Decision Tree', DecisionTreeClassifier()),
    ('SVM', SVC()),
]



In [10]:
print("Model, Accuracy, Precision, Recall")
for model_name, model in models:
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    print(f"{model_name}: {accuracy}, {precision}, {recall}")

Model, Accuracy, Precision, Recall


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Random Forest: 0.8043478260869565, 0.6469754253308129, 0.8043478260869565


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Logistic Regression: 0.7065217391304348, 0.6729636763896533, 0.7065217391304348
Decision Tree: 0.717391304347826, 0.7059420289855073, 0.717391304347826
SVM: 0.8043478260869565, 0.6469754253308129, 0.8043478260869565


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [11]:
print(X_train.shape)


(365, 2000)


In [12]:
n_samples, n_features = X_train.shape[0], X_train.shape[1] // 100
n_timesteps = 100
X_train_lstm = X_train.reshape((n_samples, 100, 20))
n_samples_test = X_test.shape[0]
X_test_lstm = X_test.reshape((n_samples_test, n_timesteps, n_features))

label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

lstm_model = Sequential([
    LSTM(units=128, input_shape=(n_timesteps, n_features)),
    Dropout(0.2),
    Dense(units=64, activation='relu'),
    Dropout(0.2),
    Dense(units=len(np.unique(y_train_encoded)), activation='softmax')
])


lstm_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

lstm_model.fit(X_train_lstm, y_train_encoded, epochs=10, batch_size=32, validation_split=0.2)

_, accuracy = lstm_model.evaluate(X_test_lstm, y_test_encoded)
print("Accuracy:", accuracy)

from sklearn.metrics import precision_score, recall_score

predicted_probabilities = lstm_model.predict(X_test_lstm)

predicted_labels = np.argmax(predicted_probabilities, axis=1)

precision = precision_score(y_test_encoded, predicted_labels, average='weighted')

recall = recall_score(y_test_encoded, predicted_labels, average='weighted')

print("Precision:", precision)
print("Recall:", recall)



Epoch 1/10


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Accuracy: 0.804347813129425
Precision: 0.6469754253308129
Recall: 0.8043478260869565


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [13]:
import joblib

joblib.dump(lstm_model, "lstm_audio_model.joblib")


['lstm_audio_model.joblib']

In [14]:
def pickle_model(model, modelname):
    directory = 'models'
    if not os.path.exists(directory):
        os.makedirs(directory)
    with open(os.path.join(directory, str(modelname) + '.pkl'), 'wb') as f:
        return pickle.dump(model, f)

model = RandomForestClassifier()
model.fit(X_train, y_train)
pickle_model(model, "myModel")

In [15]:
def getModel(pickle_path):
  with open(pickle_path, 'rb') as f:
        return pickle.load(f)

In [16]:
# %pip install pydub --user
%pip install ffmpeg-python

Collecting ffmpeg-pythonNote: you may need to restart the kernel to use updated packages.

  Using cached ffmpeg_python-0.2.0-py3-none-any.whl.metadata (1.7 kB)
Collecting future (from ffmpeg-python)
  Downloading future-1.0.0-py3-none-any.whl.metadata (4.0 kB)
Using cached ffmpeg_python-0.2.0-py3-none-any.whl (25 kB)
Downloading future-1.0.0-py3-none-any.whl (491 kB)
   ---------------------------------------- 0.0/491.3 kB ? eta -:--:--
    --------------------------------------- 10.2/491.3 kB ? eta -:--:--
   ---- ---------------------------------- 61.4/491.3 kB 656.4 kB/s eta 0:00:01
   ----------- ---------------------------- 143.4/491.3 kB 1.1 MB/s eta 0:00:01
   --------------- ------------------------ 194.6/491.3 kB 1.1 MB/s eta 0:00:01
   -------------------- ------------------- 245.8/491.3 kB 1.1 MB/s eta 0:00:01
   ------------------------ --------------- 297.0/491.3 kB 1.1 MB/s eta 0:00:01
   ---------------------------- ----------- 348.2/491.3 kB 1.1 MB/s eta 0:00:01
   ---

In [17]:
import os
import librosa
import numpy as np
from sklearn.ensemble import RandomForestClassifier
import joblib
from collections import Counter

rf_model = joblib.load("./models/myModel.pkl")  # Replace with your model path

def extract_mfcc(audio_file, max_length=100):
    audiofile, sr = librosa.load(audio_file)
    fingerprint = librosa.feature.mfcc(y=audiofile, sr=sr, n_mfcc=20)
    if fingerprint.shape[1] < max_length:
        pad_width = max_length - fingerprint.shape[1]
        fingerprint_padded = np.pad(fingerprint, pad_width=((0, 0), (0, pad_width)), mode='constant')
        return fingerprint_padded.T
    elif fingerprint.shape[1] > max_length:
        return fingerprint[:, :max_length].T
    else:
        return fingerprint.T

audio_file_path = './data/hungry/02c3b725-26e4-4a2c-9336-04ddc58836d9-1430726196216-1.7-m-04-hu.wav'  # Replace with your actual audio file path

mfcc_features = extract_mfcc(audio_file_path)

mfcc_features_flat = mfcc_features.flatten()

prediction = rf_model.predict([mfcc_features_flat])

print("Predicted Label:", prediction[0])

Predicted Label: hungry
