In [1]:
import librosa
import numpy as np
import os
# Set the duration and hop length for audio processing
duration = 5  # seconds
hop_length = 512

# Define a function to load and extract LFCC features from an audio file
def extract_lfcc_features(file_path):
    # Load the audio file and extract the LFCC features
    y, sr = librosa.load(file_path, duration=duration)
    lfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13, hop_length=hop_length)
    return lfcc

# Define a function to load and preprocess the dataset
def load_dataset(dataset_path):
    # Load the audio files and extract the LFCC features
    lfcc_features = []
    labels = []
    for label, class_name in enumerate(sorted(os.listdir(dataset_path))):
        class_path = os.path.join(dataset_path, class_name)
        for file_name in os.listdir(class_path):
            file_path = os.path.join(class_path, file_name)
            lfcc = extract_lfcc_features(file_path)
            lfcc_features.append(lfcc)
            labels.append(label)
    # Convert the LFCC features and labels to numpy arrays
    lfcc_features = np.array(lfcc_features)
    labels = np.array(labels)
    # Normalize the LFCC features
    mean = np.mean(lfcc_features, axis=0)
    std = np.std(lfcc_features, axis=0)
    np.save('mean.npy', mean)
    np.save('std.npy', std)
    lfcc_features -= np.mean(lfcc_features, axis=0)
    lfcc_features /= np.std(lfcc_features, axis=0)
    
    return lfcc_features, labels


# Load the dataset
dataset_path = '/kaggle/input/detect-cry/data'
lfcc_features, labels = load_dataset(dataset_path)

In [2]:
import numpy as np
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.utils import to_categorical

# Load the data
#lfcc_features = np.load('lfcc_features.npy')
#labels = np.load('labels.npy')

# Normalize the data
lfcc_features /= np.std(lfcc_features, axis=0)
num_classes=3
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(lfcc_features, labels, test_size=0.2, random_state=42)

# Convert the labels to categorical format
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
X_train = X_train.reshape(X_train.shape[0], 13, 216, 1)
X_test = X_test.reshape(X_test.shape[0], 13, 216, 1)
# Define the CNN architecture
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(13, 216, 1)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
epochs = 100
batch_size = 32
history = model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(X_test, y_test))

# Evaluate the model on the test set
score = model.evaluate(X_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
model.summary()
model.save('detection.h5');

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [3]:
import librosa
import numpy as np
import requests
from keras.models import load_model

# Load the trained model
model = load_model('/kaggle/working/detection.h5')

# Load audio file
y, sr = librosa.load('/kaggle/input/detect-cry/data/Silence/silence.wav_40.wav', sr=16000)

# Extract MFCC features
mfcc_features = librosa.feature.mfcc(y=y, sr=sr, n_fft=1024, hop_length=372,n_mfcc=13)
mfcc_features = mfcc_features.T

# Reshape data for use with the model
#num_frames = 13  # change this to match the number of frames in your data
#num_features = 87
print(mfcc_features.shape)
mfcc_features = mfcc_features.reshape(1, 13, 216, 1)

# Make a prediction using the trained model
prediction = model.predict(mfcc_features)
#prediction_class = np.argmax(prediction)

# Print the predicted class
#classes = ['Hunger', 'Discomfort', 'Sleepiness', 'Pain', 'None']
#print('Predicted class:', classes[prediction_class])
print(prediction)
class_labels = ['lAUGH','Silence','Crying']
predicted_label = class_labels[np.argmax(prediction)]

# Print the predicted class label
print(predicted_label)


(216, 13)
[[0. 0. 1.]]
Crying
