<a href="https://colab.research.google.com/github/MohiteYash/baby/blob/main/baby.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import librosa
import librosa.display
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Define Constants
# Root folder of the dataset; the loading loop in this cell treats every
# sub-directory name as a class label and reads the .wav files inside it.
DATASET_PATH = r'/content/drive/MyDrive/Baby_dataset/Baby_crying _Dataset'
# (height, width) of each spectrogram; index 1 is the number of time frames
# every clip is padded or truncated to, and both values define the CNN input.
IMG_SIZE = (128, 128)  # Mel spectrogram size
# Width of the one-hot label vectors and of the softmax output layer.
num_classes = 5  # Categories: belly pain, burping, discomfort, hungry, tired

# Function to Convert Audio to Fixed-Size Mel Spectrogram
def extract_spectrogram(file_path, img_size=IMG_SIZE):
    """Load an audio file and return a fixed-size log-mel spectrogram.

    The audio is loaded at 22050 Hz, converted to a mel power spectrogram
    (fmax=8000) and expressed in dB relative to the clip's own peak, so
    0 dB is the loudest bin and every other value is negative.

    Args:
        file_path: Path of the audio file to read.
        img_size: ``(n_mels, n_frames)`` of the returned spectrogram.

    Returns:
        Array of shape ``(img_size[0], img_size[1], 1)``; the trailing axis
        is the single channel expected by the CNN.
    """
    n_mels, n_frames = img_size
    y, sr = librosa.load(file_path, sr=22050)
    # Take the number of mel bands from img_size so a non-default size
    # yields a matching height instead of a fixed 128.
    mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels, fmax=8000)
    mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)  # Convert to dB scale

    # Ensure fixed width (truncate or pad)
    missing_frames = n_frames - mel_spec_db.shape[1]
    if missing_frames > 0:
        # With ref=np.max, 0 dB is the PEAK level, so zero-padding would
        # append maximally loud frames. Pad with the quietest level of the
        # clip instead so the padding behaves like silence.
        # NOTE: models trained on the old zero-padded features should be
        # retrained after this change.
        fill_value = mel_spec_db.min() if mel_spec_db.size else -80.0
        mel_spec_db = np.pad(
            mel_spec_db,
            ((0, 0), (0, missing_frames)),
            mode='constant',
            constant_values=fill_value,
        )
    else:
        # Truncate if longer (no-op when the width already matches)
        mel_spec_db = mel_spec_db[:, :n_frames]

    return np.expand_dims(mel_spec_db, axis=-1)  # Add channel dimension

# Load Dataset
# Every sub-directory of DATASET_PATH is one class; its name is the label.
X, y = [], []
# os.listdir() returns entries in arbitrary order. Sorting keeps the sample
# order -- and therefore the random_state=42 split below -- reproducible.
for label in sorted(os.listdir(DATASET_PATH)):
    label_path = os.path.join(DATASET_PATH, label)
    if not os.path.isdir(label_path):
        continue
    for file in sorted(os.listdir(label_path)):
        # Case-insensitive match so recordings saved as ".WAV" are not dropped.
        if not file.lower().endswith(".wav"):
            continue
        file_path = os.path.join(label_path, file)
        spectrogram = extract_spectrogram(file_path)
        X.append(spectrogram)
        y.append(label)

# Fail early with a clear message instead of an obscure error further down.
if not X:
    raise ValueError(f"No .wav files found under {DATASET_PATH!r}")

# Convert to NumPy Arrays
X = np.array(X)  # spectrograms share one shape, so this stacks into a 4-D array
y = np.array(y)

# Encode Labels
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)  # Convert labels to integers
y = to_categorical(y, num_classes=num_classes)  # One-hot encode

# Split Data (80/20, stratified on the labels)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

# Reshape for CNN Input
X_train = X_train.reshape(X_train.shape[0], IMG_SIZE[0], IMG_SIZE[1], 1)
X_test = X_test.reshape(X_test.shape[0], IMG_SIZE[0], IMG_SIZE[1], 1)

# Verify Shapes
print("X_train shape:", X_train.shape)  # Expected: (num_samples, 128, 128, 1)
print("y_train shape:", y_train.shape)  # Expected: (num_samples, 5)

X_train shape: (365, 128, 128, 1)
y_train shape: (365, 5)


In [None]:
# CNN Model
# Three Conv2D + MaxPooling2D stages followed by a dense classifier head.
model = Sequential([
    # Declare the input with an explicit Input object. Passing input_shape
    # to the first Conv2D of a Sequential model is deprecated and makes
    # Keras emit a warning when the layer is constructed.
    tf.keras.Input(shape=(IMG_SIZE[0], IMG_SIZE[1], 1)),

    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),

    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),

    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),

    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.3),
    Dense(num_classes, activation='softmax')  # Output layer for classification
])

# Compile the Model
# categorical_crossentropy matches the one-hot encoded labels.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the Model
# X_test/y_test are only monitored per epoch here (no callback acts on them).
history = model.fit(X_train, y_train, epochs=40, batch_size=32, validation_data=(X_test, y_test))


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/40
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 357ms/step - accuracy: 0.5634 - loss: 17.1446 - val_accuracy: 0.8370 - val_loss: 0.7513
Epoch 2/40
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 29ms/step - accuracy: 0.8332 - loss: 0.7424 - val_accuracy: 0.8370 - val_loss: 0.8863
Epoch 3/40
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - accuracy: 0.8239 - loss: 0.8778 - val_accuracy: 0.8370 - val_loss: 1.0726
Epoch 4/40
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 0.8381 - loss: 0.8606 - val_accuracy: 0.8370 - val_loss: 0.7784
Epoch 5/40
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - accuracy: 0.8235 - loss: 0.8730 - val_accuracy: 0.8370 - val_loss: 0.7480
Epoch 6/40
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - accuracy: 0.8341 - loss: 0.7432 - val_accuracy: 0.8370 - val_loss: 0.6707
Epoch 7/40
[1m12/12[0m [32m━

In [None]:
# Evaluate Model
# The model was compiled with a single metric ('accuracy'), so evaluate()
# yields the loss followed by the accuracy on the held-out split.
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_acc * 100:.2f}%")


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.7422 - loss: 1.6043
Test Accuracy: 75.00%


In [None]:
# Persist the trained network and read it straight back from the same file.
MODEL_FILE = "baby_cry_cnn_model.h5"
model.save(MODEL_FILE)

# Reloaded copy of the saved model
loaded_model = tf.keras.models.load_model(MODEL_FILE)




In [None]:
# Function to Predict Baby Cry Reason
def predict_cry_reason(file_path, model, encoder=None):
    """Predict the cry category of a single audio file.

    Args:
        file_path: Path of the audio file to classify.
        model: Trained model whose ``predict`` accepts a batch of shape
            ``(1, IMG_SIZE[0], IMG_SIZE[1], 1)`` and returns one row of
            class scores.
        encoder: Fitted label encoder used to turn the winning class index
            back into its label. Defaults to the module-level
            ``label_encoder``, so existing two-argument calls keep working;
            pass one explicitly when the model is used without re-running
            the training cell.

    Returns:
        The label of the highest-scoring class.
    """
    if encoder is None:
        encoder = label_encoder
    spectrogram = extract_spectrogram(file_path)
    batch = spectrogram.reshape(1, IMG_SIZE[0], IMG_SIZE[1], 1)  # Reshape for model input
    scores = model.predict(batch)
    # Explicit axis: pick the best class of the single row in the batch.
    class_index = int(np.argmax(scores, axis=1)[0])
    return encoder.inverse_transform([class_index])[0]

# Example Prediction
# NOTE(review): this file lives inside the dataset folder that was loaded and
# split above, so it is not an unseen recording -- swap in an independent
# file to sanity-check the model.
sample_file = "/content/drive/MyDrive/Baby_dataset/Baby_crying _Dataset/tired/5B416CE8-2591-4531-9ADC-86D085B5D48B-1430144827-1.0-m-48-ti.wav"  # Update with a test file
# Runs the reloaded model (not the in-memory one) on the sample.
predicted_reason = predict_cry_reason(sample_file, loaded_model)
print(f"Predicted Baby Cry Reason: {predicted_reason}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
Predicted Baby Cry Reason: tired
