In [4]:
from google.colab import userdata
import os

os.environ["KAGGLE_KEY"] = userdata.get('KAGGLE_KEY')
os.environ["KAGGLE_USERNAME"] = userdata.get('KAGGLE_USERNAME')

In [5]:
#!kaggle datasets download -d mennaahmed23/baby-crying-sounds-dataset
!kaggle datasets download -d sanmithasadhish/infant-cry-dataset
#! unzip "infant-cry-audio-corpus.zip"
#! unzip "baby-crying-sounds-dataset.zip"
! unzip "infant-cry-dataset.zip"

Dataset URL: https://www.kaggle.com/datasets/sanmithasadhish/infant-cry-dataset
License(s): MIT
Downloading infant-cry-dataset.zip to /content
 81% 136M/169M [00:00<00:00, 1.42GB/s]
100% 169M/169M [00:00<00:00, 1.13GB/s]
Archive:  infant-cry-dataset.zip
  inflating: Dataset/cry/100c.wav    
  inflating: Dataset/cry/101c.wav    
  inflating: Dataset/cry/102c.wav    
  inflating: Dataset/cry/103c.wav    
  inflating: Dataset/cry/104c.wav    
  inflating: Dataset/cry/105c.wav    
  inflating: Dataset/cry/106c.wav    
  inflating: Dataset/cry/107c.wav    
  inflating: Dataset/cry/108c.wav    
  inflating: Dataset/cry/109c.wav    
  inflating: Dataset/cry/10c.wav     
  inflating: Dataset/cry/110c.wav    
  inflating: Dataset/cry/111c.wav    
  inflating: Dataset/cry/112c.wav    
  inflating: Dataset/cry/113c.wav    
  inflating: Dataset/cry/114c.wav    
  inflating: Dataset/cry/115c.wav    
  inflating: Dataset/cry/116c.wav    
  inflating: Dataset/cry/117c.wav    
  inflating: Dataset/cry

In [6]:
import os
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
import tensorflow_hub as hub
import librosa
import numpy as np
from tensorflow.keras import layers, models

In [20]:
# Load YAMNet (frozen)
yamnet_model = hub.load("https://tfhub.dev/google/yamnet/1")

def extract_embedding(file_path):
    # Load audio at 16kHz
    audio, _ = librosa.load(file_path, sr=16000)
    # YAMNet returns scores, embeddings, spectrogram
    scores, embeddings, spectrogram = yamnet_model(audio)
    # Average across time dimension
    avg_embedding = tf.reduce_mean(embeddings, axis=0)
    return avg_embedding.numpy()


In [26]:
from sklearn.model_selection import train_test_split

X, y = [], []

for label_name, label_id in [("not_cry",0), ("cry",1)]:
    folder = f"dataset/{label_name}"
    for file in os.listdir(folder):
        # if file.endswith(".wav"):
            path = os.path.join(folder, file)
            emb = extract_embedding(path)
            X.append(emb)
            y.append(label_id)

X = np.stack(X)  # shape: (num_samples, 1024)
y = np.array(y)

# Split dataset
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)



In [27]:
model = models.Sequential([
    layers.Input(shape=(1024,)),   # YAMNet embedding size
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(1, activation='sigmoid')  # 1 neuron for binary
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

In [28]:
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    batch_size=16,
    epochs=20
)

Epoch 1/20
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - accuracy: 0.8011 - loss: 0.4001 - val_accuracy: 0.9551 - val_loss: 0.1668
Epoch 2/20
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.9445 - loss: 0.1486 - val_accuracy: 0.9663 - val_loss: 0.1124
Epoch 3/20
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.9812 - loss: 0.0756 - val_accuracy: 0.9607 - val_loss: 0.1352
Epoch 4/20
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.9716 - loss: 0.1012 - val_accuracy: 0.9775 - val_loss: 0.1015
Epoch 5/20
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.9810 - loss: 0.0967 - val_accuracy: 0.9494 - val_loss: 0.2320
Epoch 6/20
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.9871 - loss: 0.1317 - val_accuracy: 0.9888 - val_loss: 0.0392
Epoch 7/20
[1m45/45[0m [32m━━━━━━━━━

In [30]:
model.save("baby_cry_classifier.keras")