# In [None]:
# YAMNet Infant Cry Classification — using Kaggle “Infant Cry Dataset”

import os
import numpy as np
import librosa
import tensorflow as tf
import tensorflow_hub as hub
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt

# Sampling rate (Hz) that every clip is resampled to before it is fed to YAMNet.
SAMPLE_RATE = 16000

# Frozen (non-trainable) YAMNet layer from TF Hub, used as a feature extractor.
YAMNET_HANDLE = "https://tfhub.dev/google/yamnet/1"
yamnet = hub.KerasLayer(YAMNET_HANDLE, trainable=False)

def embed_wav(wav_path):
    """Return one clip-level YAMNet embedding for the audio file at ``wav_path``.

    The file is decoded with librosa at ``SAMPLE_RATE``, converted to a
    float32 tensor and passed through the module-level ``yamnet`` layer. The
    per-frame embeddings it yields (shape ``(frames, embedding_dim)``) are
    averaged over the frame axis, so the result is a single NumPy vector
    regardless of clip length.
    """
    samples, _sr = librosa.load(wav_path, sr=SAMPLE_RATE)
    audio = tf.convert_to_tensor(samples, dtype=tf.float32)
    # The layer yields three outputs; only the middle one (embeddings) is used.
    _scores, frame_embeddings, _spectrogram = yamnet(audio)
    # Mean-pool across frames to get a fixed-size representation of the clip.
    return frame_embeddings.numpy().mean(axis=0)

def load_embeds(raw_folder):
    """Embed every ``.wav`` file under a one-folder-per-class directory tree.

    Each visible sub-directory of ``raw_folder`` is treated as one class: its
    name is the class label and its position in the sorted list of names is
    the integer class index.

    Args:
        raw_folder: Path of the directory holding one sub-directory per class.

    Returns:
        A ``(X, y, labels)`` tuple. ``X`` is the array of per-file embeddings
        returned by ``embed_wav``, ``y`` is the array of matching integer
        class indices, and ``labels`` is the sorted list of class names
        (``labels[i]`` is the name for index ``i``). Both arrays are empty
        when no ``.wav`` file is found.
    """
    # Only real, non-hidden directories are classes. Stray files (README,
    # .DS_Store) would otherwise crash the inner listdir, and hidden folders
    # such as .ipynb_checkpoints would become empty phantom classes.
    labels = sorted(
        name
        for name in os.listdir(raw_folder)
        if not name.startswith(".")
        and os.path.isdir(os.path.join(raw_folder, name))
    )

    X, y = [], []
    for idx, lab in enumerate(labels):
        lab_dir = os.path.join(raw_folder, lab)
        # Sorted so the sample order (and therefore any seeded split done
        # downstream) does not depend on the filesystem's listing order.
        for fname in sorted(os.listdir(lab_dir)):
            # Skip hidden files (e.g. macOS "._clip.wav" sidecars) and
            # anything that is not a WAV file.
            if fname.startswith(".") or not fname.lower().endswith(".wav"):
                continue
            X.append(embed_wav(os.path.join(lab_dir, fname)))
            y.append(idx)
    return np.array(X), np.array(y), labels

RAW_DIR = "data/raw"
X, y, class_names = load_embeds(RAW_DIR)
print("Embedding load:", X.shape, len(class_names), class_names)

# Fail early with a clear message instead of an opaque error further down
# when the dataset directory holds no usable audio.
if len(X) == 0:
    raise ValueError(f"No .wav files found under {RAW_DIR!r}")

# Hold out 20% of the clips for validation; stratify keeps the class
# proportions the same in both splits, and the fixed seed makes it repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# model: a small dense classifier on top of the fixed-size clip embeddings
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X.shape[1],)),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(len(class_names), activation='softmax')
])
# Labels are integer class indices, hence the sparse cross-entropy loss.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=20, batch_size=32)

# plot training vs. validation accuracy per epoch
plt.plot(history.history['accuracy'], label='train_acc')
plt.plot(history.history['val_accuracy'], label='val_acc')
plt.legend()
plt.show()

# evaluation
# argmax turns the per-class softmax scores into a predicted class index.
y_pred = np.argmax(model.predict(X_val), axis=1)
# Pass the full label set explicitly: without it, a class that is absent from
# both y_val and y_pred makes classification_report raise (label count no
# longer matches target_names) and silently shrinks the confusion matrix.
label_ids = np.arange(len(class_names))
print(classification_report(y_val, y_pred, labels=label_ids, target_names=class_names))
print(confusion_matrix(y_val, y_pred, labels=label_ids))
