In [None]:
# CNN Baseline for Infant Cry Classification — Kaggle dataset

import os
import numpy as np
import librosa
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt

# --- Audio front-end settings (consumed by wav_to_mel_image) ---
SAMPLE_RATE = 16_000    # target sampling rate handed to librosa.load
N_MELS = 64             # number of mel bands in the spectrogram
HOP_LENGTH = 512        # hop length, in samples, for the mel spectrogram

# --- Image / training settings ---
IMG_SIZE = (128, 128)   # spectrogram image size fed to the CNN
BATCH_SIZE = 32         # mini-batch size for model.fit
EPOCHS = 20             # number of training epochs

def wav_to_mel_image(wav_path):
    """Convert an audio file into a 3-channel log-mel spectrogram image.

    The clip is loaded at SAMPLE_RATE, turned into a mel power spectrogram,
    converted to dB relative to its own peak, min-max scaled to [0, 1],
    resized to IMG_SIZE and replicated across three channels.

    Args:
        wav_path: Path of the audio file to load.

    Returns:
        A float32 array of shape (IMG_SIZE[0], IMG_SIZE[1], 3) with values
        in [0, 1].
    """
    import cv2  # imported locally; OpenCV is only needed for the resize

    y, sr = librosa.load(wav_path, sr=SAMPLE_RATE)
    # The signal must be passed by keyword: librosa >= 0.10 rejects it as a
    # positional argument.
    S = librosa.feature.melspectrogram(
        y=y, sr=sr, n_mels=N_MELS, hop_length=HOP_LENGTH
    )
    S_db = librosa.power_to_db(S, ref=np.max)

    lo = S_db.min()
    span = S_db.max() - lo
    if span > 0:
        S_norm = (S_db - lo) / span
    else:
        # A constant spectrogram (e.g. digital silence) would give 0/0 = NaN;
        # map it to an all-zero image instead.
        S_norm = np.zeros_like(S_db)

    # cv2.resize expects dsize as (width, height), i.e. (cols, rows), so swap
    # the axes to get an output of shape (IMG_SIZE[0], IMG_SIZE[1]).
    img = cv2.resize(S_norm, (IMG_SIZE[1], IMG_SIZE[0]))
    img3 = np.stack([img, img, img], axis=-1)
    return img3.astype(np.float32)

def load_dataset(raw_folder):
    """Load a folder-per-class audio dataset as spectrogram images.

    Every non-hidden sub-directory of ``raw_folder`` is treated as one class;
    its name is the label. Plain files and hidden directories (names starting
    with ``.``, such as ``.ipynb_checkpoints``) directly under ``raw_folder``
    are ignored so they neither crash the scan nor become bogus classes.
    Inside each class directory only ``.wav`` files (case-insensitive) are
    loaded, in sorted order so the sample order is reproducible.

    Args:
        raw_folder: Root directory containing one sub-directory per class.

    Returns:
        A tuple ``(X, y, labels)``: ``X`` is the array of images produced by
        ``wav_to_mel_image``, ``y`` the array of integer class indices, and
        ``labels`` the sorted list of class names (index -> name).
    """
    labels = sorted(
        entry
        for entry in os.listdir(raw_folder)
        if not entry.startswith('.')
        and os.path.isdir(os.path.join(raw_folder, entry))
    )

    X, y = [], []
    for idx, lab in enumerate(labels):
        lab_dir = os.path.join(raw_folder, lab)
        # os.listdir order is arbitrary; sort for a deterministic dataset.
        for fname in sorted(os.listdir(lab_dir)):
            if not fname.lower().endswith('.wav'):
                continue
            X.append(wav_to_mel_image(os.path.join(lab_dir, fname)))
            y.append(idx)
    return np.array(X), np.array(y), labels

# Read every labelled clip from disk as a spectrogram image.
RAW_DIR = "data/raw"
X, y, class_names = load_dataset(RAW_DIR)
print("Loaded:", X.shape, len(class_names), class_names)

# Hold out 20% for validation; stratify on the labels and fix the seed so
# the split is repeatable.
(X_train, X_val, y_train, y_val) = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# build CNN
# The images produced by wav_to_mel_image are already min-max scaled to
# [0, 1], so no Rescaling(1/255) layer is used here: dividing again would
# squash every input into [0, ~0.004] and slow down learning.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(IMG_SIZE[0], IMG_SIZE[1], 3)),
    tf.keras.layers.Conv2D(32, 3, activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(64, 3, activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(128, 3, activation='relu'),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    # One softmax output per class; labels are integer indices, hence the
    # sparse categorical cross-entropy loss below.
    tf.keras.layers.Dense(len(class_names), activation='softmax')
])
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train on the training split, scoring the held-out split after each epoch.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(X_val, y_val),
)

# Plot training and validation accuracy per epoch on the same axes.
for _metric, _legend in (('accuracy', 'train_acc'), ('val_accuracy', 'val_acc')):
    plt.plot(history.history[_metric], label=_legend)
plt.legend()
plt.show()

# evaluation
# Predicted class = index of the highest softmax probability.
y_pred = np.argmax(model.predict(X_val), axis=1)

# Pin the label set to every known class index. Without this, a class that
# is absent from both y_val and y_pred makes classification_report raise
# (target_names length mismatch) and shrinks the confusion matrix so its
# axes no longer line up with class_names.
class_ids = np.arange(len(class_names))
print(classification_report(
    y_val,
    y_pred,
    labels=class_ids,
    target_names=class_names,
    zero_division=0,  # report 0 for classes with no samples/predictions
))
print(confusion_matrix(y_val, y_pred, labels=class_ids))
