# In [None]:
# ResNet-50 Infant Cry Classification — using Kaggle “Infant Cry Dataset”

import os
import numpy as np
import librosa
import matplotlib.pyplot as plt
import tensorflow as tf
from pathlib import Path

# --- configuration constants ---
# Audio front-end (used by wav_to_mel_image)
SAMPLE_RATE = 16000    # target sampling rate handed to librosa.load
N_MELS = 128           # number of mel bands in the spectrogram
HOP_LENGTH = 512       # hop length handed to the mel spectrogram

# Image / training setup
IMG_SIZE = (224, 224)  # spatial size of the spectrogram image and model input
BATCH_SIZE = 32        # batch size handed to model.fit
EPOCHS = 20            # number of epochs handed to model.fit

# 1. preprocess: convert wav → mel spectrogram image arrays
def wav_to_mel_image(wav_path):
    """Convert one audio file into a 3-channel log-mel spectrogram image.

    The audio is loaded with ``librosa.load`` at ``SAMPLE_RATE``, turned into
    a mel power spectrogram (``N_MELS`` bands, ``HOP_LENGTH`` hop), converted
    to decibels relative to its own peak, min-max scaled, resized to
    ``IMG_SIZE`` and replicated across three channels.

    Args:
        wav_path: Path to an audio file readable by ``librosa.load``.

    Returns:
        ``np.float32`` array of shape ``(IMG_SIZE[0], IMG_SIZE[1], 3)``
        holding the min-max scaled spectrogram (all zeros when the
        spectrogram has no dynamic range, e.g. a silent clip).
    """
    import cv2  # kept local: the file does not import cv2 at module level

    y, sr = librosa.load(wav_path, sr=SAMPLE_RATE)
    # Pass the signal by keyword: librosa >= 0.10 made it keyword-only, and
    # older releases accept the keyword form as well.
    S = librosa.feature.melspectrogram(
        y=y, sr=sr, n_mels=N_MELS, hop_length=HOP_LENGTH
    )
    S_db = librosa.power_to_db(S, ref=np.max)

    # Min-max normalise to 0-1. A constant spectrogram has zero range, which
    # would turn the whole image into NaNs, so fall back to zeros instead.
    lo = S_db.min()
    span = S_db.max() - lo
    if span > 0:
        S_norm = (S_db - lo) / span
    else:
        S_norm = np.zeros_like(S_db)

    # cv2.resize expects dsize as (width, height), whereas IMG_SIZE is used
    # as (height, width) for the model input, so swap the two entries.
    img = cv2.resize(S_norm, (IMG_SIZE[1], IMG_SIZE[0]))

    # Replicate the single channel three times to mimic an RGB image.
    img3 = np.stack([img, img, img], axis=-1)
    return img3.astype(np.float32)

# 2. load dataset
def load_dataset(raw_folder):
    """Load every ``.wav`` file under ``raw_folder`` as a spectrogram image.

    ``raw_folder`` is expected to contain one sub-directory per class; the
    sub-directory name is used as the class label. Entries of ``raw_folder``
    that are not directories are ignored, as are files whose name does not
    end in ``.wav`` (case-insensitive).

    Args:
        raw_folder: Path of the dataset root directory.

    Returns:
        Tuple ``(X, y, labels)`` where ``X`` is the array of images returned
        by ``wav_to_mel_image``, ``y`` is the array of integer class indices
        (``y[i]`` indexes into ``labels``) and ``labels`` is the sorted list
        of class-directory names.
    """
    # Only directories are classes. A stray file in the root (README,
    # .DS_Store, ...) would otherwise become a bogus label and make the
    # inner os.listdir raise NotADirectoryError.
    labels = sorted(
        name
        for name in os.listdir(raw_folder)
        if os.path.isdir(os.path.join(raw_folder, name))
    )

    X, y = [], []
    for idx, lab in enumerate(labels):
        lab_dir = os.path.join(raw_folder, lab)
        # os.listdir order is arbitrary; sorting keeps the sample order (and
        # therefore any seeded split of the result) reproducible.
        for fname in sorted(os.listdir(lab_dir)):
            if not fname.lower().endswith(".wav"):
                continue
            X.append(wav_to_mel_image(os.path.join(lab_dir, fname)))
            y.append(idx)

    return np.array(X), np.array(y), labels

from sklearn.model_selection import train_test_split

# Point RAW_DIR at the unzipped Kaggle dataset folder
# (load_dataset expects one sub-folder per class).
RAW_DIR = "data/raw"
X, y, class_names = load_dataset(RAW_DIR)
print("Loaded:", X.shape, len(class_names), class_names)

# 3. split
# Hold out 20% for validation; stratify on y so both parts keep the class
# proportions, and fix the seed so the split is repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# 4. build model
# ImageNet-pretrained ResNet-50 backbone without its classification head,
# frozen so that its weights are not updated during training.
base = tf.keras.applications.ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3),
)
base.trainable = False

# The spectrogram images arrive scaled to [0, 1], but the ImageNet weights
# expect inputs produced by resnet50.preprocess_input applied to 0-255 RGB
# values. Doing the conversion inside the model keeps the contract for the
# training and evaluation code unchanged (they still pass [0, 1] arrays).
inputs = tf.keras.Input(shape=(IMG_SIZE[0], IMG_SIZE[1], 3))
x = tf.keras.layers.Rescaling(255.0)(inputs)
x = tf.keras.applications.resnet50.preprocess_input(x)
# training=False keeps the backbone's BatchNorm layers in inference mode.
x = base(x, training=False)

# New classification head: pooled features -> dropout -> one softmax unit
# per class.
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dropout(0.3)(x)
out = tf.keras.layers.Dense(len(class_names), activation='softmax')(x)
model = tf.keras.Model(inputs, out)

# Sparse categorical cross-entropy because the labels are integer class
# indices rather than one-hot vectors.
model.compile(optimizer=tf.keras.optimizers.Adam(1e-4),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# 5. train
# Fit on the training split and score the validation split after each epoch;
# the returned History object is kept for plotting.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(X_val, y_val),
)

# 6. plot
# Draw the per-epoch training and validation accuracy curves on one figure.
for metric_key, legend_name in (('accuracy', 'train_acc'),
                                ('val_accuracy', 'val_acc')):
    plt.plot(history.history[metric_key], label=legend_name)
plt.legend()
plt.show()

# 7. evaluate
from sklearn.metrics import classification_report, confusion_matrix

# Predicted class = index of the largest softmax output for each sample.
val_pred = np.argmax(model.predict(X_val), axis=1)

# Pin the label set explicitly. Without it scikit-learn infers the labels
# from the values present in y_val/val_pred, so a class missing from both
# makes classification_report raise (its label count no longer matches
# target_names) and silently shrinks the confusion matrix.
label_ids = np.arange(len(class_names))
print(classification_report(y_val, val_pred,
                            labels=label_ids,
                            target_names=class_names))
print(confusion_matrix(y_val, val_pred, labels=label_ids))
