In [2]:
import torch
import torchaudio, os, numpy as np
from pathlib import Path
import soundfile as sf

In [None]:
# Sample rate that every loaded clip is resampled to.
TARGET_SR = 16000
# Fixed clip duration in seconds; multiplied by TARGET_SR to get the
# number of samples each saved clip must contain.
CLIP_SECONDS = 3.0

# Resampler transform fixed to a 48000 -> TARGET_SR conversion.
# NOTE(review): nothing in the code visible here references this object;
# load_and_resample calls torchaudio.functional.resample with each file's
# own sample rate instead. Confirm whether it is still needed.
resampler = torchaudio.transforms.Resample(orig_freq=48000, new_freq=TARGET_SR)

def load_and_resample(path):
    """Read an audio file as mono and bring it to TARGET_SR.

    Args:
        path: Location of the audio file, passed straight to ``sf.read``.

    Returns:
        The waveform as a NumPy array (read with ``dtype="float32"``).
        Two-dimensional input is averaged over axis 1, and the result is
        resampled only when the file's sample rate differs from TARGET_SR.
    """
    samples, native_sr = sf.read(path, dtype="float32")

    # A 2-D array is treated as multi-channel audio: collapse axis 1 to mono.
    if samples.ndim == 2:
        samples = samples.mean(axis=1)

    # Already at the target rate: nothing more to do.
    if native_sr == TARGET_SR:
        return samples

    # torchaudio works on tensors, so round-trip through torch.
    tensor = torch.from_numpy(samples)
    resampled = torchaudio.functional.resample(tensor, native_sr, TARGET_SR)
    return resampled.numpy()

def pad_or_trim(wav):
    """Force a waveform to exactly ``int(CLIP_SECONDS * TARGET_SR)`` samples.

    Args:
        wav: Waveform array to adjust.

    Returns:
        ``wav`` unchanged when it already has the target length, its leading
        samples when it is longer, or a copy padded at the end (``np.pad``
        default fill) when it is shorter.
    """
    target_len = int(CLIP_SECONDS * TARGET_SR)
    n_samples = len(wav)

    if n_samples == target_len:
        return wav
    if n_samples > target_len:
        # Too long: keep only the leading samples.
        return wav[:target_len]
    # Too short: append padding after the last sample.
    return np.pad(wav, (0, target_len - n_samples))

# Input tree: one sub-directory per class label, each holding .wav files.
root = Path("extracted")
# Output tree mirrors the labels and stores one .npy array per clip.
out = Path("prepped_raw")
out.mkdir(parents=True, exist_ok=True)

for cls_dir in root.iterdir():
    # Only directories are treated as class folders; anything else is skipped.
    if cls_dir.is_dir():
        label = cls_dir.name
        label_out = out / label
        label_out.mkdir(exist_ok=True)
        for wav_path in cls_dir.glob("*.wav"):
            # Load -> mono/resample -> fixed length, then save as float32.
            wav = pad_or_trim(load_and_resample(str(wav_path)))
            np.save(label_out / (wav_path.stem + ".npy"), wav.astype("float32"))