In [None]:
import os
import torchaudio
import torch

# Clean-up pass over a folder of WAV clips.
#
# Every ".wav" file directly inside `input_folder` is loaded and then deleted
# from disk when either of these holds:
#   * its duration is shorter than `min_duration_sec`, or
#   * its RMS level (in dB) is lower than `threshold_db`.
# A summary of everything that was deleted is printed at the end.
#
# WARNING: deletion uses os.remove and is permanent; there is no dry-run mode.

input_folder = "clips/positive"
threshold_db = -30        # clips whose RMS level is below this are removed
min_duration_sec = 0.35   # clips shorter than this many seconds are removed

# One (filename, reason) tuple per deleted file, in processing order.
removed_files = []

# The directory listing is taken once, up front, so deleting files while
# looping does not affect which names are visited.
wav_names = (entry for entry in os.listdir(input_folder) if entry.endswith(".wav"))

for filename in wav_names:
    filepath = os.path.join(input_folder, filename)

    try:
        samples, sr = torchaudio.load(filepath)

        # Average over dim 0 to get a single 1-D signal.
        # NOTE(review): assumes torchaudio.load returns (channels, frames)
        # with float samples -- confirm against the torchaudio docs.
        mono = samples.mean(dim=0)
        duration_sec = mono.size(0) / sr

        if duration_sec < min_duration_sec:
            # Short clips are rejected without measuring their level.
            verdict = "Too Short"
        else:
            # Root-mean-square level of the signal, converted to decibels.
            # A non-positive RMS (e.g. pure silence) cannot go through
            # log10, so it is pinned to -100 instead.
            level = mono.pow(2).mean().sqrt()
            dbfs_value = (20 * torch.log10(level)).item() if level > 0 else -100

            verdict = (
                f"Too Quiet: {dbfs_value:.2f} dBFS"
                if dbfs_value < threshold_db
                else None
            )

        if verdict is not None:
            # Delete first, record second: a failed delete is reported by the
            # handler below and never shows up in the summary.
            os.remove(filepath)
            removed_files.append((filename, verdict))

    except Exception as e:
        # Best effort: report the problem and move on to the next file.
        print(f"⚠️ Error processing {filepath}: {e}")

print(f"🎯 Finished cleaning {input_folder}")
print(f"✅ Removed {len(removed_files)} files")
for f, reason in removed_files:
    print(f"❌ {f}: {reason}")

🎯 Finished cleaning clips/positive
✅ Removed 29629 files
❌ sw2564A-ms98-a-0009.wav: Too Quiet: -35.41 dBFS
❌ sw2507A-ms98-a-0059.wav: Too Short
❌ sw2415B-ms98-a-0108.wav: Too Quiet: -36.01 dBFS
❌ sw2764B-ms98-a-0091.wav: Too Short
❌ sw3571A-ms98-a-0083.wav: Too Quiet: -32.48 dBFS
❌ sw3448A-ms98-a-0068.wav: Too Quiet: -31.88 dBFS
❌ sw2228B-ms98-a-0038.wav: Too Quiet: -39.96 dBFS
❌ sw3520B-ms98-a-0064.wav: Too Short
❌ sw2866A-ms98-a-0075.wav: Too Quiet: -39.26 dBFS
❌ sw2293B-ms98-a-0040.wav: Too Quiet: -30.65 dBFS
❌ sw2547B-ms98-a-0019.wav: Too Quiet: -37.21 dBFS
❌ sw2130A-ms98-a-0062.wav: Too Quiet: -39.21 dBFS
❌ sw2153A-ms98-a-0032.wav: Too Short
❌ sw3367B-ms98-a-0009.wav: Too Quiet: -38.58 dBFS
❌ sw2941A-ms98-a-0016.wav: Too Short
❌ sw3179A-ms98-a-0019.wav: Too Short
❌ sw2989A-ms98-a-0038.wav: Too Quiet: -34.19 dBFS
❌ sw3378B-ms98-a-0020.wav: Too Short
❌ sw2986A-ms98-a-0017.wav: Too Quiet: -36.94 dBFS
❌ sw3628A-ms98-a-0038.wav: Too Quiet: -30.28 dBFS
❌ sw2092A-ms98-a-0079.wav: Too Qui