# Normalize Volume — Folder Batch

Normalizes every WAV file in a folder (searched recursively, subfolders included) to a **target loudness** (dBFS),
so that volume is consistent across a dataset.

**Cloud-friendly:** processes one file at a time. **Resume-safe:** files that already exist in the output folder are skipped when the notebook is re-run.

---

> No GPU required. Uses `pydub`.

In [None]:
import os

# ════════════════════════════════════════════
# ⚙️  CONFIGURATION
# ════════════════════════════════════════════
# Each setting comes from the environment. setdefault() stores the fallback
# in os.environ when the variable is unset and returns whichever value is
# now in effect, so one call both seeds the default and reads the setting.
INPUT_DIR = os.environ.setdefault("INPUT_DIR", "/data/audio_in")
OUTPUT_DIR = os.environ.setdefault("OUTPUT_DIR", "/data/audio_normalized")
# Target loudness in dBFS; kept as a string in the environment, parsed here.
TARGET_DBFS = float(os.environ.setdefault("TARGET_DBFS", "-20.0"))

# Echo the effective configuration, one setting per line.
print(
    f"Input:       {INPUT_DIR}",
    f"Output:      {OUTPUT_DIR}",
    f"Target dBFS: {TARGET_DBFS}",
    sep="\n",
)

In [None]:
# Install the two third-party packages this notebook imports (pydub, tqdm).
# `!` runs the line as a shell command from the notebook; --quiet trims pip's
# output, and --break-system-packages lets pip install into a Python
# environment that is marked as externally managed.
!pip install --quiet --break-system-packages pydub tqdm

In [None]:
import os
from pathlib import Path
from pydub import AudioSegment
from tqdm import tqdm

# Normalize every WAV found under INPUT_DIR to TARGET_DBFS and write the
# result to the same relative path under OUTPUT_DIR. Files are handled one at
# a time; a failure on one file is reported and counted, and the batch goes on.
os.makedirs(OUTPUT_DIR, exist_ok=True)

files = sorted(Path(INPUT_DIR).rglob("*.wav"))
print(f"Found {len(files)} WAV files")

done, skipped, failed = 0, 0, 0
for src in tqdm(files, desc="Normalizing", unit="file"):
    rel = src.relative_to(INPUT_DIR)
    dst = Path(OUTPUT_DIR) / rel

    # Resume support: an existing output means a previous run finished this
    # file. This is only trustworthy because outputs are written atomically
    # below, so `dst` never holds a half-written file.
    if dst.exists():
        skipped += 1
        continue

    dst.parent.mkdir(parents=True, exist_ok=True)
    # Export to a sibling ".part" file first. The suffix does not match
    # "*.wav", so it is never mistaken for an input or a finished output.
    tmp = dst.with_name(dst.name + ".part")

    try:
        audio = AudioSegment.from_wav(str(src))
        level = audio.dBFS
        if level == float("-inf"):
            # Digital silence (or an empty file) has no finite level, so
            # TARGET_DBFS - level would be an infinite gain. Pass the audio
            # through unchanged to keep the output dataset complete.
            tqdm.write(f"  NOTE {src.name}: silent audio, written without gain")
            normalized = audio
        else:
            normalized = audio.apply_gain(TARGET_DBFS - level)

        # export() returns the open output handle; close it so the data is
        # flushed and the file can be renamed on every platform.
        normalized.export(str(tmp), format="wav").close()
        # Atomic promote: `dst` appears only once it is complete.
        os.replace(str(tmp), str(dst))
        done += 1
    except Exception as e:
        tqdm.write(f"  FAIL {src.name}: {e}")
        failed += 1
        try:
            tmp.unlink()
        except OSError:
            # Best-effort cleanup; a leftover .part file is simply
            # overwritten the next time this file is processed.
            pass

print(f"\nDone: {done} normalized, {skipped} skipped, {failed} failed")