# Upscale Audio (Super-Resolution) — Folder Batch

Upscales low-sample-rate audio (e.g. 16 kHz TTS output) to higher quality using neural super-resolution.

### Supported engines

| Engine | Size | Speed | Output | Notes |
|--------|------|-------|--------|-------|
| **NovaSR** | 52 KB | ~3500× RT (GPU) | 48 kHz | Best quality, needs CUDA |
| **FlashSR** | ~500 KB ONNX | ~200–400× RT | 48 kHz | CPU-friendly |
| **LavaSR** | Vocos-based | moderate | 16 kHz enhanced | Optional denoiser |

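Each input is first resampled to 16 kHz, processed by the selected engine, and then resampled to `TARGET_SR` (default 22050 Hz). With the default, the 48 kHz output of NovaSR/FlashSR is downsampled again; set `TARGET_SR=48000` to keep the full-bandwidth result.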

---

> A GPU is recommended for NovaSR and LavaSR (set `SR_DEVICE=cuda`). FlashSR works well on CPU.

In [None]:
import os

# ════════════════════════════════════════════
# ⚙️  CONFIGURATION
# ════════════════════════════════════════════
def env_flag(name, default="0"):
    """Return True when environment variable *name* holds a truthy value.

    "1", "true" and "yes" are accepted in any letter case and with
    surrounding whitespace; every other value (or an unset variable whose
    *default* is not truthy) yields False.
    """
    return os.environ.get(name, default).strip().lower() in ("1", "true", "yes")


# setdefault() only fills in values that are not already present, so settings
# exported before the notebook starts take precedence over these defaults.
os.environ.setdefault("INPUT_DIR",       "/data/audio_in")
os.environ.setdefault("OUTPUT_DIR",      "/data/audio_upscaled")
os.environ.setdefault("SR_ENGINE",       "flashsr")       # novasr | flashsr | lavasr
os.environ.setdefault("SR_DEVICE",       "cpu")            # cuda | cpu (novasr/lavasr)
os.environ.setdefault("TARGET_SR",       "22050")          # Final output sample rate
os.environ.setdefault("LAVASR_DENOISE",  "0")              # 1 = enable LavaSR denoiser

INPUT_DIR  = os.environ["INPUT_DIR"]
OUTPUT_DIR = os.environ["OUTPUT_DIR"]
SR_ENGINE  = os.environ["SR_ENGINE"].strip().lower()
# Normalised like SR_ENGINE so that values such as " CUDA " still compare
# equal to "cuda" later on.
SR_DEVICE  = os.environ["SR_DEVICE"].strip().lower()
TARGET_SR  = int(os.environ["TARGET_SR"])
if TARGET_SR <= 0:
    # Fail here with a clear message instead of once per file during resampling.
    raise ValueError(f"TARGET_SR must be a positive integer, got {TARGET_SR}")
DENOISE    = env_flag("LAVASR_DENOISE")

print(f"Input:     {INPUT_DIR}")
print(f"Output:    {OUTPUT_DIR}")
print(f"Engine:    {SR_ENGINE}")
print(f"Device:    {SR_DEVICE}")
print(f"Target SR: {TARGET_SR} Hz")

In [None]:
sr_engine = os.environ.get("SR_ENGINE", "flashsr").strip().lower()
if sr_engine == "novasr":
    !pip install --quiet --break-system-packages git+https://github.com/ysharma3501/NovaSR.git
elif sr_engine == "flashsr":
    !pip install --quiet --break-system-packages git+https://github.com/ysharma3501/FlashSR.git
elif sr_engine == "lavasr":
    !pip install --quiet --break-system-packages git+https://github.com/ysharma3501/LavaSR.git
!pip install --quiet --break-system-packages librosa soundfile numpy tqdm torch

In [None]:
import os
import numpy as np
import librosa
import soundfile as sf
import torch
from pathlib import Path
from tqdm import tqdm


def load_sr_model(engine, device):
    """Instantiate the requested super-resolution backend.

    Args:
        engine: One of "novasr", "flashsr" or "lavasr".
        device: Device string handed to the NovaSR and LavaSR constructors.
            The flashsr branch does not use it.

    Returns:
        A ``(model, output_sr)`` tuple, where ``output_sr`` is 48000 for
        novasr/flashsr and 16000 for lavasr.

    Raises:
        ValueError: If *engine* is not one of the three supported names.
    """
    if engine not in ("novasr", "flashsr", "lavasr"):
        raise ValueError(f"Unknown SR engine: {engine}")

    # Backend packages are imported inside their branch, so only the selected
    # engine's package has to be importable.
    if engine == "lavasr":
        from LavaSR.model import LavaEnhance

        enhancer = LavaEnhance("YatharthS/LavaSR", device)
        return enhancer, 16000

    if engine == "novasr":
        from NovaSR import FastSR

        upsampler = FastSR(device=device)
        return upsampler, 48000

    # Remaining case: flashsr. Fetch the checkpoint into the working directory.
    from FastAudioSR import FASR
    from huggingface_hub import hf_hub_download

    weights_path = hf_hub_download(
        repo_id="YatharthS/FlashSR",
        filename="upsampler.pth",
        local_dir=".",
    )
    upsampler = FASR(weights_path)
    # Switch the wrapped network to half precision; the return value is unused.
    upsampler.model.half()
    return upsampler, 48000


def apply_sr(wav_np, sr_in, engine, model, sr_out):
    """Apply super-resolution. Returns (upsampled_wav, new_sr).

    Args:
        wav_np: Waveform as a NumPy array (the batch loop passes mono audio).
        sr_in: Sample rate of *wav_np* in Hz.
        engine: "novasr", "flashsr" or "lavasr"; any other value is a
            pass-through.
        model: Model object returned by ``load_sr_model`` for *engine*.
        sr_out: Native output rate of the model. Currently unused and kept
            for interface compatibility; the returned rate is fixed per engine.

    Returns:
        ``(wav, sr)``: the processed waveform as a float32 NumPy array and its
        sample rate (48000 for novasr/flashsr, 16000 for lavasr). For an
        unrecognised engine the input array and *sr_in* are returned unchanged.
    """
    if engine not in ("novasr", "flashsr", "lavasr"):
        # Pass-through must happen before the 16 kHz resample below; otherwise
        # the returned waveform would be at 16 kHz while still labelled sr_in.
        return wav_np, sr_in

    # The models are fed 16 kHz audio.
    if sr_in != 16000:
        wav_np = librosa.resample(wav_np, orig_sr=sr_in, target_sr=16000)

    tensor = torch.from_numpy(wav_np).unsqueeze(0)

    # Inference only: no autograd graph is needed, and disabling it keeps
    # memory use down.
    with torch.no_grad():
        if engine == "novasr":
            out, new_sr = model.infer(tensor), 48000
        elif engine == "flashsr":
            out, new_sr = model.run(tensor.half()), 48000
        else:  # lavasr
            out, new_sr = model.enhance(tensor, denoise=DENOISE), 16000

    # Cast to float32 before leaving torch: a half-precision model may return
    # float16, which the later resample/write steps may not accept.
    return out.detach().float().cpu().numpy().squeeze(), new_sr


# Load model once and reuse it for every file.
print(f"Loading {SR_ENGINE} model...")
sr_model, sr_out_rate = load_sr_model(SR_ENGINE, SR_DEVICE)
print(f"  Model loaded. Native output: {sr_out_rate} Hz")

os.makedirs(OUTPUT_DIR, exist_ok=True)
# Recursive search; the directory layout under INPUT_DIR is mirrored in OUTPUT_DIR.
files = sorted(Path(INPUT_DIR).rglob("*.wav"))
print(f"Found {len(files)} WAV files")

done, skipped, failed = 0, 0, 0
for src in tqdm(files, desc="Upscaling", unit="file"):
    rel = src.relative_to(INPUT_DIR)
    dst = Path(OUTPUT_DIR) / rel

    # An existing output marks the file as already processed, which makes
    # interrupted runs resumable.
    if dst.exists():
        skipped += 1
        continue

    dst.parent.mkdir(parents=True, exist_ok=True)
    # Write to a sibling temp file and rename it into place, so a failed or
    # interrupted write never leaves a partial file that the skip check above
    # would treat as finished on the next run.
    tmp = dst.with_name(dst.name + ".part")

    try:
        y, sr = librosa.load(str(src), sr=None, mono=True)
        y_up, sr_up = apply_sr(y, sr, SR_ENGINE, sr_model, sr_out_rate)

        # Final resample to TARGET_SR
        if sr_up != TARGET_SR:
            y_up = librosa.resample(y_up, orig_sr=sr_up, target_sr=TARGET_SR)

        # The format is given explicitly because the ".part" suffix hides the
        # ".wav" extension it would otherwise be inferred from.
        sf.write(str(tmp), y_up, TARGET_SR, format="WAV")
        os.replace(tmp, dst)
        done += 1
    except Exception as e:
        # One bad file must not abort the batch; report it and keep going.
        tqdm.write(f"  FAIL {src.name}: {e}")
        failed += 1
    finally:
        # After a successful rename the temp file is gone; otherwise remove it.
        if tmp.exists():
            tmp.unlink()

print(f"\nDone: {done} upscaled, {skipped} skipped, {failed} failed")

# Cleanup: drop the model reference, then release cached GPU memory.
del sr_model
# startswith() also covers indexed devices such as "cuda:0".
if SR_DEVICE.lower().startswith("cuda") and torch.cuda.is_available():
    torch.cuda.empty_cache()