In [None]:
!pip install -q noisereduce pyloudnorm soundfile librosa transformers torch

import os, json, hashlib, time, random, warnings, shutil, pickle
from pathlib import Path
from typing import Optional, List, Dict, Tuple
from collections import defaultdict

import numpy as np
import librosa
import soundfile as sf
from scipy.signal import fftconvolve
import noisereduce as nr
import torch
from transformers import ClapModel, ClapProcessor
import pyloudnorm as pyln

# Silence third-party warnings so the cell output stays readable.
warnings.filterwarnings('ignore')

# Seed every random number generator the notebook imports, so that the
# "deterministic seeds" message below is true for torch as well.
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)

print("üü¢ [Setup] Libraries imported and deterministic seeds set.")

### Wipe Kaggle's Output Storage

In [None]:
import os
import shutil

def wipe_directory(dir_path) -> list:
    '''Delete every entry directly inside ``dir_path`` (the directory itself is kept).

    Files and symlinks are unlinked; sub-directories are removed recursively.
    A failure on one entry is reported and does not stop the remaining deletions.

    Returns:
        The list of entry paths that could not be deleted (empty on full success).
    '''
    failed = []
    for item in os.listdir(dir_path):
        item_path = os.path.join(dir_path, item)
        try:
            if os.path.isfile(item_path) or os.path.islink(item_path):
                os.unlink(item_path)
            elif os.path.isdir(item_path):
                shutil.rmtree(item_path)
        except Exception as e:
            failed.append(item_path)
            print(f"üî¥ Failed to delete {item_path}. Reason: {e}")
    return failed


working_dir = '/kaggle/working'

print("üü° Wiping Kaggle working directory...")

if not os.path.isdir(working_dir):
    # Nothing to wipe (e.g. the notebook is not running on Kaggle).
    print(f"üü° Target does not exist: {working_dir}")
else:
    _wipe_failures = wipe_directory(working_dir)
    if _wipe_failures:
        # Only claim a clean directory when every deletion actually succeeded.
        print(f"üî¥ {len(_wipe_failures)} item(s) could not be deleted; working directory is NOT clean.")
    else:
        print("üü¢ Kaggle working directory is completely clean.")

### Wipe a Specific Folder/File

In [None]:
import shutil
from pathlib import Path

def delete_target(target_path):
    """Delete a single file, symlink, or directory tree and report the outcome.

    Args:
        target_path: Path (str or Path) of the entry to remove.

    Returns:
        None. The result is printed; deletion errors are caught and printed
        rather than raised.
    """
    target = Path(target_path)

    # Path.exists() follows symlinks, so a dangling link reports False.
    # Check is_symlink() as well so broken links can still be removed.
    if not target.exists() and not target.is_symlink():
        print(f"üü° Target does not exist: {target}")
        return

    try:
        # Test the symlink case first: a link to a directory must be unlinked,
        # not followed and recursively deleted.
        if target.is_symlink() or target.is_file():
            target.unlink()
            print(f"üü¢ Successfully deleted file: {target}")
        elif target.is_dir():
            shutil.rmtree(target)
            print(f"üü¢ Successfully deleted directory: {target}")
    except Exception as e:
        print(f"üî¥ Failed to delete {target}. Reason: {e}")

# Manual utility call: replace the placeholder below with the file or folder
# to remove. delete_target only prints a notice if the path does not exist.
delete_target('/kaggle/working/your_folder_or_file_here')

In [None]:
# ---------------------------------------------------------------------------
# PHASE 0 - Environment & Path Configuration
# ---------------------------------------------------------------------------

# Kaggle input datasets, keyed by the short names the later phases use.
PATHS = dict(
    bad_irs=Path('/kaggle/input/datasets/itorousa/impulse-responses'),
    mit_irs=Path('/kaggle/input/datasets/kynthesis/mit-reverb-dataset/MIT_Reverb_Dataset/MIT_Reverb_Dataset'),
    ljspeech=Path('/kaggle/input/datasets/dromosys/ljspeech/'),
    vctk=Path('/kaggle/input/datasets/kynthesis/vctk-corpus/VCTK-Corpus/wav48'),
    # Disabled source:
    # langid_en=Path('/kaggle/input/datasets/shrivatssudhir/language-identifier/english/clips'),
)

# Output of the previous run in the chain. UPDATE THIS PATH WHEN CHAINING RUNS.
PREV_RUN_PATH = Path('/kaggle/input/notebooks/itorousa/genesis-data-run1')

# Everything this notebook writes lives under OUTPUT.
OUTPUT = Path('/kaggle/working')
BATCH_DIR = OUTPUT.joinpath('batches')
CLAP_DIR = OUTPUT.joinpath('clap_model')
STERILIZED_DIR = OUTPUT.joinpath('sterilized_batches')
MIT_IR_DIR = OUTPUT.joinpath('irs', 'mit_irs')

# Create the output tree up front; existing directories are left untouched.
for d in (BATCH_DIR, CLAP_DIR, STERILIZED_DIR, MIT_IR_DIR):
    d.mkdir(parents=True, exist_ok=True)

# Audio parameters and output budget.
SR = 48_000                          # sample rate in Hz
CLIP_SEC = 5.0                       # clip length in seconds
CLIP_SAMPLES = int(CLIP_SEC * SR)    # clip length in samples
TRIPLES_PER_BATCH = 500
MAX_OUTPUT_GB = 19.0                 # compared against get_output_size_gb()

print(f"üü¢ [Config] Output budget: {MAX_OUTPUT_GB} GB | SR: {SR} | Batch Size: {TRIPLES_PER_BATCH}")

In [None]:
# ═══════════════════════════════════════════════════════════════════════
# PHASE 1 — Aggressive Checkpoint Initialization
# ═══════════════════════════════════════════════════════════════════════
# JSON checkpoint file kept in the output directory; read by load_checkpoint
# and written by save_checkpoint.
CHECKPOINT_PATH = OUTPUT / 'checkpoint.json'

def get_output_size_gb(root=None) -> float:
    '''Return the combined size of all regular files under a directory, in GiB.

    Args:
        root: Directory to measure. Defaults to ``OUTPUT`` (the Kaggle working
            directory) when omitted.

    Returns:
        Total bytes divided by 1024**3. Entries that disappear or cannot be
        stat'ed while the tree is being walked are skipped instead of raising.
    '''
    base = OUTPUT if root is None else Path(root)
    total = 0
    for entry in base.rglob('*'):
        try:
            if entry.is_file():
                total += entry.stat().st_size
        except OSError:
            # The file vanished or became unreadable between listing and stat.
            continue
    return total / (1024 ** 3)

def load_checkpoint(prev_run_path=None, checkpoint_path=None) -> dict:
    '''Load pipeline progress, preferring the most recent state available.

    Lookup order:
      1. The checkpoint in the current working directory (kernel restart
         mid-session). It must win over the previous run's checkpoint,
         otherwise progress made in this session is discarded and
         ``run_number`` is bumped a second time.
      2. The checkpoint of the previous chained run; ``run_number`` is
         incremented because this is the first load of a new run.
      3. A fresh checkpoint.

    Args:
        prev_run_path: Directory of the previous run's output. Defaults to
            ``PREV_RUN_PATH``.
        checkpoint_path: Checkpoint file of the current run. Defaults to
            ``CHECKPOINT_PATH``.

    Returns:
        Dict with at least the keys ``batch_id``, ``triples_completed``,
        ``vocal_cursor`` and ``run_number``; keys missing from a stored
        checkpoint are filled with their fresh-start values.
    '''
    defaults = {
        'batch_id': 0,
        'triples_completed': 0,
        'vocal_cursor': 0,
        'run_number': 1,
    }
    local_ckpt = Path(CHECKPOINT_PATH if checkpoint_path is None else checkpoint_path)
    prev_root = Path(PREV_RUN_PATH if prev_run_path is None else prev_run_path)

    # 1) Current working dir (kernel restart mid-session)
    if local_ckpt.exists():
        with open(local_ckpt) as f:
            return {**defaults, **json.load(f)}

    # 2) Previous run (chaining)
    prev_ckpt = prev_root / 'checkpoint.json'
    if prev_ckpt.exists():
        with open(prev_ckpt) as f:
            ckpt = {**defaults, **json.load(f)}
        ckpt['run_number'] += 1
        print(f"üü¢ [Checkpoint] ‚ôª Resuming from previous run: {ckpt['triples_completed']} triples done")
        return ckpt

    # 3) Fresh start
    return dict(defaults)

def save_checkpoint(ckpt: dict, path=None):
    '''Persist the checkpoint dict as indented JSON, replacing the file atomically.

    The data is written to a sibling ``*.tmp`` file first and then moved over
    the destination with ``os.replace``, so an interrupted write cannot leave
    a truncated checkpoint behind.

    Args:
        ckpt: JSON-serialisable checkpoint state.
        path: Destination file. Defaults to ``CHECKPOINT_PATH``.
    '''
    target = Path(CHECKPOINT_PATH if path is None else path)
    tmp_path = target.with_name(target.name + '.tmp')
    with open(tmp_path, 'w') as f:
        json.dump(ckpt, f, indent=2)
        f.flush()
        os.fsync(f.fileno())
    os.replace(tmp_path, target)

# Load the checkpoint (or start a fresh one) and write it straight back so
# that checkpoint.json exists in the output directory before later cells run.
ckpt = load_checkpoint()
save_checkpoint(ckpt) # Initialize immediately

print(f"üü¢ [Checkpoint] Run #{ckpt['run_number']} | Starting at vocal cursor {ckpt['vocal_cursor']}")

In [None]:
# ═══════════════════════════════════════════════════════════════════════
# PHASE 2 — Impulse Response Acquisition & Pooling
# ═══════════════════════════════════════════════════════════════════════
CATALOGUE_PATH = OUTPUT / 'ir_catalogue.json'
prev_catalogue = PREV_RUN_PATH / 'ir_catalogue.json'

# ir_id lists per role, plus the catalogue mapping ir_id -> {'source', 'path'}.
bad_pool: List[str] = []
target_pool: List[str] = []
ir_catalogue: Dict = {}


def process_ir(filepath: Path, source_tag: str):
    """Validate one impulse-response file and register it in the catalogue.

    The file is decoded once to prove it is readable and at least 64 samples
    long; the audio itself is not kept. On success the IR is added to
    ``ir_catalogue`` and to ``target_pool`` (source 'mit') or ``bad_pool``
    (any other source).

    Args:
        filepath: Path of the IR audio file.
        source_tag: Source label; also used as the ir_id prefix.
    """
    try:
        audio, _ = librosa.load(str(filepath), sr=SR, mono=True)
    except Exception as e:
        # Report unreadable files instead of dropping them silently.
        print(f"üî¥ [IR Phase] Skipping unreadable IR {filepath}. Reason: {e}")
        return
    if len(audio) < 64:
        return

    # Files sharing a stem (different folders or extensions) would otherwise
    # overwrite each other's catalogue entry and put the same id in the pool twice.
    base_id = f"{source_tag}_{filepath.stem}"
    ir_id, suffix = base_id, 1
    while ir_id in ir_catalogue:
        suffix += 1
        ir_id = f"{base_id}_{suffix}"

    ir_catalogue[ir_id] = {'source': source_tag, 'path': str(filepath)}
    if source_tag == 'mit':
        target_pool.append(ir_id)
    else:
        bad_pool.append(ir_id)


# The catalogue stores MIT IR paths inside MIT_IR_DIR (the working directory),
# so the files must be present in THIS run even when the catalogue itself is
# reused from a previous run; otherwise those paths dangle.
if PATHS['mit_irs'].exists():
    shutil.copytree(PATHS['mit_irs'], MIT_IR_DIR, dirs_exist_ok=True)

# Fast path: reuse the catalogue from the previous run if there is one.
if prev_catalogue.exists() and not CATALOGUE_PATH.exists():
    shutil.copy2(prev_catalogue, CATALOGUE_PATH)
    print("üü¢ [IR Phase] Copied catalogue from previous run.")

if CATALOGUE_PATH.exists():
    print("üü¢ [IR Phase] Found existing catalogue. Bypassing extraction.")
    with open(CATALOGUE_PATH, 'r') as f:
        ir_catalogue = json.load(f)

    for ir_id, feats in ir_catalogue.items():
        if feats['source'] == 'mit':
            target_pool.append(ir_id)
        else:
            bad_pool.append(ir_id)

    print(f"üü¢ [IR Phase] Re-hydrated pools -> Target (MIT): {len(target_pool)} | Bad (Custom): {len(bad_pool)}")

else:
    print("üü¢ [IR Phase] Processing IRs from scratch...")

    # Sorted traversal keeps pool order (and collision suffixes) reproducible.
    # Process MIT (Target)
    for f in sorted(MIT_IR_DIR.rglob('*.wav')):
        process_ir(f, 'mit')

    # Process Custom (Bad)
    for ext in ('*.irs', '*.wav'):
        for f in sorted(PATHS['bad_irs'].rglob(ext)):
            process_ir(f, 'bad')

    with open(CATALOGUE_PATH, 'w') as f:
        json.dump(ir_catalogue, f, indent=2)

    print(f"üü¢ [IR Phase] Catalogue built -> Target: {len(target_pool)} | Bad: {len(bad_pool)}")

In [None]:
# ═══════════════════════════════════════════════════════════════════════
# PHASE 3 — CLAP Target Embedding Cache
# ═══════════════════════════════════════════════════════════════════════
CLAP_CACHE_PATH = OUTPUT / 'clap_cache.npz'
prev_cache = PREV_RUN_PATH / 'clap_cache.npz'
clap_cache_data = {}   # ir_id -> float32 embedding vector

# Fast path: reuse the embedding cache from the previous run.
if prev_cache.exists() and not CLAP_CACHE_PATH.exists():
    shutil.copy2(prev_cache, CLAP_CACHE_PATH)
    print("üü¢ [CLAP Phase] Copied embedding cache from previous run.")

if CLAP_CACHE_PATH.exists():
    print("üü¢ [CLAP Phase] Found existing clap_cache.npz. Bypassing model loading.")
    # Read every array eagerly, then close the archive so no handle stays open.
    with np.load(CLAP_CACHE_PATH) as npz:
        clap_cache_data = {key: npz[key] for key in npz.files}
    print(f"üü¢ [CLAP Phase] Loaded {len(clap_cache_data)} embeddings.")
else:
    print("üü¢ [CLAP Phase] Loading model to compute embeddings...")
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    CLAP_MODEL_ID = "laion/larger_clap_music_and_speech"

    # Handle frozen model transfer to avoid re-downloading.
    # CLAP_DIR is always created by the configuration cell, so test for the
    # saved config file; testing the directory would never allow the copy.
    clap_config = CLAP_DIR / 'config.json'
    prev_clap_model = PREV_RUN_PATH / 'clap_model'
    if prev_clap_model.exists() and not clap_config.exists():
        shutil.copytree(prev_clap_model, CLAP_DIR, dirs_exist_ok=True)

    if clap_config.exists():
        clap_processor = ClapProcessor.from_pretrained(CLAP_DIR)
        clap_model = ClapModel.from_pretrained(CLAP_DIR).to(device).eval()
    else:
        clap_processor = ClapProcessor.from_pretrained(CLAP_MODEL_ID)
        clap_model = ClapModel.from_pretrained(CLAP_MODEL_ID).to(device).eval()
        clap_model.save_pretrained(CLAP_DIR)
        clap_processor.save_pretrained(CLAP_DIR)

    # Three seconds of fixed reference noise; every IR is applied to the same
    # excitation so the embeddings differ only because of the IR.
    ref_len = SR * 3
    ref_noise = np.random.randn(ref_len).astype(np.float32) * 0.1

    for ir_id in target_pool:
        if ir_id not in ir_catalogue:
            continue

        ir_audio, _ = librosa.load(ir_catalogue[ir_id]['path'], sr=SR, mono=True)
        scene = fftconvolve(ref_noise, ir_audio, mode='full')[:ref_len]
        scene = scene / (np.max(np.abs(scene)) + 1e-8)   # peak-normalise; epsilon guards silence

        inputs = clap_processor(audio=scene, sampling_rate=SR, return_tensors="pt")
        inputs = {k: v.to(device) for k, v in inputs.items()}

        with torch.no_grad():
            outputs = clap_model.get_audio_features(**inputs)

        # The original code requires `.pooler_output`. Some transformers
        # releases appear to return the embedding tensor directly instead
        # (TODO confirm against the installed version), so accept both shapes.
        emb_tensor = getattr(outputs, 'pooler_output', outputs)
        emb = emb_tensor.cpu().numpy().flatten().astype(np.float32)

        clap_cache_data[ir_id] = emb

    np.savez(CLAP_CACHE_PATH, **clap_cache_data)
    print(f"üü¢ [CLAP Phase] Computed and cached {len(clap_cache_data)} embeddings.")

In [None]:
# ═══════════════════════════════════════════════════════════════════════
# PHASE 4 — Vocal Sterilization & "Dead" Audio Guarantee
# ═══════════════════════════════════════════════════════════════════════
STATE_FILE = STERILIZED_DIR / 'sterilize_state.json'
prev_sterilized = PREV_RUN_PATH / 'sterilized_batches'


def _sterilize_file(fpath, tag, meter):
    """Denoise, trim and loudness-normalise one recording, then cut it into clips.

    Args:
        fpath: Path of the source audio file.
        tag: Dataset label stored with every segment.
        meter: ``pyln.Meter`` used to measure integrated loudness.

    Returns:
        List of dicts with keys 'audio' (float32 array of CLIP_SAMPLES
        samples), 'file' and 'dataset'. Empty when the recording is shorter
        than 1.5 s before or after trimming.
    """
    min_len = SR * 1.5
    audio, _ = librosa.load(str(fpath), sr=SR, mono=True)
    if len(audio) < min_len:
        return []

    audio = nr.reduce_noise(y=audio, sr=SR, stationary=True, prop_decrease=0.85)
    audio, _ = librosa.effects.trim(audio, top_db=40)
    if len(audio) < min_len:
        return []

    loudness = meter.integrated_loudness(audio)
    if loudness > -70:
        audio = pyln.normalize.loudness(audio, loudness, -23.0)

    segments = []
    for start in range(0, len(audio) - SR, CLIP_SAMPLES):
        chunk = audio[start : start + CLIP_SAMPLES]
        if len(chunk) < CLIP_SAMPLES:
            # Zero-pad the trailing partial clip to the fixed clip length.
            chunk = np.pad(chunk, (0, CLIP_SAMPLES - len(chunk)))

        # Drop clips that are effectively silent.
        if np.sqrt(np.mean(chunk ** 2)) < 1e-4:
            continue
        segments.append({'audio': chunk.astype(np.float32), 'file': fpath.name, 'dataset': tag})
    return segments


# Fast path: transfer chunks produced by the previous run.
if prev_sterilized.exists() and (prev_sterilized / 'sterilize_state.json').exists():
    if not STATE_FILE.exists():
        shutil.copytree(prev_sterilized, STERILIZED_DIR, dirs_exist_ok=True)
        print("üü¢ [Sterilize Phase] Copied sterilized batches from previous run.")

_st = {}
if STATE_FILE.exists():
    with open(STATE_FILE) as f:
        _st = json.load(f)

if _st.get('completed', False):
    print("üü¢ [Sterilize Phase] Sterilization previously completed. Bypassing extraction.")
else:
    print("üü¢ [Sterilize Phase] Discovering source audio...")
    meter = pyln.Meter(SR)
    all_files = []

    for p in PATHS.values():
        # Skip the impulse-response datasets; only vocal corpora are sterilized.
        if p.name in ['impulse-responses', 'MIT_Reverb_Dataset']: continue
        for ext in ('*.wav', '*.mp3', '*.ogg'):
            all_files.extend([(f, p.name) for f in p.rglob(ext)])

    # Sort before the seeded shuffle so the order (and therefore the resume
    # cursor) means the same thing in every run.
    all_files.sort(key=lambda x: str(x[0]))
    random.Random(42).shuffle(all_files)
    print(f"üü¢ [Sterilize Phase] Found {len(all_files)} raw vocal files.")

    cursor = _st.get('cursor', 0)
    print(f"üü¢ [Sterilize Phase] Resuming extraction at file index: {cursor}")

    vocal_segments = []
    STERILIZE_CHUNK = 500
    n_files = len(all_files)
    chunk_failures = 0
    last_error = None

    # The output size only grows when a batch is flushed, so measure it once
    # up front and again after every flush instead of rescanning per file.
    over_budget = get_output_size_gb() > MAX_OUTPUT_GB

    for i in range(cursor, n_files):
        if over_budget:
            print("‚ö†Ô∏è Output limit reached. Pausing sterilization.")
            break

        fpath, tag = all_files[i]
        try:
            vocal_segments.extend(_sterilize_file(fpath, tag, meter))
        except Exception as e:
            # Best effort: one unreadable file must not stop the run, but
            # failures are counted and reported at the next flush.
            chunk_failures += 1
            last_error = f"{fpath.name}: {e}"

        # Flush to disk to protect RAM. This must run for EVERY file index:
        # skipping it for short/failed files would lose the final flush and
        # leave 'completed' unset forever.
        done = i + 1
        if done % STERILIZE_CHUNK == 0 or done == n_files:
            # Ceiling division: the final partial chunk gets its own index
            # instead of overwriting the previous full batch file.
            batch_idx = -(-done // STERILIZE_CHUNK)
            batch_path = STERILIZED_DIR / f"sterilized_batch_{batch_idx:04d}.pkl"
            with open(batch_path, 'wb') as f:
                pickle.dump(vocal_segments, f)

            completed = done >= n_files
            with open(STATE_FILE, 'w') as f:
                json.dump({'cursor': done, 'completed': completed}, f)

            print(f"  üíæ Saved {len(vocal_segments)} segments to {batch_path.name}. RAM cleared.")
            if chunk_failures:
                print(f"  ‚ö†Ô∏è {chunk_failures} file(s) in this chunk could not be processed. Last error: {last_error}")
                chunk_failures = 0
            vocal_segments.clear()
            over_budget = get_output_size_gb() > MAX_OUTPUT_GB

# Verification check
# NOTE: pickle.load can execute code embedded in the file; STERILIZED_DIR must
# only contain batches written by this pipeline or a trusted previous run.
total_segs = 0
for pkl_path in sorted(STERILIZED_DIR.glob('*.pkl')):
    with open(pkl_path, 'rb') as f:
        total_segs += len(pickle.load(f))
print(f"üü¢ [Sterilize Phase] Total sterilized 5.0s segments on disk: {total_segs}")

---
> ⚠️ **Phase Skip**: Phases 2–4 above are skipped automatically if a previous run
> completed them successfully. The notebook detects an existing `ir_catalogue.json`,
> `clap_cache.npz`, and a `sterilize_state.json` marked as completed, and bypasses the redundant computation.
> Previous run files are located at `/kaggle/input/notebooks/itorousa/genesis-data-run#`, where `#` is the run number.
---


## 🔗 Checkpoint Chaining (20 GB Limit)

If the output hit the size limit before processing all vocals:

1. **Save this notebook's output** as a Kaggle dataset (e.g. `genesis-data-run1`)
2. **Create a new notebook** (with the full seven-phase code) and attach:
   - All the same input datasets (IRs, LJSpeech, VCTK, and Language Identifier if it is enabled in `PATHS`)
   - The previous output as input (update `PREV_RUN_PATH` in the Phase 0 configuration cell)
3. **Run all cells** — the checkpoint system automatically skips completed work

Each run produces ~19 GB of training triples. Chain as many times as needed.

### Using the Data in Training

```python
# In the training notebook, load all batches from all runs:
import numpy as np
from pathlib import Path

run_dirs = [
    Path('/kaggle/input/genesis-data-run1/batches'),
    Path('/kaggle/input/genesis-data-run2/batches'),
    # ... add more runs
]

for run_dir in run_dirs:
    for batch_file in sorted(run_dir.glob('batch_*.npz')):
        data = np.load(batch_file)
        source_audio = data['source_audio']   # (N, 240000) int16
        target_audio = data['target_audio']   # (N, 240000) int16
        target_clap  = data['target_clap']    # (N, CLAP_DIM) float32
        # Convert int16 back to float32: audio = source_audio.astype(np.float32) / 32767
        # Compute STFT on-the-fly during training for memory efficiency
```
