# In [5]: (notebook cell) RMS-normalize every audio file under a directory tree
import soundfile as sf
import numpy as np
import os
from pathlib import Path

def normalize_audio(audio_path, target_rms=0.1):
    """Load an audio file and scale its samples toward a target RMS level.

    Args:
        audio_path: Path of the audio file; passed straight to
            ``soundfile.read``.
        target_rms: Desired root-mean-square amplitude, measured over all
            samples (and all channels) of the file together.

    Returns:
        A tuple ``(audio_normalized, sr)``: the scaled sample array and the
        sample rate reported by ``soundfile.read``.

    Notes:
        The gain is capped so that the largest absolute sample does not
        exceed 1.0. When the cap applies, the resulting RMS is lower than
        ``target_rms``. An empty file is returned unchanged.
    """
    audio, sr = sf.read(audio_path)

    # Nothing to measure in an empty file; np.mean of an empty array would
    # produce NaN and a RuntimeWarning.
    if audio.size == 0:
        return audio, sr

    rms = np.sqrt(np.mean(audio**2))
    # The small epsilon keeps the division finite for all-zero (silent) input.
    scaling_factor = target_rms / (rms + 1e-6)

    # Peak protection: samples beyond full scale would be hard-clipped when
    # the result is later written to an integer PCM file, so limit the gain
    # to what the loudest sample can take.
    peak = np.max(np.abs(audio))
    if peak * scaling_factor > 1.0:
        scaling_factor = 1.0 / peak

    audio_normalized = audio * scaling_factor
    return audio_normalized, sr

def process_directory(src_dir="data", dst_dir="data_new"):
    """Normalize every audio file under ``src_dir`` into ``dst_dir``.

    The directory tree of ``src_dir`` is mirrored under ``dst_dir`` (every
    sub-directory is created, even if it holds no audio). Each file whose
    name ends in ``.wav``, ``.mp3`` or ``.flac`` (case-insensitive) is passed
    through ``normalize_audio`` and written to the same relative path.

    Args:
        src_dir: Root directory to scan for audio files.
        dst_dir: Root directory that receives the normalized files.

    Returns:
        None. A failure on an individual file is printed and the remaining
        files are still processed; a missing ``src_dir`` is reported and
        nothing is done.
    """
    src_path = Path(src_dir)
    dst_path = Path(dst_dir)
    audio_extensions = ('.wav', '.mp3', '.flac')

    # os.walk yields nothing for a missing directory, which would make a
    # mistyped path look like a successful run.
    if not src_path.is_dir():
        print(f"Source directory not found: {src_path}")
        return

    dst_resolved = dst_path.resolve()

    for root, dirs, files in os.walk(src_path):
        root_path = Path(root)

        # If the destination lives inside the source tree, prune it in place
        # so earlier outputs are not picked up and normalized again into
        # ever-deeper nested copies.
        dirs[:] = [d for d in dirs if (root_path / d).resolve() != dst_resolved]

        # Create the corresponding directory under the destination root.
        dst_root = dst_path / root_path.relative_to(src_path)
        dst_root.mkdir(parents=True, exist_ok=True)

        for file in files:
            # Compare in lower case so names like "CLIP.WAV" are not skipped.
            if not file.lower().endswith(audio_extensions):
                continue

            src_file = root_path / file
            dst_file = dst_root / file

            # Best-effort batch job: report the failing file and carry on.
            try:
                audio_normalized, sr = normalize_audio(str(src_file))
                sf.write(str(dst_file), audio_normalized, sr)
            except Exception as e:
                print(f"Error processing {src_file}: {e}")

# Run the processing with the default arguments (src_dir="data",
# dst_dir="data_new"). This call executes as soon as the cell/file is run.
process_directory()
