# 🧪 Spectral Affinity: Audio Restoration (The Boutique Lab)

This notebook implements a professional-grade restoration pipeline specifically tuned for AI-generated audio (e.g., Suno, Udio). It moves beyond basic restoration into "Boutique Mastering" territory.

## 🎛️ The Signal Chain (Architecture 2.0)

1.  **Mono-Maker (<120Hz)**: Forces low frequencies to mono for solid, phase-coherent bass.
2.  **Mid/Side Matrix**: Splits audio into Center (Mid) and Stereo Width (Side).
    *   **Mid Logic**: Transient Shaping (Punch) -> Focuses on kick/snare.
    *   **Side Logic**: Harmonic Saturation (Air) -> Widens the image without muddying the center.
3.  **Spectral De-Harshing**: Dynamic resonance suppression (like "Soothe2") to tame AI artifacts/whistling.
4.  **Hybrid Phase Re-assembly**: Recombines everything with a soft-clipper and limiter.

In [None]:
!pip install -q pedalboard numpy scipy torchaudio tqdm

In [None]:
import numpy as np
import scipy.signal as signal
import torch
import torchaudio
from pedalboard import Pedalboard, Compressor, Distortion, Gain, HighpassFilter, LowpassFilter, HighShelfFilter, Limiter

def mono_maker(audio_side, sample_rate, cutoff_hz=120):
    """
    Collapse the low end to mono by stripping bass out of the Side channel.

    Side carries the (L - R) difference, so once everything below
    ``cutoff_hz`` has been filtered out of it, the decoded Left and Right
    channels carry the same content in that range.

    Args:
        audio_side: Side-channel samples; filtered along the last axis.
        sample_rate: Sampling rate in Hz.
        cutoff_hz: Corner frequency of the high-pass, in Hz.

    Returns:
        The high-passed Side signal.
    """
    # 4th-order Butterworth high-pass, as second-order sections for
    # numerical stability.
    highpass_sos = signal.butter(
        4, cutoff_hz, btype='highpass', fs=sample_rate, output='sos'
    )
    # Single forward (causal) pass rather than zero-phase filtfilt.
    return signal.sosfilt(highpass_sos, audio_side)

def ms_encode(audio_lr):
    """
    Convert stereo Left/Right into Mid/Side.

    Args:
        audio_lr: Indexable stereo signal; index 0 is Left, index 1 is Right.

    Returns:
        Tuple ``(mid, side)`` where mid is half the sum of the two channels
        and side is half their difference (Left minus Right).
    """
    left, right = audio_lr[0], audio_lr[1]
    half_sum = (left + right) * 0.5
    half_diff = (left - right) * 0.5
    return half_sum, half_diff

def ms_decode(mid, side):
    """
    Convert Mid/Side back into stereo Left/Right.

    Args:
        mid: Mid (centre) signal.
        side: Side (difference) signal, same shape as ``mid``.

    Returns:
        Array stacked along a new first axis: row 0 is Left (mid + side),
        row 1 is Right (mid - side).
    """
    return np.stack((mid + side, mid - side), axis=0)

def spectral_deharsh(audio_array, sample_rate, threshold_ratio=1.5, window_size=2048):
    """
    Dynamic resonance/harshness suppression (simplified 'Soothe'-style logic).

    Works in the STFT domain: a median filter across frequency gives a smooth
    spectral envelope, and any bin whose magnitude exceeds that envelope by
    more than ``threshold_ratio`` is attenuated (by at most a factor of 0.5).

    Args:
        audio_array: 1-D array of samples for a single channel.
        sample_rate: Sampling rate in Hz.
        threshold_ratio: How far above the envelope a bin must be before it
            is treated as a resonance.
        window_size: STFT segment length in samples. It is clamped to the
            input length for short signals.

    Returns:
        1-D array with the same number of samples as ``audio_array``.
    """
    audio_array = np.asarray(audio_array)
    n_samples = len(audio_array)
    if n_samples == 0:
        # Nothing to analyse; avoid handing an empty signal to the STFT.
        return audio_array.copy()

    # Use one segment length for both transforms so the inverse never has to
    # guess it from the number of frequency bins.
    nperseg = min(window_size, n_samples)
    _, _, Zxx = signal.stft(audio_array, fs=sample_rate, nperseg=nperseg)
    mag = np.abs(Zxx)

    # Spectral envelope: median across the frequency axis only (kernel width
    # 1 along time). medfilt2d rejects even kernel sizes with a ValueError,
    # so the height must be odd.
    kernel_size = 31
    envelope = signal.medfilt2d(mag, kernel_size=(kernel_size, 1))

    # Bins that stick out of the envelope are treated as resonances.
    mask = mag > (envelope * threshold_ratio)

    # Pull flagged bins towards the envelope, but never by more than half;
    # everything else passes through at unity gain.
    gain_map = np.ones_like(mag)
    gain_map[mask] = np.clip(envelope[mask] / (mag[mask] + 1e-6), 0.5, 1.0)

    # Scaling the complex STFT by a real gain leaves the phase untouched.
    Zxx_clean = Zxx * gain_map
    _, audio_clean = signal.istft(
        Zxx_clean, fs=sample_rate, nperseg=nperseg, nfft=nperseg
    )

    # The inverse transform can come back slightly longer or shorter than the
    # input because of framing/padding; force the original length.
    if len(audio_clean) > n_samples:
        audio_clean = audio_clean[:n_samples]
    elif len(audio_clean) < n_samples:
        audio_clean = np.pad(audio_clean, (0, n_samples - len(audio_clean)))

    return audio_clean

def transient_shaper_mid(mid_signal, sample_rate, punch=1.3):
    """
    Vectorized transient shaper intended for the Mid channel (kick/snare punch).

    Two envelopes of the rectified signal are compared: a fast one (40 Hz
    low-pass) and a slow one (5 Hz low-pass). Wherever the fast envelope
    leads the slow one by more than 5%, the signal is boosted.

    Args:
        mid_signal: Mid-channel samples.
        sample_rate: Sampling rate in Hz.
        punch: Boost strength; the gain is the envelope ratio raised to
            ``punch - 1.0``, limited to the range 1.0 to 2.0.

    Returns:
        ``mid_signal`` multiplied by the per-sample gain curve.
    """
    rectified = np.abs(mid_signal)

    def _envelope(cutoff_hz):
        # First-order low-pass run forwards and backwards (zero phase), so
        # the envelope stays time-aligned with the audio.
        sos = signal.butter(1, cutoff_hz, btype='lowpass', fs=sample_rate, output='sos')
        return signal.sosfiltfilt(sos, rectified)

    fast_env = _envelope(40)
    slow_env = _envelope(5)

    # Small offset keeps the division finite during silence.
    ratio = fast_env / (slow_env + 1e-8)

    exponent = punch - 1.0
    boost = np.where(ratio > 1.05, ratio ** exponent, 1.0)
    # Boost only (never attenuate), capped at 2x.
    boost = np.clip(boost, 1.0, 2.0)

    return mid_signal * boost

def saturate_side(side_signal, sample_rate, drive=2.0):
    """
    Apply harmonic saturation to the Side channel only.

    The signal goes through a Pedalboard chain: a 300 Hz high-pass, a
    distortion stage driven by ``drive`` dB, then a -1 dB gain trim.

    Args:
        side_signal: 1-D Side-channel samples.
        sample_rate: Sampling rate in Hz.
        drive: Distortion drive in dB.

    Returns:
        The processed signal with singleton dimensions squeezed out.
    """
    chain = Pedalboard([
        # Keep low frequencies out of the distortion so the sides stay clean.
        HighpassFilter(cutoff_frequency_hz=300),
        Distortion(drive_db=drive),
        # Level match after the added drive.
        Gain(gain_db=-1),
    ])

    # Pedalboard is handed a [channels, samples] buffer: add a channel axis.
    single_channel = side_signal[np.newaxis, :]
    return chain(single_channel, sample_rate).squeeze()

def boutique_mastering_chain(audio_lr, sample_rate):
    """
    Run the complete mastering chain on a stereo signal.

    Stages, in order: M/S encode, Side high-pass (mono-maker), Mid transient
    shaping, Side saturation, M/S decode, per-channel spectral de-harshing,
    and a final limiter at -1 dB. Progress is printed before each stage.

    Args:
        audio_lr: Stereo signal; index 0 is Left, index 1 is Right.
        sample_rate: Sampling rate in Hz.

    Returns:
        The limiter's output for the processed stereo mix.
    """
    print("  ... 1. Encoding M/S Matrix")
    center, width = ms_encode(audio_lr)

    print("  ... 2. Correcting Low-End Phase (Mono-Maker)")
    width = mono_maker(width, sample_rate, cutoff_hz=120)

    print("  ... 3. Processing Mid (Transient Punch)")
    center = transient_shaper_mid(center, sample_rate, punch=1.4)

    print("  ... 4. Processing Side (Harmonic Widening)")
    width = saturate_side(width, sample_rate, drive=4.0)

    print("  ... 5. Reconstructing Stereo")
    stereo = ms_decode(center, width)

    print("  ... 6. Spectral De-Harshing (Soothe Logic)")
    # Left and Right are cleaned one at a time, independently of each other.
    final_mix = np.stack(
        [spectral_deharsh(channel, sample_rate) for channel in (stereo[0], stereo[1])]
    )

    print("  ... 7. Final Limiting")
    output_stage = Pedalboard([Limiter(threshold_db=-1.0)])
    return output_stage(final_mix, sample_rate)

# Status line printed once every definition in this cell has been executed.
print("✅ Boutique Lab 2.0: Ready for Audio Surgery.")

### Usage Example

In [None]:
# Example Usage:
# audio, freq = torchaudio.load("input.wav")
# audio_np = audio.numpy()
# result = boutique_mastering_chain(audio_np, freq)
# torchaudio.save("mastered_boutique.wav", torch.from_numpy(result), freq)