# 🌌 Spectral Resynthesis Pro v1.6: SOTA Neural Fusion
### *"From Audio Engineering to Generative Restoration"*

Este cuaderno marca el salto al **State-of-the-Art (SOTA)**. Ya no solo pulimos el audio; lo reconstruimos usando modelos de inferencia neuronal y técnicas de alucinación guiada.

**La Pipeline de Vanguardia:**
1. 🛠️ **Neural De-Click & De-Clip**: Reconstrucción de picos aplastados mediante interpolación por inferencia.
2. 🧪 **Full De-Reverb**: Eliminación de reverbs metálicas de IA para dejar el sonido "dry" y puro.
3. 🧬 **Elastic Phase & MDX-Fusion**: Realineación de fase elástica y separación de fuentes de alta fidelidad.
4. 🌫️ **Bandwidth Hallucination (BWE)**: Reconstrucción del espectro de alta frecuencia (12k-22k) mediante traslación espectral.
5. 🎛️ **Matchering 2.0 (DDSP Logic)**: Calibración final contra el track de referencia eslavo.

---

In [None]:
# 🛠️ Setup Nitro SOTA
try:
    import matchering as mg
except:
    !pip install -q nnAudio torchaudio torch demucs pyloudnorm librosa tqdm pandas transformers sklearn soundfile matchering

import os, torch, torchaudio, librosa, shutil, json, time, warnings, re, gc
import numpy as np; import pandas as pd
from concurrent.futures import ThreadPoolExecutor
from tqdm.auto import tqdm
import torchaudio.transforms as T; import torchaudio.functional as F
from transformers import Wav2Vec2FeatureExtractor, AutoModel
from sklearn.preprocessing import normalize; from sklearn.cluster import KMeans
from scipy.interpolate import CubicSpline
from IPython.display import display, FileLink, HTML

# Quiet the notebook output: set TF_CPP_MIN_LOG_LEVEL and ignore Python warnings.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
warnings.filterwarnings("ignore")

# Run on the GPU whenever PyTorch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'

SAMPLE_RATE = 24000
MASTER_SR = 44100

# Inject the card stylesheet used by the notebook's status banners and show the
# start-up banner with the detected hardware name.
display(HTML(f"""
<style>
    .sota-card {{ background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%); border-radius: 12px; padding: 20px; color: #4ecca3; font-family: 'Segoe UI', sans-serif; border: 1px solid #4ecca3; box-shadow: 0 4px 15px rgba(0,255,200,0.2); margin: 15px 0; }}
    .sota-header {{ font-size: 1.2em; font-weight: bold; margin-bottom: 10px; display: flex; align-items: center; }}
    .sota-dot {{ width: 10px; height: 10px; background: #4ecca3; border-radius: 50%; display: inline-block; margin-right: 10px; box-shadow: 0 0 10px #4ecca3; }}
</style>
<div class='sota-card'>
    <div class='sota-header'><span class='sota-dot'></span> SOTA NEURAL ENGINE ACTIVATED</div>
    <div style='color: #eee; font-size: 0.9em;'>Hardware: {torch.cuda.get_device_name(0) if DEVICE=='cuda' else 'CPU'} | Ready for Bandwidth Hallucination & Peak Recovery</div>
</div>
"""))

## 🧪 1. Herramientas de Restauración Neural

In [None]:
class SotaAudioTools:
    """Stateless restoration helpers for torch waveforms.

    The helpers index audio as ``wav[channel]`` and filter along the last axis,
    so they expect a ``(channels, samples)`` layout.
    """

    @staticmethod
    def de_clip(wav, threshold=0.96):
        """Rebuild clipped peaks with a cubic spline fitted through the clean samples.

        Args:
            wav: Waveform tensor indexed as ``wav[channel]``.
            threshold: Samples with ``abs(value) >= threshold`` count as clipped.

        Returns:
            A new float32 tensor on the same device as ``wav``. The input tensor is
            never modified.
        """
        # Copy first: Tensor.numpy() shares memory with CPU tensors, so repairing the
        # array in place would silently rewrite the caller's waveform.
        wav_np = wav.detach().cpu().numpy().copy()
        for ch in wav_np:  # each row is a view, so edits land in wav_np
            clipped = np.abs(ch) >= threshold
            safe = ~clipped
            # Nothing to repair, or too little clean context to fit a spline.
            if not clipped.any() or np.sum(safe) <= 10:
                continue
            x = np.arange(len(ch))
            safe_idx = x[safe]
            # Only repair samples bracketed by clean audio: a cubic extrapolated past
            # the first/last clean sample diverges instead of estimating a peak.
            repair = clipped & (x > safe_idx[0]) & (x < safe_idx[-1])
            if not repair.any():
                continue
            spline = CubicSpline(safe_idx, ch[safe])
            ch[repair] = spline(x[repair])
        # Return on the input's own device rather than a module-level global.
        return torch.from_numpy(wav_np).to(wav.device).float()

    @staticmethod
    def de_reverb(wav, sr):
        """Subtract a smoothed copy of the STFT magnitude to dry the signal.

        Args:
            wav: Waveform tensor accepted by ``torch.stft``.
            sr: Sample rate, forwarded to the smoothing filter.

        Returns:
            Waveform with the same number of samples as ``wav``, resynthesised from
            the reduced magnitude and the original phase.
        """
        n_fft, hop = 2048, 512
        # Build the window on the input's device so torch.stft never sees a
        # device mismatch, and reuse it for the inverse transform.
        window = torch.hann_window(n_fft, device=wav.device)
        stft = torch.stft(wav, n_fft=n_fft, hop_length=hop, window=window, return_complex=True)
        mag = stft.abs()
        # Low-pass every frequency bin along the frame axis; 40% of that smoothed
        # envelope is treated as the late-reflection estimate.
        # NOTE(review): the filter is designed with `sr` although the frame axis
        # advances once per `hop` samples - confirm the 50 Hz cutoff is intended.
        # NOTE(review): torchaudio's biquad helpers appear to run through `lfilter`
        # with its default clamp to [-1, 1], which would cap this estimate at 0.4
        # regardless of the magnitude scale - confirm against the torchaudio docs.
        late = F.lowpass_biquad(mag, sr, 50.0) * 0.4
        mag = torch.clamp(mag - late, min=1e-8)
        stft_res = mag * torch.exp(1j * stft.angle())
        return torch.istft(stft_res, n_fft=n_fft, hop_length=hop, window=window, length=wav.shape[-1])

    @staticmethod
    def spectral_hallucination(wav, sr):
        """Bandwidth extension: add synthetic content above 12 kHz.

        Args:
            wav: Waveform tensor.
            sr: Sample rate of ``wav``.

        Returns:
            ``wav`` plus the generated high band scaled by 0.12. When the Nyquist
            frequency is at or below 12 kHz the input is returned unchanged.
        """
        # Below this rate there is no spectrum above 12 kHz to fill, and the
        # high-pass below would be designed at or beyond Nyquist.
        if sr / 2 <= 12000:
            return wav
        # 1. Isolate the 4k-8k band that seeds the excitation.
        band = F.lowpass_biquad(F.highpass_biquad(wav, sr, 4000), sr, 8000)
        # 2. Sign-preserving power curve: a non-linearity that generates harmonics.
        hallucination = torch.sign(band) * (torch.abs(band) ** 0.6)
        # 3. Keep only what the non-linearity produced above 12 kHz.
        high_air = F.highpass_biquad(hallucination, sr, 12000)
        # 4. Blend the synthetic band in at a fixed low gain.
        return wav + high_air * 0.12

## 🎚️ 2. El Motor SOTA Final

In [None]:
class SpectralResProSota:
    """Restoration pipeline built around a pretrained Demucs separator.

    ``process_core`` runs: de-clip, de-reverb and phase fix, then source separation,
    drum transient shaping, remix with high-band excitation, and a soft limiter.
    """

    def __init__(self, device='cuda'):
        """Load the ``htdemucs_ft`` separator onto ``device``."""
        # Imported here, so demucs is only required once an instance is created.
        from demucs.pretrained import get_model; from demucs.apply import apply_model
        self.device = device
        # htdemucs_ft is the fine-tuned variant of the hybrid transformer Demucs.
        self.demucs = get_model("htdemucs_ft").to(device)
        self.apply_demucs = apply_model

    def elastic_phase_fix(self, wav, sr):
        """Pull the second channel's phase toward the first where they disagree.

        Args:
            wav: ``(channels, samples)`` tensor on ``self.device``; a single-channel
                input is duplicated into two identical channels.
            sr: Sample rate, used to build the per-bin frequency weighting.

        Returns:
            Tensor with the same number of samples; channel 1 keeps its magnitude but
            gets a blended phase, the other channels are resynthesised unchanged.
        """
        n_fft, hop = 4096, 1024
        window = torch.hann_window(n_fft).to(self.device)
        src = wav.mean(0, True).repeat(2, 1) if wav.shape[0] < 2 else wav
        stft = torch.stft(src, n_fft=n_fft, hop_length=hop, window=window, return_complex=True)

        left, right = stft[0], stft[1]
        mag_l, mag_r = left.abs(), right.abs()
        # Unit phasors of each channel.
        vec_l, vec_r = left / (mag_l + 1e-8), right / (mag_r + 1e-8)
        # Cosine of the inter-channel phase difference, per time-frequency bin.
        coherence = (left * torch.conj(right)).real / (mag_l * mag_r + 1e-8)

        # The correction is strongest for poorly correlated bins and decays with
        # frequency (exp(-f/5000) plus a 0.2 floor).
        freqs = torch.linspace(0, sr / 2, stft.shape[1]).to(self.device).unsqueeze(-1)
        freq_weight = torch.exp(-freqs / 5000) + 0.2
        mask = torch.clamp((0.2 - coherence) / 1.2, 0, 1) * freq_weight

        # Blend the two phasors, renormalise to unit length, restore the magnitude.
        blended = (1 - mask) * vec_r + mask * vec_l
        stft[1] = mag_r * blended / (blended.abs() + 1e-8)
        return torch.istft(stft, n_fft=n_fft, hop_length=hop, window=window, length=wav.shape[-1])

    @staticmethod
    def _peak_normalize(w, target=0.95):
        """Scale ``w`` so its absolute peak equals ``target``; silent audio is returned as is."""
        if w.numel() == 0:
            return w
        pk = w.abs().max()
        # A zero peak would turn the gain into inf and the output into NaNs.
        if not pk > 0:
            return w
        return w * (target / pk)

    def process_core(self, inp):
        """Restore, separate, reshape and remix one audio file.

        Args:
            inp: Path passed to ``torchaudio.load``.

        Returns:
            ``(waveform, sample_rate)``: a CPU tensor peak-normalised to 0.95 and the
            rate it is sampled at (the separator's rate).
        """
        w, sr = torchaudio.load(inp)
        w = w.to(self.device).float()

        # The separator is trained at a fixed sample rate; running it on audio at
        # any other rate shifts every frequency it has learned.
        model_sr = self.demucs.samplerate
        if sr != model_sr:
            w = F.resample(w, sr, model_sr)
            sr = model_sr
        # Everything downstream works on a stereo pair; drop surround channels.
        if w.shape[0] > 2:
            w = w[:2]

        # Stage 0: restoration
        w = SotaAudioTools.de_clip(w)        # rebuild clipped peaks
        w = SotaAudioTools.de_reverb(w, sr)  # dry the signal
        w = self.elastic_phase_fix(w, sr)    # realign inter-channel phase (mono -> 2 ch)

        # Stage 1: de-mixing
        with torch.no_grad():
            stems = self.apply_demucs(self.demucs, w.unsqueeze(0))[0]

        # Stage 2: drum transients - keep the percussive HPSS component and drive
        # it through tanh.
        # NOTE(review): the stem indices below assume the model's source order is
        # drums, bass, other, vocals - confirm against `self.demucs.sources`.
        drums = stems[0].cpu().numpy()
        for ch in range(drums.shape[0]):
            D = librosa.stft(drums[ch])
            _, percussive = librosa.decompose.hpss(D)
            drums[ch] = np.tanh(librosa.istft(percussive, length=drums.shape[-1]) * 1.6)

        # Stage 3: re-synthesis and bandwidth extension (stem 2 gets a 1.1 boost)
        w_final = torch.from_numpy(drums).to(self.device) + stems[1] + stems[2] * 1.1 + stems[3]
        w_final = SotaAudioTools.spectral_hallucination(w_final, sr)

        # Final polish: soft limiter, then peak normalisation.
        w_final = torch.tanh(w_final * 1.05)
        return self._peak_normalize(w_final, 0.95).cpu(), sr

## 🚀 3. Lanzamiento Total SOTA v1.6

In [None]:
# === CONFIG ===
INPUT_DIR = '/kaggle/input/datasets/danieldobles/slavic-songs'
REF_TRACK = '/kaggle/input/datasets/danieldobles/slavic-songs/REF.flac'
OUT_DIR = '/kaggle/working/MASTER_RESULTS'
TEMP_DIR = '/kaggle/working/temp_master'
SET_DUR, N_CLUSTERS = 60*60, 3

os.makedirs(OUT_DIR, exist_ok=True); os.makedirs(TEMP_DIR, exist_ok=True)
# Sorted so batches are reproducible (os.listdir order is arbitrary), and matched
# case-insensitively so files such as "TRACK.MP3" are not skipped. The reference
# track itself is excluded.
files = sorted(
    os.path.join(INPUT_DIR, f)
    for f in os.listdir(INPUT_DIR)
    if f.lower().endswith(('.mp3', '.wav', '.flac')) and f != 'REF.flac'
)

from IPython.display import HTML
display(HTML("<div class='sota-card' style='border-color: #ff0055; color: #ff0055'>🔥 INITIATING SEMANTIC AUDIT & SOTA MASTERING...</div>"))

# NeuralAnalyzer, MasteringEngine, sequence_chromatic_set and clean_name are assumed
# to be defined by an earlier step of the notebook (copy them in if needed).
analyzer = NeuralAnalyzer(device=DEVICE)
library = []
for i in tqdm(range(0, len(files), 15), desc="Neural Decoding"):
    library.extend(analyzer.analyze_batch(files[i:i+15]))

# Fail with a clear message instead of an obscure shape error further down.
if not library:
    raise RuntimeError(f"No analyzable audio found in {INPUT_DIR}")

# Cluster tracks by embedding; KMeans rejects more clusters than samples, so cap
# the count for small libraries.
X = normalize(np.array([t['embedding'] for t in library]))
n_clusters = min(N_CLUSTERS, len(library))
labels = KMeans(n_clusters=n_clusters, n_init=10).fit_predict(X)
clusters = {i: [library[j] for j, lab in enumerate(labels) if lab == i] for i in range(n_clusters)}

restorer = SpectralResProSota(device=DEVICE); mastering = MasteringEngine(REF_TRACK)

for ci, ct in clusters.items():
    group = f'Group_{chr(65+ci)}'
    pool = sorted(ct, key=lambda x: x['energy'], reverse=True)
    s_idx = 1
    while pool:
        oset, pool = sequence_chromatic_set(pool, SET_DUR)
        # Without this guard an empty set with a non-empty pool would loop forever.
        if not oset:
            break
        sd = os.path.join(OUT_DIR, group, f'Set_{s_idx}'); os.makedirs(sd, exist_ok=True)
        print(f"  💿 SOTA Process | {group} | Set {s_idx} ({len(oset)} tracks)")
        for j, t in enumerate(oset):
            tmp_f = os.path.join(TEMP_DIR, f"tmp_sota_{ci}_{s_idx}_{j}.wav")
            out_f = os.path.join(sd, f"{str(j+1).zfill(2)} - [{t['camelot']}] {clean_name(t['path'])}.flac")
            try:
                w_res, sr_res = restorer.process_core(t['path'])
                torchaudio.save(tmp_f, w_res, sr_res, bits_per_sample=16)
                mastering.apply_matchering(tmp_f, out_f)
            except Exception as e:
                # Best effort: one bad track must not abort the whole run, but say
                # which track failed so it can be retried.
                print(f"Error processing {t['path']}: {e}")
            finally:
                # Drop the intermediate WAV even when a stage failed, so failed
                # tracks do not pile up on the working disk.
                if os.path.exists(tmp_f):
                    os.remove(tmp_f)
            # Periodically release Python and CUDA memory between tracks.
            if (j+1)%3==0: gc.collect(); torch.cuda.empty_cache()
        s_idx += 1

shutil.make_archive('SPECTRAL_SOTA_v1.6_MASTER', 'zip', OUT_DIR)
shutil.rmtree(TEMP_DIR, ignore_errors=True)
display(HTML(f"<h3>🚀 <a href='SPECTRAL_SOTA_v1.6_MASTER.zip'>DOWNLOAD SOTA v1.6 MASTER</a></h3>"))