# 🌌 Spectral Affinity: Harmonic Mix & Curation Engine 2.0
### *"The Ultimate AI Librarian for Camelot-based Flow + Mastering"*

Este cuaderno combina la **Curaduría Inteligente** con un **Mastering Adaptativo** (Matchering 2.0) y **Restauración Armónica** (Pedalboard).

**La Solución:**
1. 🧠 **Neural Analysis**: Key (Camelot), BPM, Energía y Textura Semántica (MERT).
2. ✨ **Harmonic Restoration**: Suno corta el audio a los 16 kHz. Usamos **Excitación Armónica** para devolver el brillo natural sin artefactos de IA.
3. 🎚️ **Matchering 2.0**: Copiamos el "alma" (EQ, RMS, Ancho Estéreo) de una referencia profesional y se la aplicamos a tus temas.
4. 🔀 **Harmonic Flow**: Secuenciación automática siguiendo la Rueda de Camelot.
5. 📂 **Auto-Organization**: Generación de carpetas con sets listos para pinchar.

---

In [None]:
# üõ†Ô∏è Setup
try:
    import matchering as mg
    from pedalboard import Pedalboard
except:
    !pip install -q transformers torch torchaudio nnAudio librosa pandas scikit-learn tqdm matchering pedalboard soundfile

import os, torch, torchaudio, librosa, shutil, json, time, warnings, re
import numpy as np; import pandas as pd
from concurrent.futures import ThreadPoolExecutor
from tqdm.auto import tqdm
import torchaudio.transforms as T
from transformers import Wav2Vec2FeatureExtractor, AutoModel
from sklearn.preprocessing import normalize; from sklearn.cluster import KMeans
from IPython.display import display, FileLink, HTML
import matchering as mg
from pedalboard import Pedalboard, HighpassFilter, Distortion, Gain, HighShelfFilter
import pedalboard.io

# Silence Python warnings and set TF_CPP_MIN_LOG_LEVEL (TensorFlow's native
# log-level switch) to its quietest value.
warnings.filterwarnings("ignore")
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Run on the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'

# Sample rate (Hz) that audio is resampled to for analysis.
SAMPLE_RATE = 24000
print(f"üåå MASTERING & CURATION ENGINE READY ON: {DEVICE.upper()}")

## 🧠 1. Motor de Análisis Neuronal

In [None]:
# Lookup from "<tonic> <mode>" key names (sharps only) to zero-padded Camelot
# wheel codes: the "B" suffix marks major keys, the "A" suffix minor keys.
CAMELOT_MAP = {
    # Major keys
    'C Major': '08B',
    'C# Major': '03B',
    'D Major': '10B',
    'D# Major': '05B',
    'E Major': '12B',
    'F Major': '07B',
    'F# Major': '02B',
    'G Major': '09B',
    'G# Major': '04B',
    'A Major': '11B',
    'A# Major': '06B',
    'B Major': '01B',
    # Minor keys
    'C Minor': '05A',
    'C# Minor': '12A',
    'D Minor': '07A',
    'D# Minor': '02A',
    'E Minor': '09A',
    'F Minor': '04A',
    'F# Minor': '11A',
    'G Minor': '06A',
    'G# Minor': '01A',
    'A Minor': '08A',
    'A# Minor': '03A',
    'B Minor': '10A',
}

class NeuralAnalyzer:
    """Extract key, tempo, energy, duration and a MERT embedding per track.

    The key is estimated by folding an 84-bin CQT into a 12-bin chroma vector
    and picking the best match among 24 rotated major/minor key profiles.
    The embedding is the time-averaged last hidden state of
    ``m-a-p/MERT-v1-95M``.
    """

    # Seconds of audio handed to MERT and to the beat tracker respectively.
    _EMBED_SECONDS = 15
    _TEMPO_SECONDS = 45
    # Pitch-class names in chroma-bin order.
    _PITCH_CLASSES = ('C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B')

    def __init__(self, device='cuda'):
        """Build the CQT front-end, load MERT and prepare the key profiles.

        Args:
            device: torch device string on which the CQT, the MERT model and
                the key profiles are placed.
        """
        from nnAudio.Spectrogram import CQT1992v2
        self.device = device
        # 84 bins at 12 bins per octave = 7 octaves of semitone-spaced bins.
        self.cqt = CQT1992v2(sr=SAMPLE_RATE, n_bins=84, bins_per_octave=12, verbose=False).to(device)
        self.proc = Wav2Vec2FeatureExtractor.from_pretrained('m-a-p/MERT-v1-95M', trust_remote_code=True)
        self.mert = AutoModel.from_pretrained('m-a-p/MERT-v1-95M', trust_remote_code=True).to(device).eval()
        # Major/minor key-profile weights (the values match the
        # Krumhansl-Kessler profiles), indexed by semitone above the tonic.
        major = torch.tensor([6.35,2.23,3.48,2.33,4.38,4.09,2.52,5.19,2.39,3.66,2.29,2.88], device=device)
        minor = torch.tensor([6.33,2.68,3.52,5.38,2.60,3.53,2.54,4.75,3.98,2.69,3.34,3.17], device=device)
        # Columns 0-11 are the major keys, 12-23 the minor keys; rolling by i
        # moves the tonic weight to pitch class i. Final shape is (12, 24).
        self.profiles = torch.stack([torch.roll(major,i) for i in range(12)] + [torch.roll(minor,i) for i in range(12)]).t()

    def analyze_batch(self, paths):
        """Analyze the audio files in *paths* and return one dict per track.

        Files that cannot be loaded are reported and skipped, so the result
        can be shorter than *paths*.

        Args:
            paths: Iterable of audio file paths.

        Returns:
            A list of dicts with the keys ``path``, ``file``, ``camelot``,
            ``bpm``, ``energy``, ``duration`` (seconds) and ``embedding``
            (a plain list of floats). Empty when nothing could be loaded.
        """
        def load_one(p):
            # Load, resample to SAMPLE_RATE and down-mix to mono.
            try:
                w, s = torchaudio.load(p)
                if s != SAMPLE_RATE:
                    w = T.Resample(s, SAMPLE_RATE)(w)
                return w.mean(0), os.path.basename(p), p
            except Exception as e:
                # Best effort: one unreadable file must not abort the batch,
                # but the failure should be visible instead of silent.
                print(f"  [skip] {os.path.basename(p)}: {e}")
                return None

        with ThreadPoolExecutor(max_workers=8) as pl:
            results = list(pl.map(load_one, paths))
        valid = [r for r in results if r is not None]
        if not valid:
            return []

        # Zero-pad every waveform to the longest one so the CQT runs once.
        lengths = [r[0].shape[0] for r in valid]
        m_len = max(lengths)
        t = torch.zeros(len(valid), m_len, device=self.device)
        for i, (w, _, _) in enumerate(valid):
            t[i, :w.shape[0]] = w.to(self.device)

        with torch.no_grad():
            spec = self.cqt(t)
            # The mean runs over the padded time axis, which would dilute the
            # energy of every track shorter than the longest one in the batch.
            # Dividing by the filled fraction undoes that (approximately: the
            # frame count is roughly proportional to the sample count).
            fill = torch.tensor(lengths, dtype=spec.dtype, device=spec.device).clamp(min=1) / max(m_len, 1)
            energy = (spec.pow(2).mean(dim=(1,2)) / fill).cpu().numpy()
            # Fold (batch, 84, time) into (batch, 7 octaves, 12 classes, time)
            # and sum over octaves and time to get one chroma vector per track.
            chroma = spec.view(len(valid), 7, 12, -1).sum(dim=(1,3))
            chroma = chroma / (chroma.norm(dim=1,keepdim=True)+1e-6)
            best = torch.argmax(torch.matmul(chroma, self.profiles), dim=1).cpu().numpy()

            # MERT embedding from the first seconds of each (unpadded) track.
            sl = int(SAMPLE_RATE*self._EMBED_SECONDS)
            embs = []
            for w, _, _ in valid:
                s = w[:sl].cpu().numpy()
                iv = self.proc(s, sampling_rate=SAMPLE_RATE, return_tensors='pt').input_values.to(self.device)
                embs.append(self.mert(iv).last_hidden_state.mean(dim=1).squeeze().cpu().numpy().tolist())

        batch_meta = []
        for i, (w, fname, fpath) in enumerate(valid):
            # Profile indices 0-11 are major keys, 12-23 minor keys.
            k = self._PITCH_CLASSES[best[i]%12]
            m = 'Major' if best[i]<12 else 'Minor'
            y_np = w.numpy().squeeze()
            tempo, _ = librosa.beat.beat_track(y=y_np[:SAMPLE_RATE*self._TEMPO_SECONDS], sr=SAMPLE_RATE)
            batch_meta.append({
                'path': fpath, 'file': fname, 'camelot': CAMELOT_MAP.get(f"{k} {m}", "08A"),
                # beat_track may return a scalar or a one-element array.
                'bpm': int(tempo.item()) if hasattr(tempo, 'item') else int(tempo),
                'energy': float(energy[i]), 'duration': len(w)/SAMPLE_RATE, 'embedding': embs[i]
            })
        return batch_meta

## ✨ 2. Recuperación Armónica y Mastering Adaptativo

In [None]:
def harmonic_exciter(input_path, output_path, drive=1.8, mix=0.12):
    """Add a layer of synthesized high-frequency harmonics and write the result.

    The whole file is read, a copy is sent through a
    high-pass -> distortion -> high-pass -> gain chain, and that copy is
    added to the untouched original scaled by *mix*.

    Args:
        input_path: Audio file to read.
        output_path: Destination file, written with the input's sample rate
            and channel count.
        drive: Distortion drive in dB.
        mix: Linear gain applied to the harmonic layer before summing.
    """
    with pedalboard.io.AudioFile(input_path) as f:
        audio = f.read(f.frames)
        sr = f.samplerate

    # Isolate the top band, distort it to create new overtones, then filter
    # again so only the upper part of the distorted signal is kept.
    board = Pedalboard([
        HighpassFilter(cutoff_frequency_hz=10000),
        Distortion(drive_db=drive),
        HighpassFilter(cutoff_frequency_hz=12000),
        Gain(gain_db=-4)
    ])

    harmonics = board(audio, sr)
    processed = audio + (harmonics * mix)

    # Summing the two signals can push peaks past full scale, which would be
    # hard-clipped when written to an integer-PCM file. Scale down only when
    # needed so material that already fits is left untouched.
    peak = float(abs(processed).max()) if processed.size else 0.0
    if peak > 1.0:
        processed = processed / peak

    with pedalboard.io.AudioFile(output_path, 'w', sr, processed.shape[0]) as f:
        f.write(processed)

def apply_mastering(target, reference, output_path):
    """Master *target* against *reference* with Matchering, as 24-bit PCM.

    Args:
        target: Path of the track to be mastered.
        reference: Path of the reference track whose sound is matched.
        output_path: Where the mastered file is written.

    Returns:
        True when Matchering finished, False when it raised (the error is
        printed rather than propagated).
    """
    try:
        outputs = [mg.pcm24(output_path)]
        mg.process(target=target, reference=reference, results=outputs)
    except Exception as e:
        print(f"  ‚ö†Ô∏è Error en Matchering: {e}")
        return False
    return True

## 🔀 3. Lógica de Flujo Armónico Cromático

In [None]:
def get_next_harmonic(c):
    """Return the Camelot codes treated as compatible with code *c*.

    *c* is two digits followed by a letter, e.g. ``'08A'``. The result lists,
    in this order: the code itself, the next number (12 wraps to 1), the
    previous number (1 wraps to 12) and the same number with the other letter.
    """
    number = int(c[:2])
    letter = c[2]
    step_up = number % 12 + 1
    step_down = (number - 2) % 12 + 1
    other_letter = 'A' if letter == 'B' else 'B'
    return [
        f'{number:02d}{letter}',
        f'{step_up:02d}{letter}',
        f'{step_down:02d}{letter}',
        f'{number:02d}{other_letter}',
    ]

def sequence_chromatic_set(tracks, target_dur_sec=3600):
    """Greedily order *tracks* into one set of about *target_dur_sec* seconds.

    The first track of *tracks* opens the set. While the accumulated duration
    is below the target, the remaining track with the highest score against
    the current one is appended. The score is
    ``0.5 * harmonic + 0.3 * cosine_similarity + 0.2 * bpm_closeness`` where
    ``harmonic`` is 1 for a Camelot-compatible key and 0 otherwise, and
    ``bpm_closeness`` falls linearly to 0 at a 40 BPM difference.

    Args:
        tracks: Track dicts with ``camelot``, ``bpm``, ``duration`` and
            ``embedding`` keys. The list itself is not modified.
        target_dur_sec: Duration in seconds after which no more tracks are
            added.

    Returns:
        A tuple ``(ordered, remaining)``: the sequenced set and the tracks
        that were not used. Both are empty lists when *tracks* is empty.
    """
    if not tracks:
        # Same shape as the normal return so callers can always unpack.
        return [], []

    pool = list(tracks)
    cur = pool.pop(0)
    ordered = [cur]
    dur = cur['duration']

    while pool and dur < target_dur_sec:
        # Everything that depends only on the current track is computed once
        # per step instead of once per candidate.
        compatible = set(get_next_harmonic(cur['camelot']))
        cur_emb = cur['embedding']
        cur_norm = np.linalg.norm(cur_emb)
        cur_bpm = cur['bpm']

        def get_score(t):
            # `compatible` already contains the current code itself, so one
            # membership test covers the "same key" case as well.
            h = 1.0 if t['camelot'] in compatible else 0.0
            b = max(0, 1.0 - (abs(t['bpm'] - cur_bpm) / 40.0))
            s = np.dot(cur_emb, t['embedding']) / (cur_norm * np.linalg.norm(t['embedding']) + 1e-9)
            return h * 0.5 + s * 0.3 + b * 0.2

        # A single linear scan; on ties the earliest candidate wins.
        best_idx = max(range(len(pool)), key=lambda i: get_score(pool[i]))

        nxt = pool.pop(best_idx)
        ordered.append(nxt)
        dur += nxt['duration']
        cur = nxt
    return ordered, pool

def clean_name(n): 
    name = os.path.basename(n).rsplit('.', 1)[0]
    return re.sub(r'[\-\_\.]+?', ' ', re.sub(r'^[\w\-]+?-', '', name)).strip()

## 🚀 4. Lanzamiento del Motor (Harmonic Mix + Mastering)

In [None]:
# Prefer the Kaggle dataset mount; fall back to a local folder.
INPUT_DIR = '/kaggle/input/datasets/danieldobles/slavic-songs'
if not os.path.exists(INPUT_DIR): INPUT_DIR = 'Slavic Data_Set'

# List the folder once and sort it: os.listdir returns entries in arbitrary
# order, which would make the chosen reference and the processing order
# vary between runs.
_input_names = sorted(os.listdir(INPUT_DIR))

# The professional reference track is any entry whose name contains "REF"
# (case-insensitive); the first one in sorted order is used.
potential_refs = [os.path.join(INPUT_DIR, f) for f in _input_names if 'REF' in f.upper()]
REF_FILE = potential_refs[0] if potential_refs else None

OUT_DIR = '/kaggle/working/MASTERED_CURATION_RESULTS'
TEMP_DIR = '/kaggle/working/tmp_processing'
SET_DURATION = 60 * 60 # 60 minutes
N_GROUPS = 3

os.makedirs(OUT_DIR, exist_ok=True)
os.makedirs(TEMP_DIR, exist_ok=True)
# Tracks to process: audio files that are not reference tracks. The extension
# is compared in lower case so ".MP3" / ".WAV" files are not skipped.
files = [
    os.path.join(INPUT_DIR, f) for f in _input_names
    if f.lower().endswith(('.mp3','.wav','.flac')) and 'REF' not in f.upper()
]

print(f"üß† Analizando {len(files)} temas...")
analyzer = NeuralAnalyzer(device=DEVICE)
library = []
# Analyze in batches of 16 files; tracks that fail to load are left out.
for i in tqdm(range(0, len(files), 16)): library.extend(analyzer.analyze_batch(files[i:i+16]))

print("üîç Segmentando por Estilo Sem√°ntico...")
# KMeans cannot fit more clusters than samples (and nothing at all on an
# empty library), so the group count is capped by the analyzed tracks.
n_clusters = min(N_GROUPS, len(library))
if n_clusters:
    # Cluster the L2-normalized embeddings into style groups.
    X = normalize(np.array([t['embedding'] for t in library]))
    labels = KMeans(n_clusters=n_clusters, n_init=10).fit_predict(X)
    clusters = {i: [library[j] for j,l in enumerate(labels) if l==i] for i in range(n_clusters)}
else:
    print("No se pudo analizar ningún tema; no hay nada que agrupar.")
    clusters = {}

print(f"\nüî• Procesando grupos en {len(clusters)} Sonic Clusters...")
if REF_FILE: print(f"üìè Usando Referencia: {os.path.basename(REF_FILE)}")

# Build and render the sets for every cluster.
for ci, tracks in clusters.items():
    group_name = f'Group_{chr(65+ci)}'
    # The most energetic track comes first, so it opens the first set.
    pool = sorted(tracks, key=lambda x: x['energy'], reverse=True)
    set_idx = 1

    # Keep cutting sets from the pool until every track has been used.
    while pool:
        set_dir = os.path.join(OUT_DIR, group_name, f'Set_{set_idx}')
        os.makedirs(set_dir, exist_ok=True)
        oset, pool = sequence_chromatic_set(pool, SET_DURATION)

        print(f"\nüìÇ {group_name} | Set {set_idx} ({len(oset)} tracks)")
        for j, t in tqdm(enumerate(oset), total=len(oset), desc="Processing Audio", leave=False):
            base_name = clean_name(t['path'])
            out_name = f"{str(j+1).zfill(2)} - [{t['camelot']}] {base_name}.wav"
            final_path = os.path.join(set_dir, out_name)

            # 1. Harmonic restoration (Pedalboard) into a temporary file.
            tmp_h = os.path.join(TEMP_DIR, f"proc_{ci}_{j}.wav")
            harmonic_exciter(t['path'], tmp_h)

            # 2. Matchering (EQ, RMS, stereo matching) when a reference exists.
            mastered = bool(REF_FILE) and apply_mastering(tmp_h, REF_FILE, final_path)
            if not mastered:
                # No reference, or mastering failed: keep the restored render
                # so the track does not silently disappear from the set.
                shutil.move(tmp_h, final_path)

            # After a successful mastering pass the temporary file is still there.
            if os.path.exists(tmp_h): os.remove(tmp_h)

        set_idx += 1

# Remove the scratch directory, zip the results folder into the working
# directory and show a download link for the archive.
shutil.rmtree(TEMP_DIR)
shutil.make_archive('SPECTRAL_AFFINITY_MASTERED', 'zip', OUT_DIR)
_download_html = "<h3>üöÄ <a href='SPECTRAL_AFFINITY_MASTERED.zip'>DESCARGAR LIBRER√çA MASTERIZADA</a></h3>"
display(HTML(_download_html))