# 🌌 Spectral Affinity: GPU-Accelerated Mastering & Curation 2.2
### *"The Ultimate AI Librarian for Camelot-based Flow + High-Speed GPU Analysis"*

Este cuaderno utiliza la **GPU de Kaggle** para acelerar drásticamente el análisis de audio y procesa el **Mastering (Matchering 2.0)** en paralelo utilizando múltiples núcleos de CPU.

**La Solución Acelerada:**
1. 🧠 **GPU Neural Analysis**: Key, BPM y Energía calculados en ráfagas de 32 temas usando **Wav2Vec2 (MERT)** y **CQT** en CUDA.
2. 🎚️ **Parallel Mastering**: Ejecución de Matchering 2.0 en paralelo (multi-threading) para triplicar la velocidad de exportación.
3. 🔀 **BPM-Harmonic Flow**: Secuenciación automática de menos a más BPM siguiendo la Rueda de Camelot.
4. 📂 **Auto-Organization**: Organización instantánea en carpetas por estilos semánticos.

---

In [None]:
# 🛠️ High-Speed Setup
try:
    import matchering as mg
except:
    !pip install -q transformers torch torchaudio nnAudio librosa pandas scikit-learn tqdm matchering soundfile

import os, torch, torchaudio, librosa, shutil, json, time, warnings, re
import numpy as np; import pandas as pd
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm.auto import tqdm
import torchaudio.transforms as T
from transformers import Wav2Vec2FeatureExtractor, AutoModel
from sklearn.preprocessing import normalize; from sklearn.cluster import KMeans
from IPython.display import display, FileLink, HTML
import matchering as mg

# Keep the notebook output quiet: ignore Python warnings and set
# TF_CPP_MIN_LOG_LEVEL to '3' for any library that reads it.
warnings.filterwarnings("ignore")
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Run on CUDA when torch reports it as available, otherwise on the CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'

# Sample rate (Hz) that every track is resampled to before analysis.
SAMPLE_RATE = 24000

# Thread count for parallel mastering; 4 when the core count is unknown.
MAX_WORKERS = os.cpu_count() or 4

print(f"üåå GPU ACCELERATION: {DEVICE.upper()} ACTIVE")
print(f"‚öôÔ∏è CPU CORES FOR MASTERING: {MAX_WORKERS}")

## 🧠 1. Motor de Análisis Neuronal (GPU Power)

In [None]:
# Lookup from "<tonic> <mode>" to its Camelot wheel code.
# Codes are zero-padded to two digits; the 'B' ring holds the major keys and
# the 'A' ring the minor keys. Tonics are spelled with sharps only.
CAMELOT_MAP = {
    # Major keys (B ring)
    'C Major': '08B',
    'C# Major': '03B',
    'D Major': '10B',
    'D# Major': '05B',
    'E Major': '12B',
    'F Major': '07B',
    'F# Major': '02B',
    'G Major': '09B',
    'G# Major': '04B',
    'A Major': '11B',
    'A# Major': '06B',
    'B Major': '01B',
    # Minor keys (A ring)
    'C Minor': '05A',
    'C# Minor': '12A',
    'D Minor': '07A',
    'D# Minor': '02A',
    'E Minor': '09A',
    'F Minor': '04A',
    'F# Minor': '11A',
    'G Minor': '06A',
    'G# Minor': '01A',
    'A Minor': '08A',
    'A# Minor': '03A',
    'B Minor': '10A',
}

class NeuralAnalyzer:
    """Batch audio analyzer producing key, tempo, energy and an embedding per track.

    For each audio file the analyzer returns a dict with its Camelot key
    (from a CQT-derived chroma matched against 24 key profiles), BPM
    (librosa beat tracking), mean CQT energy, duration in seconds and a
    MERT embedding (mean-pooled last hidden state of the first 15 seconds).
    """

    def __init__(self, device='cuda'):
        """Load the CQT front-end, the MERT model and the key profiles on ``device``.

        Args:
            device: torch device string on which the models and tensors live.
        """
        from nnAudio.Spectrogram import CQT1992v2
        self.device = device
        # 84 bins at 12 bins per octave = 7 octaves of semitone-spaced bins.
        self.cqt = CQT1992v2(sr=SAMPLE_RATE, n_bins=84, bins_per_octave=12, verbose=False).to(device)
        self.proc = Wav2Vec2FeatureExtractor.from_pretrained('m-a-p/MERT-v1-95M', trust_remote_code=True)
        self.mert = AutoModel.from_pretrained('m-a-p/MERT-v1-95M', trust_remote_code=True).to(device).eval()
        # Pitch-class weight profiles for a major and a minor key, tonic at index 0.
        major = torch.tensor([6.35,2.23,3.48,2.33,4.38,4.09,2.52,5.19,2.39,3.66,2.29,2.88], device=device)
        minor = torch.tensor([6.33,2.68,3.52,5.38,2.60,3.53,2.54,4.75,3.98,2.69,3.34,3.17], device=device)
        # Columns 0-11: major profile rolled to each tonic; columns 12-23: minor.
        # Stored transposed as (12, 24) so that chroma @ profiles scores all 24 keys.
        self.profiles = torch.stack([torch.roll(major,i) for i in range(12)] + [torch.roll(minor,i) for i in range(12)]).t()
        self.resampler = T.Resample(orig_freq=44100, new_freq=SAMPLE_RATE).to(device)
        # Resamplers are cached per source rate so the kernel is built once per
        # rate instead of once per track; the 44.1 kHz one is pre-seeded.
        self._resamplers = {44100: self.resampler}

    def _resampler_for(self, src_rate):
        """Return a cached resampler from ``src_rate`` to SAMPLE_RATE on ``self.device``."""
        if src_rate not in self._resamplers:
            self._resamplers[src_rate] = T.Resample(src_rate, SAMPLE_RATE).to(self.device)
        return self._resamplers[src_rate]

    def analyze_batch(self, paths):
        """Analyze a batch of audio files.

        Args:
            paths: iterable of audio file paths.

        Returns:
            A list with one dict per successfully loaded file, with keys
            'path', 'file', 'camelot', 'bpm', 'energy', 'duration' and
            'embedding'. Files that cannot be loaded are reported and skipped;
            an empty list is returned when nothing could be loaded.
        """
        def load_one(p):
            # Best effort: an unreadable file must not abort the whole batch.
            try:
                w, s = torchaudio.load(p)
                # Down-mix to mono by averaging the channels.
                return w.mean(0), s, os.path.basename(p), p
            except Exception as e:
                print(f"  Error al cargar {os.path.basename(p)}: {e}")
                return None

        # Decoding is I/O-bound, so the files are loaded in a small thread pool.
        with ThreadPoolExecutor(max_workers=8) as loader:
            valid = [r for r in loader.map(load_one, paths) if r is not None]
        if not valid:
            return []

        # Move every waveform to the device and bring it to SAMPLE_RATE.
        batch_waves = []
        for w, s, _fname, _fpath in valid:
            w_gpu = w.to(self.device)
            if s != SAMPLE_RATE:
                w_gpu = self._resampler_for(s)(w_gpu)
            batch_waves.append(w_gpu)

        # Zero-pad to the longest track so the CQT runs once on the whole batch.
        lengths = [w.shape[0] for w in batch_waves]
        m_len = max(lengths)
        t = torch.zeros(len(batch_waves), m_len, device=self.device)
        for i, w in enumerate(batch_waves):
            t[i, :w.shape[0]] = w

        with torch.no_grad():
            spec = self.cqt(t)
            # The mean runs over the padded length, which would dilute the
            # energy of shorter tracks by however much padding their batch
            # happened to need. Rescaling by padded/actual length makes the
            # value (approximately) independent of batch composition.
            pad_gain = torch.tensor(
                [m_len / max(n, 1) for n in lengths], device=self.device, dtype=spec.dtype
            )
            energy = (spec.pow(2).mean(dim=(1, 2)) * pad_gain).cpu().numpy()
            # Fold the 7 octaves onto 12 pitch classes and sum over time.
            # NOTE(review): assumes the CQT's lowest bin is a C so that chroma
            # index 0 lines up with 'C' below — confirm against nnAudio's default fmin.
            chroma = spec.view(len(batch_waves), 7, 12, -1).sum(dim=(1, 3))
            chroma = chroma / (chroma.norm(dim=1, keepdim=True) + 1e-6)
            best = torch.argmax(torch.matmul(chroma, self.profiles), dim=1).cpu().numpy()

            # MERT embedding of the first 15 seconds, mean-pooled over time.
            clip_len = int(SAMPLE_RATE * 15)
            embs = []
            for w in batch_waves:
                clip = w[:clip_len].cpu().numpy()
                iv = self.proc(clip, sampling_rate=SAMPLE_RATE, return_tensors='pt').input_values.to(self.device)
                embs.append(self.mert(iv).last_hidden_state.mean(dim=1).squeeze().cpu().numpy().tolist())

        pc = ['C','C#','D','D#','E','F','F#','G','G#','A','A#','B']
        batch_meta = []
        for i, (_w_orig, _s_orig, fname, fpath) in enumerate(valid):
            # Indices 0-11 are major keys, 12-23 minor; the tonic is index mod 12.
            k = pc[best[i] % 12]
            m = 'Major' if best[i] < 12 else 'Minor'
            # Tempo is estimated on the CPU with librosa from the first 45 seconds.
            y_np = batch_waves[i][:SAMPLE_RATE * 45].cpu().numpy()
            tp, _ = librosa.beat.beat_track(y=y_np, sr=SAMPLE_RATE)
            batch_meta.append({
                'path': fpath, 'file': fname, 'camelot': CAMELOT_MAP.get(f"{k} {m}", "08A"),
                # beat_track may return a scalar or a one-element array.
                'bpm': int(round(tp.item() if hasattr(tp, 'item') else tp)),
                'energy': float(energy[i]), 'duration': len(batch_waves[i]) / SAMPLE_RATE, 'embedding': embs[i]
            })
        return batch_meta

## 🎚️ 2. Parallel Mastering Engine

In [None]:
def apply_mastering_worker(target, reference, output_path):
    """Master one track with Matchering; intended to run as a thread-pool task.

    Args:
        target: path of the track to master.
        reference: path of the reference track to match against.
        output_path: destination path of the 24-bit PCM result.

    Returns:
        True when processing succeeded, False when any error occurred
        (the error is printed, never raised, so one bad track cannot
        abort the rest of the pool).
    """
    try:
        mg.process(target=target, reference=reference, results=[mg.pcm24(output_path)])
        return True
    except Exception as e:
        print(f"  ‚ö†Ô∏è Error: {e}")
        return False

## 🔀 3. Lógica de Flujo Armónico

In [None]:
def get_next_harmonic(c):
    """Return the Camelot codes that mix harmonically with code ``c``.

    Args:
        c: Camelot code made of a two-digit number and a ring letter,
            e.g. '08A'.

    Returns:
        A list of four codes, in this order: the same key, one step up the
        wheel, one step down the wheel (both wrapping between 12 and 1),
        and the same number on the other ring.
    """
    number = int(c[:2])
    ring = c[2]
    step_up = number % 12 + 1
    step_down = (number - 2) % 12 + 1
    other_ring = 'A' if ring == 'B' else 'B'
    return [
        f'{number:02d}{ring}',
        f'{step_up:02d}{ring}',
        f'{step_down:02d}{ring}',
        f'{number:02d}{other_ring}',
    ]

def sequence_chromatic_set(tracks, target_dur_sec=3600):
    """Build one harmonically flowing set from ``tracks``.

    The set opens with the slowest track. Each following track is the
    slowest remaining one whose Camelot code is compatible with the current
    track; when none is compatible, the slowest remaining track is taken
    regardless of key. Tracks are added until the accumulated duration
    reaches ``target_dur_sec`` or no tracks remain.

    Args:
        tracks: list of dicts with at least 'bpm', 'camelot' and 'duration'
            (seconds). The list itself is not modified.
        target_dur_sec: target length of the set in seconds.

    Returns:
        A tuple ``(ordered, remaining)``: the sequenced set and the unused
        tracks sorted by ascending BPM. Both are empty lists when ``tracks``
        is empty.
    """
    if not tracks:
        # Same (ordered, remaining) shape as the normal path, so callers can
        # always unpack the result.
        return [], []

    pool = sorted(tracks, key=lambda x: x['bpm'])
    cur = pool.pop(0)
    ordered = [cur]
    dur = cur['duration']
    while pool and dur < target_dur_sec:
        ck = get_next_harmonic(cur['camelot'])
        # The pool is BPM-sorted, so the first compatible hit is the slowest
        # one; index 0 is the fallback when no key is compatible.
        idx = next(
            (i for i, t in enumerate(pool)
             if t['camelot'] in ck or t['camelot'] == cur['camelot']),
            0,
        )
        cur = pool.pop(idx)
        ordered.append(cur)
        dur += cur['duration']
    return ordered, pool

def clean_name(n):
    """Turn a file path into a readable track title.

    Takes the base name, drops the text after the last dot, removes a
    leading run of word characters/hyphens up to and including its first
    hyphen (e.g. a '01-' prefix), replaces every hyphen, underscore and dot
    with a space, and strips surrounding whitespace.
    """
    stem = os.path.basename(n).rsplit('.', 1)[0]
    without_prefix = re.sub(r'^[\w\-]+?-', '', stem)
    spaced = re.sub(r'[\-\_\.]+?', ' ', without_prefix)
    return spaced.strip()

## 🚀 4. Lanzamiento del Motor (GPU + Parallel CPU)

In [None]:
# --- Locations and settings -------------------------------------------------
# Prefer the Kaggle dataset mount; fall back to a local folder when it is absent.
INPUT_DIR = '/kaggle/input/datasets/danieldobles/slavic-songs'
if not os.path.exists(INPUT_DIR):
    INPUT_DIR = 'Slavic Data_Set'
# The reference track sits next to the songs, so it follows the same fallback
# (on Kaggle this resolves to the same path as before).
REF_FILE = os.path.join(INPUT_DIR, 'REF.flac')
OUT_DIR = '/kaggle/working/MASTERED_CURATION_RESULTS'
SET_DURATION = 60 * 60  # target length of each set, in seconds
N_GROUPS = 3            # number of semantic clusters (output groups)

os.makedirs(OUT_DIR, exist_ok=True)
# Every supported audio file except the reference; the extension check is
# case-insensitive so '.MP3' / '.WAV' files are not silently ignored.
files = [
    os.path.join(INPUT_DIR, f)
    for f in os.listdir(INPUT_DIR)
    if f.lower().endswith(('.mp3', '.wav', '.flac')) and 'REF' not in f.upper()
]

# --- Analysis: batches of 32 tracks -----------------------------------------
print(f"üß† Analizando {len(files)} temas en GPU...")
analyzer = NeuralAnalyzer(device=DEVICE)
library = []
for i in tqdm(range(0, len(files), 32)):
    library.extend(analyzer.analyze_batch(files[i:i+32]))

# --- Semantic clustering on the normalized embeddings -----------------------
print("üîç Clustering Sem√°ntico...")
if library:
    X = normalize(np.array([t['embedding'] for t in library]))
    # KMeans rejects n_clusters > n_samples, so small libraries get fewer groups.
    n_groups = min(N_GROUPS, len(library))
    labels = KMeans(n_clusters=n_groups, n_init=10).fit_predict(X)
    clusters = {
        i: [track for track, label in zip(library, labels) if label == i]
        for i in range(n_groups)
    }
else:
    print("Sin temas analizables: no hay nada que masterizar.")
    clusters = {}

# --- Sequencing and parallel mastering --------------------------------------
print(f"\n‚ö° MASTERING PARALELO EN CPU ({MAX_WORKERS} canciones simult√°neas)...")
for ci, tracks in clusters.items():
    group_name = f'Group_{chr(65+ci)}'
    pool = tracks
    set_idx = 1
    # Each pass carves one set out of the pool until the cluster is exhausted.
    while pool:
        set_dir = os.path.join(OUT_DIR, group_name, f'Set_{set_idx}')
        os.makedirs(set_dir, exist_ok=True)
        oset, pool = sequence_chromatic_set(pool, SET_DURATION)

        # One (target, reference, output) task per track, numbered in set order.
        master_tasks = []
        for j, t in enumerate(oset):
            out_name = f"{str(j+1).zfill(2)} - [{t['camelot']}] {t['bpm']}BPM {clean_name(t['path'])}.wav"
            master_tasks.append((t['path'], REF_FILE, os.path.join(set_dir, out_name)))

        with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
            futures = [executor.submit(apply_mastering_worker, *task) for task in master_tasks]
            outcomes = [
                fut.result()
                for fut in tqdm(as_completed(futures), total=len(futures), desc=f"Mastering {group_name} Set {set_idx}", leave=False)
            ]
        # The worker reports failure via its return value; surface the count
        # so a partially mastered set does not pass unnoticed.
        failed = sum(1 for ok in outcomes if not ok)
        if failed:
            print(f"  {failed} de {len(outcomes)} temas fallaron en {group_name} Set {set_idx}")
        set_idx += 1

# --- Package the results and show a download link ---------------------------
shutil.make_archive('SPECTRAL_AFFINITY_MASTERED', 'zip', OUT_DIR)
display(HTML(f"<h3>üöÄ <a href='SPECTRAL_AFFINITY_MASTERED.zip'>DESCARGAR LIBRER√çA MASTERIZADA</a></h3>"))