# 🌌 Spectral Affinity Master: Neural Chromatic Hybrid (v4.0)

This system unifies **DSP-based Harmonic Mixing** with **Neural Semantic Affinity (MERT)** for the ultimate GPU-accelerated music organization experience.

### 🧬 Professional Hybrid Logic:
- 🧠 **Neural Brain (MERT):** Extracts deep sonic signatures to understand the 'vibe' and group affinity.
- 🔥 **DSP Brain (nnAudio GPU):** Precision detection of Musical Key, Mode, Energy, and spectral intensity.
- 🌈 **Chromatic Flow Engine:** Intelligent sequencing following the **Camelot Wheel** (e.g., 1B → 2B or 1B → 1A).
- 🏖️ **Island Clustering:** Groups tracks into semantic 'Islands' based on group affinity before sequencing.
- 📦 **Turbo-Batching:** Full CUDA saturation for lightning-fast analysis of large libraries.

### 1. 🛠️ High-Performance Environment

In [None]:
import os, warnings, json, torch, torchaudio, librosa, glob, shutil, re, gc, pathlib
import numpy as np
from tqdm.auto import tqdm
from concurrent.futures import ThreadPoolExecutor
from IPython.display import HTML, FileLink, display
import torchaudio.transforms as T
from transformers import Wav2Vec2FeatureExtractor, AutoModel, logging as hf_logging
from sklearn.preprocessing import normalize
from sklearn.cluster import AffinityPropagation, KMeans as skKMeans

warnings.filterwarnings('ignore')
hf_logging.set_verbosity_error()
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"üî• ACCELERATOR: {torch.cuda.get_device_name(0) if device == 'cuda' else 'CPU'}")

# üì¶ Install Missing GPU libs
try:
    from nnAudio.Spectrogram import CQT1992v2
except:
    !pip install -q nnAudio transformers
    from nnAudio.Spectrogram import CQT1992v2

try:
    import cuml
    from cuml.cluster import KMeans as cuKMeans
    HAS_CUML = True
except:
    HAS_CUML = False

### 2. 🧠 The Unified Neural & DSP Core

In [None]:
class SpectralMasterEngine:
    """Hybrid analyser: constant-Q key/energy detection plus MERT embeddings.

    For every track in a batch the engine produces a dict with the detected
    key, mode, Camelot code, mean spectral energy, BPM, duration, source path
    and a MERT embedding. ``self.cache`` is a plain dict persisted as JSON in
    ``cache_file``; it is loaded on construction and written by
    :meth:`save_cache` (callers decide what to store in it).

    Args:
        device: Torch device string used for the CQT layer and the MERT model.
        sr: Sample rate (Hz) of the audio handed to :meth:`process_batch`.
        cache_file: Path of the JSON cache file.
    """

    # Pitch-class names indexed by chroma bin. Bin 0 is treated as C, which
    # assumes nnAudio's default CQT ``fmin`` (a C) - confirm if fmin is changed.
    PITCH_CLASSES = ('C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B')

    # (tonic, mode) -> Camelot wheel code.
    CAMELOT_MAP = {
        ('B', 'major'): '01B', ('F#', 'major'): '02B', ('C#', 'major'): '03B', ('G#', 'major'): '04B',
        ('D#', 'major'): '05B', ('A#', 'major'): '06B', ('F', 'major'): '07B', ('C', 'major'): '08B',
        ('G', 'major'): '09B', ('D', 'major'): '10B', ('A', 'major'): '11B', ('E', 'major'): '12B',
        ('G#', 'minor'): '01A', ('D#', 'minor'): '02A', ('A#', 'minor'): '03A', ('F', 'minor'): '04A',
        ('C', 'minor'): '05A', ('G', 'minor'): '06A', ('D', 'minor'): '07A', ('A', 'minor'): '08A',
        ('E', 'minor'): '09A', ('B', 'minor'): '10A', ('F#', 'minor'): '11A', ('C#', 'minor'): '12A',
    }

    # BPM reported when tempo estimation fails for a track.
    FALLBACK_BPM = 120.0

    def __init__(self, device='cuda', sr=22050, cache_file='spectral_master_cache.json'):
        self.device = device
        self.sr = sr
        self.cache_file = cache_file
        self.cache = self._load_cache()

        # 1. DSP Layers
        # 84 bins at 12 bins/octave = 7 octaves, folded into 12 chroma bins later.
        self.cqt_layer = CQT1992v2(sr=self.sr, n_bins=84, bins_per_octave=12).to(self.device)
        # Krumhansl-Kessler major/minor key profiles (tonic at index 0).
        major = torch.tensor([6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88], device=device)
        minor = torch.tensor([6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17], device=device)
        # Columns 0-11: major keys C..B, columns 12-23: minor keys C..B.
        self.profiles = torch.stack([torch.roll(major, i) for i in range(12)] +
                                    [torch.roll(minor, i) for i in range(12)]).t()

        # 2. Neural Layers (MERT)
        print("🧠 Loading MERT Neural Brain...")
        self.mert_model_id = "m-a-p/MERT-v1-95M"
        self.processor = Wav2Vec2FeatureExtractor.from_pretrained(self.mert_model_id, trust_remote_code=True)
        self.mert_model = AutoModel.from_pretrained(self.mert_model_id, trust_remote_code=True).to(self.device)
        self.mert_model.eval()
        # The feature extractor rejects audio whose declared rate differs from
        # the rate it was configured with, so remember it for resampling.
        self.mert_sr = int(getattr(self.processor, "sampling_rate", self.sr))

    def _load_cache(self):
        """Return the cached results dict, or ``{}`` if absent or unreadable.

        A truncated/corrupt cache (e.g. from an interrupted write) is treated
        as empty so the library is simply re-analysed instead of crashing.
        """
        if not os.path.exists(self.cache_file):
            return {}
        try:
            with open(self.cache_file, 'r') as f:
                data = json.load(f)
        except (ValueError, OSError):
            # json.JSONDecodeError and UnicodeDecodeError are ValueErrors.
            return {}
        return data if isinstance(data, dict) else {}

    def save_cache(self):
        """Persist ``self.cache`` as JSON, replacing the old file atomically."""
        tmp_file = f"{self.cache_file}.tmp"
        with open(tmp_file, 'w') as f:
            json.dump(self.cache, f)
        # os.replace is atomic on the same filesystem, so an interrupted run
        # never leaves a half-written cache behind.
        os.replace(tmp_file, self.cache_file)

    def get_camelot(self, key, mode):
        """Map a tonic name and mode to its Camelot code.

        Args:
            key: Tonic as one of ``PITCH_CLASSES`` (sharps only).
            mode: ``'major'`` or ``'minor'`` (case-insensitive).

        Returns:
            The zero-padded Camelot code (e.g. ``'08B'``) or ``'00X'`` when
            the combination is unknown.
        """
        return self.CAMELOT_MAP.get((key, mode.lower()), "00X")

    def _embed(self, audio):
        """Return the time-averaged MERT embedding of one track as a list.

        Uses a 15 s window starting a quarter of the way into the track; if
        that window is shorter than 5 s, the first 15 s are used instead.
        Must be called under ``torch.no_grad()``.
        """
        window = self.sr * 15
        start = len(audio) // 4
        audio_slice = audio[start:start + window]
        if len(audio_slice) < self.sr * 5:
            audio_slice = audio[:window]

        # Resample to the rate the feature extractor was configured for;
        # passing a mismatching `sampling_rate` makes it raise ValueError.
        if self.mert_sr != self.sr:
            audio_slice = librosa.resample(audio_slice, orig_sr=self.sr, target_sr=self.mert_sr)

        inputs = self.processor(audio_slice, sampling_rate=self.mert_sr, return_tensors="pt")
        out = self.mert_model(inputs.input_values.to(self.device))
        return out.last_hidden_state.mean(dim=1).squeeze().cpu().numpy().tolist()

    def _estimate_bpm(self, audio, path):
        """Estimate tempo from the first 60 s; fall back to ``FALLBACK_BPM``."""
        try:
            onset = librosa.onset.onset_strength(y=audio[:self.sr * 60], sr=self.sr)
            # `librosa.beat.tempo` is deprecated in favour of
            # `librosa.feature.tempo`; prefer the new name when it exists.
            tempo_fn = getattr(librosa.feature, "tempo", None) or librosa.beat.tempo
            tempo = tempo_fn(onset_envelope=onset, sr=self.sr, aggregate=np.mean)
            return float(np.atleast_1d(tempo)[0])
        except Exception as exc:
            # Best-effort: keep the batch going, but make the fallback visible.
            print(f"Tempo estimation failed for {os.path.basename(path)}; "
                  f"using {self.FALLBACK_BPM} BPM ({exc})")
            return self.FALLBACK_BPM

    def process_batch(self, batch_data):
        """Analyse a batch of decoded tracks.

        Args:
            batch_data: Sequence of ``(path, audio, duration)`` tuples, where
                ``audio`` is a 1-D NumPy array (assumed to be sampled at
                ``self.sr``) and ``duration`` is in seconds.

        Returns:
            One dict per track, in input order, with the keys ``key``,
            ``mode``, ``energy``, ``duration``, ``embedding``, ``path``,
            ``bpm`` and ``camelot``.
        """
        if not batch_data:
            return []

        paths = [x[0] for x in batch_data]
        audios = [x[1] for x in batch_data]
        durations = [x[2] for x in batch_data]
        n_tracks = len(audios)

        max_len_dsp = self.sr * 120  # 2 mins for Key

        # Zero-padded (batch, samples) tensor so the CQT runs once per batch.
        # NOTE: energy is averaged over the padded length, so tracks shorter
        # than two minutes report proportionally lower energy.
        dsp_tensor = torch.zeros(n_tracks, max_len_dsp, device=self.device)
        for i, audio in enumerate(audios):
            n = min(len(audio), max_len_dsp)
            dsp_tensor[i, :n] = torch.from_numpy(audio[:n]).to(self.device)

        with torch.no_grad():
            # --- A. DSP Logic ---
            spec = self.cqt_layer(dsp_tensor)
            energy = spec.pow(2).mean(dim=(1, 2)).cpu().numpy()
            # Fold 7 octaves x 12 semitones into 12 chroma bins, summed over
            # time. `reshape` (not `view`) also works on non-contiguous output.
            chroma = spec.reshape(n_tracks, 7, 12, -1).sum(dim=(1, 3))
            chroma = chroma / (chroma.norm(dim=1, keepdim=True) + 1e-6)
            corrs = torch.matmul(chroma, self.profiles)
            best_idx = torch.argmax(corrs, dim=1).cpu().numpy()

            # --- B. Neural Logic (MERT) ---
            embeddings_list = [self._embed(audio) for audio in audios]

        batch_results = []
        for i, audio in enumerate(audios):
            idx = int(best_idx[i])
            res = {
                'key': self.PITCH_CLASSES[idx % 12],
                'mode': 'major' if idx < 12 else 'minor',
                'energy': float(energy[i]),
                'duration': float(durations[i]),
                'embedding': embeddings_list[i],
                'path': paths[i]
            }
            res['bpm'] = self._estimate_bpm(audio, paths[i])
            res['camelot'] = self.get_camelot(res['key'], res['mode'])
            batch_results.append(res)

        return batch_results

### 3. 🌈 Chromatic Mixer & Affinity Logic

In [None]:
def get_next_harmonic(current_cam):
    """Return the Camelot codes that mix harmonically with ``current_cam``.

    For a valid code (``01``-``12`` followed by ``A`` or ``B``) the result is,
    in order: the same key, one step up the wheel, one step down the wheel,
    and the same number on the other ring (the relative major/minor, e.g.
    1B -> 1A). Codes are returned zero-padded and upper-case; unpadded or
    lower-case input such as ``"8b"`` is accepted.

    Anything that is not a valid Camelot code - including the ``"00X"``
    "unknown key" sentinel - has no meaningful neighbours, so only the input
    itself is returned.
    """
    if not isinstance(current_cam, str):
        return [current_cam]

    code = current_cam.strip().upper()
    number, alpha = code[:-1], code[-1:]
    if alpha not in ('A', 'B') or not number.isdecimal() or not 1 <= int(number) <= 12:
        return [current_cam]

    num = int(number)
    step_up = (num % 12) + 1          # 12 wraps to 1
    step_down = ((num - 2) % 12) + 1  # 1 wraps to 12
    other_ring = 'A' if alpha == 'B' else 'B'

    return [
        f"{num:02d}{alpha}",        # Constant (stays in key)
        f"{step_up:02d}{alpha}",    # +1 step on wheel
        f"{step_down:02d}{alpha}",  # -1 step on wheel
        f"{num:02d}{other_ring}",   # Relative major/minor mood shift
    ]

def sequence_chromatic_set(tracks, target_duration):
    """Greedily order tracks into one set of roughly ``target_duration`` seconds.

    The first element of ``tracks`` seeds the set. While the accumulated
    duration is below ``target_duration`` (so the final set may overshoot by
    up to one track), the remaining candidate with the highest transition
    score from the current track is appended. The score is::

        0.5 * harmonic + 0.3 * semantic + 0.2 * bpm

    * harmonic: 1.0 for a *different* compatible Camelot key, 0.8 for the same
      key, 0.0 otherwise (moving around the wheel is slightly preferred).
    * semantic: cosine similarity of the two ``embedding`` vectors.
    * bpm: ``1 - |delta bpm| / 40``, clipped at 0.

    Args:
        tracks: Sequence of dicts with the keys ``camelot``, ``bpm``,
            ``duration`` and ``embedding``. The input is not modified.
        target_duration: Desired set length in seconds.

    Returns:
        The selected tracks in play order (``[]`` for empty input). When two
        candidates score exactly the same, the one earlier in ``tracks`` wins.
    """
    if not tracks:
        return []

    pool = list(tracks)

    # Embeddings arrive as plain lists; convert and norm each one at most once
    # instead of on every pairwise comparison.
    vector_cache = {}

    def vector_of(i):
        if i not in vector_cache:
            vec = np.asarray(pool[i]['embedding'])
            vector_cache[i] = (vec, np.linalg.norm(vec))
        return vector_cache[i]

    def transition_score(cur, cand, compat_keys):
        current, t = pool[cur], pool[cand]

        # 1. Harmonic Score (crucial for chromatic mixing)
        if t['camelot'] == current['camelot']:
            h_score = 0.8  # Good
        elif t['camelot'] in compat_keys:
            h_score = 1.0  # Best (moves the wheel)
        else:
            h_score = 0.0  # Clash

        # 2. BPM Score (ensures smooth tempo flow)
        bpm_diff = abs(t['bpm'] - current['bpm'])
        b_score = max(0, 1.0 - (bpm_diff / 40.0))

        # 3. Semantic Score (group affinity via MERT)
        cur_vec, cur_norm = vector_of(cur)
        cand_vec, cand_norm = vector_of(cand)
        s_score = np.dot(cur_vec, cand_vec) / (cur_norm * cand_norm + 1e-9)

        # Final weighted score
        return (h_score * 0.5) + (s_score * 0.3) + (b_score * 0.2)

    # Starting track (seed)
    cur = 0
    remaining = list(range(1, len(pool)))
    ordered_set = [pool[cur]]
    current_dur = pool[cur]['duration']

    while remaining and current_dur < target_duration:
        compat_keys = set(get_next_harmonic(pool[cur]['camelot']))
        # One linear scan for the best candidate; re-sorting the whole pool
        # just to take its first element is unnecessary.
        best = max(remaining, key=lambda cand: transition_score(cur, cand, compat_keys))
        remaining.remove(best)
        ordered_set.append(pool[best])
        current_dur += pool[best]['duration']
        cur = best

    return ordered_set

def clean_name(n):
    """Turn a track path into a tidy display filename.

    Steps, applied to the basename's stem (text before the last dot):

    1. Drop the shortest leading run of word characters/hyphens that ends in
       a hyphen (e.g. a ``01-`` track-number prefix).
    2. Replace every run of ``-``, ``_`` and ``.`` with a single space and
       collapse the resulting whitespace.

    The extension is preserved. A name without any dot is returned without a
    trailing dot instead of raising, and if cleaning would leave nothing, the
    original stem is kept so the file never ends up with an empty name.
    """
    base = os.path.basename(n)
    stem, dot, ext = base.rpartition('.')
    if not dot:
        # rpartition puts everything in the last slot when there is no dot.
        stem, ext = base, ''

    cleaned = re.sub(r"^[\w\-]+?-", "", stem)
    # Greedy match so "a--b" or "a_-_b" yields one space, not one per char.
    cleaned = re.sub(r"[\-_.]+", " ", cleaned)
    cleaned = " ".join(cleaned.split())
    if not cleaned:
        cleaned = stem

    return f"{cleaned}.{ext}" if dot else cleaned

### 4. 🚀 Run Execution Pipeline

In [None]:
# CONFIG
INPUT_DIR = "/kaggle/input/datasets/danieldobles/ost-songs"
OUTPUT_DIR = "/kaggle/working/master_sets"
BATCH_SIZE = 16
SET_DUR = 75 * 60 # 75 mins per set
N_CLUSTERS = 4    # Semantic Affinity groups

master = SpectralMasterEngine(device=device)

print("üîç Scanning Library...")
paths = []
for ext in ['*.mp3', '*.wav', '*.flac', '*.m4a']:
    paths.extend(glob.glob(os.path.join(INPUT_DIR, "**", ext), recursive=True))
paths = list(set(paths))

to_analyze = [p for p in paths if p not in master.cache]
print(f"üì¶ Total: {len(paths)} | üß† Cached: {len(paths)-len(to_analyze)} | üöÄ New: {len(to_analyze)}")

if to_analyze:
    chunks = [to_analyze[i:i+BATCH_SIZE] for i in range(0, len(to_analyze), BATCH_SIZE)]
    with ThreadPoolExecutor(max_workers=2) as pool:
        for chunk in tqdm(chunks, desc="üî• AI DSP Extraction"):
            def load(p):
                try: y, _ = librosa.load(p, sr=22050); return (p, y, len(y)/22050)
                except: return None
            batch_data = [x for x in list(pool.map(load, chunk)) if x is not None]
            if not batch_data: continue
            results = master.process_batch(batch_data)
            for r in results: master.cache[r['path']] = r
            master.save_cache()
            gc.collect()

library = [master.cache[p] for p in paths if p in master.cache]

# --- STEP A: Semantic Grouping (Affinity) ---
print("üèñÔ∏è Clustering by Semantic Affinity...")
X = normalize(np.array([t['embedding'] for t in library]))
if HAS_CUML:
    p_labels = cuKMeans(n_clusters=N_CLUSTERS).fit_predict(X)
else:
    p_labels = skKMeans(n_clusters=N_CLUSTERS, n_init=10).fit_predict(X)

clusters = {i: [] for i in range(N_CLUSTERS)}
for i, l in enumerate(p_labels): clusters[l].append(library[i])

# --- STEP B: Chromatic Sequencing within Clusters ---
if os.path.exists(OUTPUT_DIR): shutil.rmtree(OUTPUT_DIR)

print("üåà Sequencing Chromatic Sets...")
final_sets_count = 0
for c_idx, cluster_tracks in clusters.items():
    c_name = f"Group_{chr(65+c_idx)}"
    cluster_tracks.sort(key=lambda x: x['energy'])
    
    temp_pool = list(cluster_tracks)
    set_idx = 1
    while temp_pool:
        ordered_set = sequence_chromatic_set(temp_pool, SET_DUR)
        if not ordered_set: break
        
        used_paths = {t['path'] for t in ordered_set}
        temp_pool = [t for t in temp_pool if t['path'] not in used_paths]
        
        s_dir = os.path.join(OUTPUT_DIR, c_name, f"Set_{set_idx}")
        os.makedirs(s_dir, exist_ok=True)
        for i, t in enumerate(ordered_set):
            meta = f"[{t['camelot']} - {int(t['bpm'])}BPM]"
            filename = f"{str(i+1).zfill(2)} - {meta} {clean_name(t['path'])}"
            shutil.copy2(t['path'], os.path.join(s_dir, filename))
        
        set_idx += 1
        final_sets_count += 1

print(f"‚úÖ DONE! Generated {final_sets_count} chromatic sets in {N_CLUSTERS} affinity groups.")
zip_name = "SpectralAffinity_MasterMix.zip"
!zip -0 -rq {zip_name} master_sets
display(HTML(f"<h3>üöÄ <a href='{zip_name}' id='dl'>DOWNLOAD MASTER MIXES</a></h3>"))
display(HTML("<script>setTimeout(() => document.getElementById('dl').click(), 1000);</script>"))