# 🎵 Spectral Affinity: Neural AI Pipeline (MERT)

This version upgrades the "Hearing" system to use **MERT (Music undERstanding model with large-scale self-supervised Training)**.
Instead of mathematical formulas, it uses a Neural Network pre-trained on millions of songs to understand musical context.

---

### 1. Environment Setup

In [None]:
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
!pip install librosa soundfile tqdm joblib scikit-learn transformers torchaudio

### 2. Imports & AI Model Loader

In [None]:
import os
import re
import glob
import shutil
import pathlib
import warnings
import numpy as np
from tqdm.auto import tqdm
import torchaudio
import torchaudio.transforms as T
from transformers import Wav2Vec2FeatureExtractor, AutoModel
from sklearn.preprocessing import StandardScaler, normalize
from sklearn.cluster import AffinityPropagation, KMeans as skKMeans
from IPython.display import FileLink

# Suppress every Python warning for the rest of the session. Note that this
# also hides convergence warnings emitted by the clustering steps.
warnings.filterwarnings('ignore')

# Optional GPU KMeans: HAS_CUML records whether cuML could be imported so the
# clustering step can pick between cuKMeans and scikit-learn's KMeans.
try:
    import cuml
    from cuml.cluster import KMeans as cuKMeans
except ImportError:
    HAS_CUML = False
else:
    HAS_CUML = True

# Load the MERT feature extractor and model once; both are reused for every
# file. NOTE(review): trust_remote_code=True runs Python code shipped with the
# model repository -- keep MODEL_ID pointed at a source you trust.
MODEL_ID = "m-a-p/MERT-v1-95M"
print("üß† Loading MERT-v1-95M AI Model...")
processor = Wav2Vec2FeatureExtractor.from_pretrained(MODEL_ID, trust_remote_code=True)
model = AutoModel.from_pretrained(MODEL_ID, trust_remote_code=True)
model = model.to(device)
model.eval()  # inference mode: disables dropout and similar training-only behaviour
print(f"‚úÖ Model loaded on {device}")

### 3. Neural Logic

In [None]:
# Library-specific prefixes removed (case-insensitively, in this order) from
# the start of a file name.
_NAME_PREFIXES = (r"^Slavic-", r"^Theme_OST-", r"^My_Workspace-", r"^audio-")

# A UUID at the very end of the name, optionally preceded by one separator
# character and optionally followed by a closing parenthesis.
_TRAILING_UUID = (
    r"[\(\.\-_\s]?[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}"
    r"-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}[\)]?$"
)


def _trim_dangling_parens(text):
    """Remove unmatched parentheses (and adjacent ' '/'-') from the ends of *text*."""
    while text:
        opens, closes = text.count("("), text.count(")")
        if text[0] == ")" or (text[0] == "(" and opens > closes):
            text = text[1:].lstrip(" -")
        elif text[-1] == "(" or (text[-1] == ")" and closes > opens):
            text = text[:-1].rstrip(" -")
        else:
            break
    return text


def clean_filename(filename):
    """Return a tidied, human-readable version of an audio file name.

    The extension (text after the last '.') is preserved. From the rest of the
    name this removes the known library prefixes, a trailing UUID, underscores
    (turned into spaces), leading/trailing separators and repeated whitespace.

    Parentheses that belong to the title are kept: only *unmatched* ones left
    behind at either end (for example after a "(<uuid>)" suffix was removed)
    are dropped, so "Song (Remix).mp3" is no longer truncated to
    "Song (Remix.mp3".

    Args:
        filename: Base name of the file, including its extension.

    Returns:
        The cleaned name. Names without a '.' are returned unchanged; if
        nothing is left of the name body, "Unnamed.<ext>" is returned.
    """
    if '.' not in filename:
        return filename
    name_body, ext = filename.rsplit('.', 1)

    for prefix in _NAME_PREFIXES:
        name_body = re.sub(prefix, "", name_body, flags=re.IGNORECASE)
    name_body = re.sub(_TRAILING_UUID, "", name_body)

    # Underscores become spaces first, so only ' ' and '-' remain as separators.
    name_body = name_body.replace("_", " ").strip(" -")
    name_body = _trim_dangling_parens(name_body)
    name_body = re.sub(r"\s+", " ", name_body).strip()
    return f"{name_body if name_body else 'Unnamed'}.{ext}"

def get_ai_embeddings(file_path, duration=15): # 15s is enough for MERT to get the vibe
    """Embed the centre `duration` seconds of an audio file with the MERT model.

    The clip is resampled to 24 kHz, mixed down to mono, passed through the
    module-level `processor` and `model`, and the last hidden state is averaged
    over the time axis.

    Args:
        file_path: Path of the audio file to read with torchaudio.
        duration: Length in seconds of the centre excerpt to analyse. May be a
            float; it is converted to a whole number of frames.

    Returns:
        A NumPy array holding the time-averaged embedding on success
        (presumably one vector of the model's hidden size -- confirm against
        the model config). On any failure the function does not raise; it
        returns a string starting with "ERROR:" that callers must check for.
    """
    try:
        # Load Audio
        info = torchaudio.info(file_path)
        sr = info.sample_rate
        total_frames = info.num_frames

        # Crop center. int() keeps frame counts integral for float durations.
        target_frames = int(duration * sr)
        if total_frames > target_frames:
            start_frame = (total_frames - target_frames) // 2
            waveform, _ = torchaudio.load(file_path, frame_offset=start_frame, num_frames=target_frames)
        else:
            waveform, _ = torchaudio.load(file_path)

        if waveform.numel() == 0:
            raise ValueError("audio file contains no samples")

        # Safety net: the branch above trusts the reported frame count. If the
        # metadata under-reports it, the whole track was just loaded; crop it
        # here so the model never receives more than `duration` seconds.
        loaded_frames = waveform.shape[-1]
        if loaded_frames > target_frames:
            offset = (loaded_frames - target_frames) // 2
            waveform = waveform[..., offset:offset + target_frames]

        # Resample to 24k (MERT Native)
        if sr != 24000:
            resampler = T.Resample(sr, 24000).to(waveform.device)
            waveform = resampler(waveform)

        # Mix to mono
        if waveform.shape[0] > 1:
            waveform = waveform.mean(dim=0, keepdim=True)

        # Prepare input. squeeze(0) drops only the channel axis, so even a
        # one-sample clip stays one-dimensional.
        input_values = processor(waveform.squeeze(0).numpy(), sampling_rate=24000, return_tensors="pt").input_values.to(device)

        # Inference
        with torch.no_grad():
            outputs = model(input_values)
            # Use the last hidden state averaged over time
            hidden_states = outputs.last_hidden_state
            embeddings = hidden_states.mean(dim=1).squeeze().cpu().numpy()

        return embeddings

    except Exception as e:
        # Deliberate best-effort: one unreadable file must not abort the batch.
        return f"ERROR: {str(e)}"

def organize_files(file_paths, rel_dest_paths, output_dir):
    """Copy each source file to its destination below `output_dir`.

    `rel_dest_paths[i]` is the destination of `file_paths[i]`, relative to
    `output_dir`. Missing directories are created. An existing destination is
    never overwritten: " (1)", " (2)", ... is inserted before the extension
    until an unused name is found. Files are copied with `shutil.copy2`.
    """
    pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)

    progress = tqdm(file_paths, desc="üìÅ Organizing", leave=True)
    for position, source in enumerate(progress):
        target = os.path.join(output_dir, rel_dest_paths[position])
        pathlib.Path(os.path.dirname(target)).mkdir(parents=True, exist_ok=True)

        # Probe "<stem> (n)<suffix>" with n = 1, 2, ... until the name is free.
        stem, suffix = os.path.splitext(target)
        candidate = target
        attempt = 0
        while os.path.exists(candidate):
            attempt += 1
            candidate = f"{stem} ({attempt}){suffix}"

        shutil.copy2(source, candidate)

### 4. Neural Execution

In [None]:
# --- CONFIGURATION ---
INPUT_DIR = "/kaggle/input/datasets/danieldobles/ost-songs"
OUTPUT_DIR = "/kaggle/working/organized_music"
N_CONTINENTS = 5  # Requested number of top-level clusters (capped at the number of usable files)
# Passed to AffinityPropagation as `damping`, which must lie in [0.5, 1.0).
# Damping stabilises the message updates; the number of sub-clusters is
# governed mainly by AffinityPropagation's `preference`, not by this value.
ISLAND_SENSITIVITY = 0.8
CLEAN_NAMES = True
# ---------------------

print("üéµ --- SPECTRAL AFFINITY: MERT NEURAL PIPELINE ---")

print("üîç Step 1/4: Mapping library...")
audio_extensions = ['*.mp3', '*.wav', '*.flac', '*.ogg', '*.m4a']
file_paths = []
for ext in audio_extensions:
    # With recursive=True, "**" also matches zero directories, so this single
    # pattern covers files directly inside INPUT_DIR as well.
    file_paths.extend(glob.glob(os.path.join(INPUT_DIR, "**", ext), recursive=True))
# Sorted so the processing order (and therefore the clustering input) is the
# same on every run; a bare set has no stable order.
file_paths = sorted(set(file_paths))
print(f"‚úÖ Found {len(file_paths)} files.")

if not file_paths:
    print("‚ùå ERROR: Nothing to process!")
else:
    print("\nüß† Step 2/4: Neural Listening (MERT AI on GPU)...")
    results = []
    errors = []

    # Sequential GPU inference is usually fast enough for MERT-95M
    for p in tqdm(file_paths, desc="üéß Embedding", leave=True):
        res = get_ai_embeddings(p)
        if isinstance(res, str) and res.startswith("ERROR:"):
            # Keep the path with the message so failures can be traced.
            errors.append(f"{p}: {res}")
            results.append(None)
        else:
            results.append(res)

    if errors:
        # Report skipped files instead of dropping them silently.
        print(f"WARNING: {len(errors)} of {len(file_paths)} files could not be embedded and were skipped:")
        for message in errors[:10]:
            print(f"  {message}")
        if len(errors) > 10:
            print(f"  ... and {len(errors) - 10} more (see the `errors` list)")

    valid_embeddings = [r for r in results if r is not None]
    valid_paths = [p for r, p in zip(results, file_paths) if r is not None]

    if valid_embeddings:
        # Normalize embeddings for Cosine Similarity behavior with KMeans
        X_global = normalize(np.array(valid_embeddings).astype('float32'))

        # KMeans cannot fit more clusters than samples; cap the request.
        n_continents = min(N_CONTINENTS, len(valid_paths))
        if n_continents < N_CONTINENTS:
            print(f"WARNING: only {len(valid_paths)} usable files; using {n_continents} clusters instead of {N_CONTINENTS}.")

        print(f"\nüåç Step 3/4: Continent Clustering... {'(GPU)' if HAS_CUML else '(CPU)'}")
        if HAS_CUML:
            primary_labels = cuKMeans(n_clusters=n_continents).fit_predict(X_global)
        else:
            # Seeded, like the AffinityPropagation step, so reruns agree.
            primary_labels = skKMeans(n_clusters=n_continents, n_init=10, random_state=42).fit_predict(X_global)

        cluster_letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        final_rel_paths = [""] * len(valid_paths)

        print("\nüèñÔ∏è Step 4/4: Deep Semantic Affinity (Islands)...")
        for c_idx in range(n_continents):
            indices = [i for i, l in enumerate(primary_labels) if l == c_idx]
            if not indices:
                continue

            # NOTE: letters repeat after 26 clusters (index 26 maps to "A" again).
            c_letter = cluster_letters[c_idx % 26]
            X_sub = X_global[indices]

            # Affinity Propagation finds distinct 'exemplars'
            try:
                af = AffinityPropagation(damping=ISLAND_SENSITIVITY, random_state=42).fit(X_sub)
                sub_labels = af.labels_
                n_subs = len(set(sub_labels))
            except Exception as exc:
                # Fallback if AF fails on strange data: keep the cluster flat.
                # `Exception` (not a bare except) lets Ctrl-C interrupt the run.
                print(f"WARNING: sub-clustering failed for Cluster_{c_letter} ({exc}); keeping it flat.")
                sub_labels = [0] * len(indices)
                n_subs = 1

            for i, local_idx in enumerate(indices):
                s_label = sub_labels[i]
                filename = os.path.basename(valid_paths[local_idx])
                if CLEAN_NAMES:
                    filename = clean_filename(filename)

                if n_subs > 1:
                    # Hierarchical Path: Cluster_A/Sub_1
                    sub_path = os.path.join(f"Cluster_{c_letter}", f"Sub_{s_label + 1}")
                    prefixed_name = f"[{c_letter}-{s_label + 1}] {filename}"
                    final_rel_paths[local_idx] = os.path.join(sub_path, prefixed_name)
                else:
                    # Flat Path: Cluster_A
                    final_rel_paths[local_idx] = os.path.join(f"Cluster_{c_letter}", f"[{c_letter}] {filename}")

        organize_files(valid_paths, final_rel_paths, OUTPUT_DIR)
        print(f"\n‚ú® SUCCESS: Semantic Organization Complete! Check {OUTPUT_DIR}")
    else:
        print("‚ùå ERROR: Neural extraction failed.")

### 5. Final Download

In [None]:
# Package the organised library into one ZIP and show a download link.
# Runs only if OUTPUT_DIR exists, i.e. a previous cell created it.
if os.path.exists(OUTPUT_DIR):
    print("üì¶ Packaging results (ZIP)...")
    # IPython shell escape. zip flags: -0 store without compression,
    # -r recurse into directories, -q quiet.
    # NOTE(review): "organized_music" is a relative path, so it is resolved
    # against the kernel's current working directory rather than OUTPUT_DIR;
    # this presumably relies on the cwd being /kaggle/working -- confirm.
    !zip -0 -rq /kaggle/working/organized_results.zip organized_music
    print("‚úÖ Generation complete!")
    # The link target is relative as well (same working-directory assumption).
    display(FileLink('organized_results.zip'))