# Phase 2: Audio Baseline Experiments

Comprehensive experiments for the ISMIR paper.

## Experiments
- **B0**: Baseline re-run (MERT+MLP, L13-24, mean pool)
- **A1-A3**: Baselines (linear probe, Mel-CNN, raw statistics)
- **B1a-B1d**: Layer ablation (1-6, 7-12, 13-24, 1-24)
- **B2a-B2c**: Pooling ablation (max, attention, LSTM)
- **C1a-C1b**: Loss ablation (hybrid MSE+CCC, pure CCC)

## Requirements
- Compute: A100 (80GB VRAM)
- rclone installed and configured with a `gdrive:` remote

In [None]:
import torch

# This notebook refuses to run without a CUDA device: report availability,
# bail out early if there is none, otherwise describe device 0.
cuda_ok = torch.cuda.is_available()
print(f"CUDA available: {cuda_ok}")
if not cuda_ok:
    raise RuntimeError("GPU required")

print(f"GPU: {torch.cuda.get_device_name(0)}")
total_bytes = torch.cuda.get_device_properties(0).total_memory
print(f"Memory: {total_bytes / 1e9:.1f} GB")

In [None]:
# Install rclone using the install script from rclone.org, keeping only the
# "successfully"/"already" lines of its output.
# NOTE(review): the `|| echo` fallback runs whenever grep matches nothing, so
# "rclone installed" is also printed when the install fails - confirm intent.
!curl -fsSL https://rclone.org/install.sh | sudo bash 2>&1 | grep -E "(successfully|already)" || echo "rclone installed"

In [None]:
# Install dependencies and clone repo
!pip install transformers librosa soundfile pytorch_lightning nnAudio --quiet

# Clone the repo
import os
REPO_DIR = '/tmp/crescendai'
if os.path.exists(REPO_DIR):
    !cd {REPO_DIR} && git pull origin main
else:
    !git clone https://github.com/jai-dhiman/crescendai.git {REPO_DIR}

print(f"Repo: {REPO_DIR}")

In [None]:
# 3. Setup imports
import sys
sys.path.insert(0, f'{REPO_DIR}/model/src')

import json
import subprocess
import warnings
from pathlib import Path

import numpy as np
import pytorch_lightning as pl

# Import from our package
from audio_experiments import PERCEPIANO_DIMENSIONS, BASE_CONFIG, SEED
from audio_experiments.extractors import (
    extract_mert_for_layer_range,
    extract_mel_spectrograms,
    extract_statistics_for_all,
)
from audio_experiments.models import BaseMERTModel, LinearProbeModel, MelCNNModel, StatsMLPModel
from audio_experiments.training import (
    run_4fold_mert_experiment,
    run_4fold_mel_experiment,
    run_4fold_stats_experiment,
    restore_all_from_gdrive,
    sync_experiment_to_gdrive,
    print_experiment_status,
)

# Global runtime configuration for the session. `torch` is not imported in this
# cell; it comes from the GPU-check cell that runs earlier in the notebook.

# Suppress every Python warning from here on.
warnings.filterwarnings('ignore')
# Lower float32 matmul precision to 'medium' - presumably a speed/accuracy
# trade-off for the target GPU; see the torch docs and confirm it is acceptable.
torch.set_float32_matmul_precision('medium')
# Seed via Lightning with workers=True, using the package-wide SEED.
pl.seed_everything(SEED, workers=True)

print(f"PyTorch: {torch.__version__}")
print(f"Imports: OK")

In [None]:
# 4. Setup paths and download data
# Local working tree: everything lives under DATA_ROOT.
DATA_ROOT = Path('/tmp/phase2')
AUDIO_DIR = DATA_ROOT / 'audio'
LABEL_DIR = DATA_ROOT / 'labels'
MERT_CACHE_ROOT = DATA_ROOT / 'mert_cache'
MEL_CACHE_DIR = DATA_ROOT / 'mel_cache'
STATS_CACHE_DIR = DATA_ROOT / 'stats_cache'
CHECKPOINT_ROOT = DATA_ROOT / 'checkpoints'
RESULTS_DIR = DATA_ROOT / 'results'
LOG_DIR = DATA_ROOT / 'logs'

# Remote (rclone `gdrive:`) locations used for inputs, caches and results.
GDRIVE_AUDIO = 'gdrive:crescendai_data/audio_baseline/percepiano_rendered'
GDRIVE_LABELS = 'gdrive:crescendai_data/percepiano_labels'
GDRIVE_FOLDS = 'gdrive:crescendai_data/audio_baseline/audio_fold_assignments.json'
GDRIVE_MERT_CACHE = 'gdrive:crescendai_data/audio_baseline/mert_embeddings'
GDRIVE_RESULTS = 'gdrive:crescendai_data/checkpoints/audio_phase2'

# Create every local directory up front; existing ones are left untouched.
for local_dir in (
    AUDIO_DIR,
    LABEL_DIR,
    MERT_CACHE_ROOT,
    MEL_CACHE_DIR,
    STATS_CACHE_DIR,
    CHECKPOINT_ROOT,
    RESULTS_DIR,
    LOG_DIR,
):
    local_dir.mkdir(parents=True, exist_ok=True)

def run_rclone(cmd, desc, check=True):
    """Run an rclone command, announcing it first.

    Output is captured rather than streamed, so without an explicit status
    check a failed transfer would go completely unnoticed.

    Args:
        cmd: Argument list handed to ``subprocess.run`` (no shell involved).
        desc: Short description printed before the command runs and reused
            in the error message.
        check: When true (the default), a non-zero exit status raises
            instead of being ignored. Pass ``False`` for best-effort calls.

    Returns:
        The ``subprocess.CompletedProcess`` of the finished command.

    Raises:
        RuntimeError: If ``check`` is true and the command exits non-zero.
    """
    print(f"{desc}...")
    # errors='replace' keeps odd bytes in tool output from masking the real error.
    result = subprocess.run(cmd, capture_output=True, text=True, errors='replace')
    if check and result.returncode != 0:
        detail = (result.stderr or result.stdout or "").strip()
        raise RuntimeError(
            f"{desc} failed (exit code {result.returncode}): {detail}"
        )
    return result

# Check rclone: the `gdrive:` remote must be listed before anything is fetched.
remotes = subprocess.run(['rclone', 'listremotes'], capture_output=True, text=True).stdout
if 'gdrive:' not in remotes:
    raise RuntimeError("rclone 'gdrive' not configured")

# Download data: audio and labels are directory copies, folds is a single file.
FOLD_FILE = DATA_ROOT / 'folds.json'
downloads = (
    (['rclone', 'copy', GDRIVE_AUDIO, str(AUDIO_DIR), '--progress'], "Downloading audio"),
    (['rclone', 'copy', GDRIVE_LABELS, str(LABEL_DIR)], "Downloading labels"),
    (['rclone', 'copyto', GDRIVE_FOLDS, str(FOLD_FILE)], "Downloading folds"),
)
for rclone_cmd, message in downloads:
    run_rclone(rclone_cmd, message)

# Load labels and folds
LABEL_FILE = LABEL_DIR / 'label_2round_mean_reg_19_with0_rm_highstd0.json'
with LABEL_FILE.open() as label_fh:
    LABELS = json.load(label_fh)
with FOLD_FILE.open() as fold_fh:
    FOLD_ASSIGNMENTS = json.load(fold_fh)

ALL_KEYS = list(LABELS.keys())
wav_count = sum(1 for _ in AUDIO_DIR.glob('*.wav'))
print(f"Audio: {wav_count} files")
print(f"Labels: {len(LABELS)} segments")

In [None]:
# 5. Restore MERT cache and completed experiments from GDrive
DEFAULT_MERT_DIR = MERT_CACHE_ROOT / 'L13-24'
DEFAULT_MERT_DIR.mkdir(parents=True, exist_ok=True)

# Pull the remote cache only when the listing succeeds and mentions a .pt file.
result = subprocess.run(['rclone', 'lsf', GDRIVE_MERT_CACHE], capture_output=True, text=True)
remote_cache_found = result.returncode == 0 and '.pt' in result.stdout
if remote_cache_found:
    print("Restoring MERT cache...")
    run_rclone(['rclone', 'copy', GDRIVE_MERT_CACHE, str(DEFAULT_MERT_DIR)], "Restoring cache")
    restored_count = sum(1 for _ in DEFAULT_MERT_DIR.glob('*.pt'))
    print(f"Restored: {restored_count} embeddings")

# Results tracker: experiment id -> result dict, filled in by the cells below.
ALL_RESULTS = {}

# All experiment IDs in execution order
ALL_EXPERIMENT_IDS = [
    'B0_baseline',
    'A1_linear_probe',
    'A2_mel_cnn',
    'A3_raw_stats',
    'B1a_layers_1-6',
    'B1b_layers_7-12',
    'B1c_layers_13-24',
    'B1d_layers_1-24',
    'B2a_max_pool',
    'B2b_attention_pool',
    'B2c_lstm_pool',
    'C1a_hybrid_loss',
    'C1b_pure_ccc',
]

# Restore completed experiments from GDrive (enables automatic skipping)
print("\nChecking GDrive for completed experiments...")
restored = restore_all_from_gdrive(GDRIVE_RESULTS, RESULTS_DIR, CHECKPOINT_ROOT, ALL_RESULTS)

# Show status
from audio_experiments.training import get_completed_experiments
completed = get_completed_experiments(GDRIVE_RESULTS)
print_experiment_status(ALL_EXPERIMENT_IDS, completed)

---
## Experiments

In [None]:
# B0: Baseline
# Populate DEFAULT_MERT_DIR for the (13, 25) layer range. The end bound looks
# exclusive, matching the 'L13-24' directory name - confirm in the extractors.
extract_mert_for_layer_range(13, 25, AUDIO_DIR, DEFAULT_MERT_DIR, ALL_KEYS)


def make_mert_model(cfg):
    """Build a BaseMERTModel from a config dict.

    ``pooling`` and ``loss_type`` are optional keys and fall back to
    'mean' and 'mse'; every other key read here is required.
    """
    model_kwargs = {
        'input_dim': cfg['input_dim'],
        'hidden_dim': cfg['hidden_dim'],
        'dropout': cfg['dropout'],
        'learning_rate': cfg['learning_rate'],
        'weight_decay': cfg['weight_decay'],
        'pooling': cfg.get('pooling', 'mean'),
        'loss_type': cfg.get('loss_type', 'mse'),
        'max_epochs': cfg['max_epochs'],
    }
    return BaseMERTModel(**model_kwargs)


b0_results = run_4fold_mert_experiment(
    'B0_baseline',
    'MERT+MLP, L13-24, mean pooling',
    make_mert_model,
    DEFAULT_MERT_DIR,
    LABELS,
    FOLD_ASSIGNMENTS,
    BASE_CONFIG,
    CHECKPOINT_ROOT,
    RESULTS_DIR,
    LOG_DIR,
)
ALL_RESULTS['B0_baseline'] = b0_results
# Push this experiment's outputs to GDrive as soon as it finishes.
sync_experiment_to_gdrive(
    'B0_baseline',
    b0_results,
    RESULTS_DIR,
    CHECKPOINT_ROOT,
    GDRIVE_RESULTS,
    ALL_RESULTS,
)

In [None]:
# A1: Linear Probe
def make_linear_probe(cfg):
    """Build a LinearProbeModel from the config keys it needs."""
    probe_kwargs = {
        'input_dim': cfg['input_dim'],
        'learning_rate': cfg['learning_rate'],
        'weight_decay': cfg['weight_decay'],
        'max_epochs': cfg['max_epochs'],
    }
    return LinearProbeModel(**probe_kwargs)


# Same runner and embeddings directory as B0; only the model factory differs.
a1_results = run_4fold_mert_experiment(
    'A1_linear_probe',
    'Linear probe on MERT',
    make_linear_probe,
    DEFAULT_MERT_DIR,
    LABELS,
    FOLD_ASSIGNMENTS,
    BASE_CONFIG,
    CHECKPOINT_ROOT,
    RESULTS_DIR,
    LOG_DIR,
)
ALL_RESULTS['A1_linear_probe'] = a1_results
sync_experiment_to_gdrive(
    'A1_linear_probe',
    a1_results,
    RESULTS_DIR,
    CHECKPOINT_ROOT,
    GDRIVE_RESULTS,
    ALL_RESULTS,
)

In [None]:
# A2: Mel-CNN
# Mel spectrograms are extracted with MEL_CACHE_DIR as the target (presumably
# cached there); the experiment below reads from the same directory.
extract_mel_spectrograms(AUDIO_DIR, MEL_CACHE_DIR, ALL_KEYS)

a2_results = run_4fold_mel_experiment(
    'A2_mel_cnn',
    '4-layer CNN on mel spectrograms',
    MEL_CACHE_DIR,
    LABELS,
    FOLD_ASSIGNMENTS,
    BASE_CONFIG,
    CHECKPOINT_ROOT,
    RESULTS_DIR,
    LOG_DIR,
)
ALL_RESULTS['A2_mel_cnn'] = a2_results
sync_experiment_to_gdrive(
    'A2_mel_cnn',
    a2_results,
    RESULTS_DIR,
    CHECKPOINT_ROOT,
    GDRIVE_RESULTS,
    ALL_RESULTS,
)

In [None]:
# A3: Raw Statistics
# Statistics are extracted with STATS_CACHE_DIR as the target (presumably
# cached there); the experiment below reads from the same directory.
extract_statistics_for_all(AUDIO_DIR, STATS_CACHE_DIR, ALL_KEYS)

a3_results = run_4fold_stats_experiment(
    'A3_raw_stats',
    'MLP on audio statistics (49-dim)',
    STATS_CACHE_DIR,
    LABELS,
    FOLD_ASSIGNMENTS,
    BASE_CONFIG,
    CHECKPOINT_ROOT,
    RESULTS_DIR,
    LOG_DIR,
)
ALL_RESULTS['A3_raw_stats'] = a3_results
sync_experiment_to_gdrive(
    'A3_raw_stats',
    a3_results,
    RESULTS_DIR,
    CHECKPOINT_ROOT,
    GDRIVE_RESULTS,
    ALL_RESULTS,
)

In [None]:
# B1a-d: Layer Ablation
# Each entry: (experiment id, first layer, end layer, description). The end
# layer is one past the last layer named in the id and in the cache directory.
layer_configs = [
    ('B1a_layers_1-6', 1, 7, 'MERT layers 1-6 (early)'),
    ('B1b_layers_7-12', 7, 13, 'MERT layers 7-12 (mid)'),
    ('B1c_layers_13-24', 13, 25, 'MERT layers 13-24 (late)'),
    ('B1d_layers_1-24', 1, 25, 'MERT all layers 1-24'),
]

for exp_id, layer_start, layer_end, desc in layer_configs:
    # One cache directory per layer range, e.g. 'L1-6' for (1, 7).
    cache_dir = MERT_CACHE_ROOT / f'L{layer_start}-{layer_end-1}'
    extract_mert_for_layer_range(layer_start, layer_end, AUDIO_DIR, cache_dir, ALL_KEYS)

    layer_results = run_4fold_mert_experiment(
        exp_id,
        desc,
        make_mert_model,
        cache_dir,
        LABELS,
        FOLD_ASSIGNMENTS,
        BASE_CONFIG,
        CHECKPOINT_ROOT,
        RESULTS_DIR,
        LOG_DIR,
    )
    ALL_RESULTS[exp_id] = layer_results
    # Sync inside the loop so each finished range is uploaded straight away.
    sync_experiment_to_gdrive(
        exp_id,
        layer_results,
        RESULTS_DIR,
        CHECKPOINT_ROOT,
        GDRIVE_RESULTS,
        ALL_RESULTS,
    )

In [None]:
# B2a-c: Pooling Ablation
# Each entry: (experiment id, pooling mode given to BaseMERTModel, description).
pooling_configs = [
    ('B2a_max_pool', 'max', 'MERT + max pooling'),
    ('B2b_attention_pool', 'attention', 'MERT + attention pooling'),
    ('B2c_lstm_pool', 'lstm', 'MERT + Bi-LSTM pooling'),
]

for exp_id, pooling, desc in pooling_configs:
    # Per-experiment config: a copy of the shared defaults plus this pooling mode.
    cfg = BASE_CONFIG.copy()
    cfg.update(pooling=pooling)

    def make_pooling_model(cfg=cfg):
        """Build a BaseMERTModel with the configured pooling and MSE loss.

        The default argument binds this iteration's ``cfg`` at definition time.
        """
        model_kwargs = {
            'input_dim': cfg['input_dim'],
            'hidden_dim': cfg['hidden_dim'],
            'dropout': cfg['dropout'],
            'learning_rate': cfg['learning_rate'],
            'weight_decay': cfg['weight_decay'],
            'pooling': cfg['pooling'],
            'loss_type': 'mse',
            'max_epochs': cfg['max_epochs'],
        }
        return BaseMERTModel(**model_kwargs)

    pooling_results = run_4fold_mert_experiment(
        exp_id,
        desc,
        make_pooling_model,
        DEFAULT_MERT_DIR,
        LABELS,
        FOLD_ASSIGNMENTS,
        cfg,
        CHECKPOINT_ROOT,
        RESULTS_DIR,
        LOG_DIR,
    )
    ALL_RESULTS[exp_id] = pooling_results
    sync_experiment_to_gdrive(
        exp_id,
        pooling_results,
        RESULTS_DIR,
        CHECKPOINT_ROOT,
        GDRIVE_RESULTS,
        ALL_RESULTS,
    )

In [None]:
# C1a-b: Loss Ablation
# Each entry: (experiment id, loss_type given to BaseMERTModel, description).
loss_configs = [
    ('C1a_hybrid_loss', 'hybrid', 'MERT + MSE + 0.5*CCC loss'),
    ('C1b_pure_ccc', 'ccc', 'MERT + pure CCC loss'),
]

for exp_id, loss_type, desc in loss_configs:
    # Per-experiment config: a copy of the shared defaults plus this loss type.
    cfg = BASE_CONFIG.copy()
    cfg.update(loss_type=loss_type)

    def make_loss_model(cfg=cfg):
        """Build a mean-pooled BaseMERTModel with the configured loss.

        The default argument binds this iteration's ``cfg`` at definition time.
        """
        model_kwargs = {
            'input_dim': cfg['input_dim'],
            'hidden_dim': cfg['hidden_dim'],
            'dropout': cfg['dropout'],
            'learning_rate': cfg['learning_rate'],
            'weight_decay': cfg['weight_decay'],
            'pooling': 'mean',
            'loss_type': cfg['loss_type'],
            'max_epochs': cfg['max_epochs'],
        }
        return BaseMERTModel(**model_kwargs)

    loss_results = run_4fold_mert_experiment(
        exp_id,
        desc,
        make_loss_model,
        DEFAULT_MERT_DIR,
        LABELS,
        FOLD_ASSIGNMENTS,
        cfg,
        CHECKPOINT_ROOT,
        RESULTS_DIR,
        LOG_DIR,
    )
    ALL_RESULTS[exp_id] = loss_results
    sync_experiment_to_gdrive(
        exp_id,
        loss_results,
        RESULTS_DIR,
        CHECKPOINT_ROOT,
        GDRIVE_RESULTS,
        ALL_RESULTS,
    )

---
## Results Summary

In [None]:
# Print results table: one row per experiment present in ALL_RESULTS, grouped
# by family, with each row's delta against the B0 baseline.
print("="*80)
print("PHASE 2 RESULTS SUMMARY")
print("="*80)

# None (not 0) when B0 is missing, so the "vs B0" column can say so instead of
# reporting a delta against an imaginary R2 of 0.
baseline_r2 = ALL_RESULTS.get('B0_baseline', {}).get('summary', {}).get('avg_r2')

print(f"{'Experiment':<25} {'Avg R2':>10} {'95% CI':>20} {'vs B0':>10} {'Disp':>8}")
print("-"*80)

# None entries mark the separator lines between experiment families.
exp_order = [
    'B0_baseline', None,
    'A1_linear_probe', 'A2_mel_cnn', 'A3_raw_stats', None,
    'B1a_layers_1-6', 'B1b_layers_7-12', 'B1c_layers_13-24', 'B1d_layers_1-24', None,
    'B2a_max_pool', 'B2b_attention_pool', 'B2c_lstm_pool', None,
    'C1a_hybrid_loss', 'C1b_pure_ccc',
]

for exp_id in exp_order:
    if exp_id is None:
        print("-"*80)
        continue
    if exp_id not in ALL_RESULTS:
        continue

    summary = ALL_RESULTS[exp_id].get('summary') or {}
    avg_r2 = summary.get('avg_r2')
    if avg_r2 is None:
        # A result without a usable summary gets a visible row rather than a
        # KeyError that would abort the whole table.
        print(f"{exp_id:<25} {'(no summary available)'}")
        continue

    ci = summary.get('r2_ci_95', [0, 0])
    # Pre-render the interval so it can be right-aligned under the 20-wide header.
    ci_str = f"[{ci[0]:.3f}, {ci[1]:.3f}]"

    if exp_id == 'B0_baseline':
        diff_str = '---'
    elif baseline_r2 is None:
        diff_str = 'n/a'
    else:
        diff_str = f"{avg_r2 - baseline_r2:+.3f}"

    print(f"{exp_id:<25} {avg_r2:>10.4f} {ci_str:>20} {diff_str:>10} {summary.get('dispersion_ratio', 0):>8.2f}")

print("="*80)

In [None]:
# Final sync (redundant safety - individual experiments already synced)
all_results_path = RESULTS_DIR / 'phase2_all_results.json'
with all_results_path.open('w') as out_file:
    json.dump(ALL_RESULTS, out_file, indent=2)

print("Final sync to Google Drive...")
# (local source, remote destination, progress message) for each upload.
final_uploads = (
    (str(RESULTS_DIR), GDRIVE_RESULTS, "Syncing results"),
    (str(CHECKPOINT_ROOT), f"{GDRIVE_RESULTS}/checkpoints", "Syncing checkpoints"),
)
for local_path, remote_path, message in final_uploads:
    run_rclone(['rclone', 'copy', local_path, remote_path], message)

# Print final status
avg_r2_by_experiment = {}
for name, outcome in ALL_RESULTS.items():
    avg_r2_by_experiment[name] = outcome['summary']['avg_r2']
print_experiment_status(ALL_EXPERIMENT_IDS, avg_r2_by_experiment)
print("Done! Results at:", GDRIVE_RESULTS)