# Phase 3: Fusion Experiments

Multimodal fusion: Audio (MERT) + Symbolic (MIDI) for ISMIR paper.

## Experiments
- **S0-S2**: Statistical tests (bootstrap CIs, paired tests, multiple-comparison correction)
- **F0-F3**: Fusion strategies (simple average, weighted, ridge stacking, confidence-weighted)
- **A0-A2**: Ablations (weight stability, category fusion, error correlation)

## Requirements
- rclone configured with `gdrive:` remote
- Audio/symbolic predictions from Phase 2

In [None]:
import os
# Set in the process environment before torch is imported, so the value is
# already present when the CUDA libraries are loaded.
# NOTE(review): presumably this is the cuBLAS workspace setting PyTorch asks
# for when deterministic algorithms are enabled — confirm it is still needed.
os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'

import torch
# Report whether a CUDA device is visible and, if so, the name of device 0.
print(f"CUDA: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")

In [None]:
# Install rclone with the upstream install script (needs sudo). The installer's
# output is filtered down to lines containing "success" or "already".
# NOTE(review): the `|| echo "rclone ok"` fallback runs whenever grep finds no
# match, which includes a failed install — this cell's output does not prove
# that rclone is actually available.
!curl -fsSL https://rclone.org/install.sh | sudo bash 2>&1 | grep -E "(success|already)" || echo "rclone ok"

In [None]:
# Python packages used by this notebook (pip output suppressed with --quiet).
!pip install transformers librosa soundfile pytorch_lightning scipy scikit-learn --quiet

# Local checkout of the project repository: update it from `origin main` when
# the directory already exists, otherwise clone it fresh.
REPO_DIR = '/tmp/crescendai'
if os.path.exists(REPO_DIR):
    !cd {REPO_DIR} && git pull origin main
else:
    !git clone https://github.com/jai-dhiman/crescendai.git {REPO_DIR}
print(f"Repo: {REPO_DIR}")

In [None]:
import sys
# Put the checkout's `model/src` directory first on the import path so that
# packages under it are importable (presumably where `audio_experiments`
# lives — confirm against the repository layout).
sys.path.insert(0, f'{REPO_DIR}/model/src')

import json
import subprocess
import warnings
from pathlib import Path
import numpy as np
import pytorch_lightning as pl

from audio_experiments import PERCEPIANO_DIMENSIONS, SEED
from audio_experiments.training import (
    should_run_experiment, sync_experiment_to_gdrive,
    get_completed_experiments, print_experiment_status,
    run_bootstrap_experiment, run_paired_tests_experiment,
    run_multiple_correction_experiment, run_simple_fusion_experiment,
    run_weighted_fusion_experiment, run_ridge_fusion_experiment,
    run_confidence_fusion_experiment, run_weight_stability_experiment,
    run_category_fusion_experiment, run_error_correlation_experiment,
    save_fusion_experiment,
)

# Suppress every warning for the rest of the session.
# NOTE(review): this is a blanket filter, so numerical/statistical warnings
# raised by the libraries used in the experiments are hidden too — consider
# narrowing it to specific categories or modules.
warnings.filterwarnings('ignore')
# Seed through Lightning's seed_everything using the project-wide SEED.
pl.seed_everything(SEED, workers=True)
print("Imports: OK")

In [None]:
# Local scratch layout: everything for this phase lives under DATA_ROOT.
DATA_ROOT = Path('/tmp/phase3')
RESULTS_DIR = DATA_ROOT.joinpath('results')
CHECKPOINT_ROOT = DATA_ROOT.joinpath('checkpoints')

# rclone remote paths: four inputs to download, plus the location that
# results are uploaded to.
GDRIVE_LABELS = 'gdrive:crescendai_data/percepiano_labels'
GDRIVE_FOLDS = 'gdrive:crescendai_data/audio_baseline/audio_fold_assignments.json'
GDRIVE_SYMBOLIC = 'gdrive:crescendai_data/analysis/symbolic_predictions.json'
GDRIVE_AUDIO = 'gdrive:crescendai_data/analysis/checkpoints/audio/cv_predictions.json'
GDRIVE_RESULTS = 'gdrive:crescendai_data/checkpoints/fusion_phase3'

# Create the local directories up front; already-existing ones are left alone.
for d in (DATA_ROOT, RESULTS_DIR, CHECKPOINT_ROOT):
    d.mkdir(parents=True, exist_ok=True)

def rclone(cmd, desc):
    """Run an rclone command and fail loudly if it does not succeed.

    Args:
        cmd: Full argument list to execute, e.g.
            ``['rclone', 'copy', src, dst]``.
        desc: Short label printed before the command runs and used in the
            error message.

    Raises:
        RuntimeError: If the command exits with a non-zero status. The
            captured stderr is included so the cause is visible.
    """
    print(f"{desc}...")
    # Output is captured to keep the notebook quiet; text=True so stderr can
    # be embedded in the error message as a string.
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError(
            f"{desc} failed (exit code {result.returncode}): {result.stderr.strip()}"
        )

# Check rclone: a remote named exactly `gdrive:` must be configured.
# `rclone listremotes` prints one remote per line, so whole names are compared
# instead of substrings (a remote such as `my-gdrive:` must not pass).
r = subprocess.run(['rclone', 'listremotes'], capture_output=True, text=True)
if 'gdrive:' not in {line.strip() for line in r.stdout.splitlines()}:
    raise RuntimeError("rclone gdrive not configured")

# Download data
LABEL_DIR = DATA_ROOT / 'labels'
LABEL_DIR.mkdir(exist_ok=True)
rclone(['rclone', 'copy', GDRIVE_LABELS, str(LABEL_DIR)], "Labels")
rclone(['rclone', 'copyto', GDRIVE_FOLDS, str(DATA_ROOT / 'folds.json')], "Folds")
rclone(['rclone', 'copyto', GDRIVE_SYMBOLIC, str(DATA_ROOT / 'symbolic.json')], "Symbolic")
rclone(['rclone', 'copyto', GDRIVE_AUDIO, str(DATA_ROOT / 'audio.json')], "Audio")

# Do not assume the transfers worked: confirm each expected input is on disk
# before reporting success, so a failed download is caught here rather than
# as a confusing error in a later cell.
missing = [
    str(p)
    for p in (DATA_ROOT / 'folds.json', DATA_ROOT / 'symbolic.json', DATA_ROOT / 'audio.json')
    if not p.is_file()
]
if not any(LABEL_DIR.iterdir()):
    missing.append(str(LABEL_DIR))
if missing:
    raise FileNotFoundError(f"Download incomplete, missing: {', '.join(missing)}")

print("Data downloaded.")

In [None]:
# Load the four JSON inputs fetched into DATA_ROOT / LABEL_DIR.
with open(LABEL_DIR / 'label_2round_mean_reg_19_with0_rm_highstd0.json') as f:
    LABELS = json.load(f)
with open(DATA_ROOT / 'folds.json') as f:
    FOLD_ASSIGNMENTS = json.load(f)
with open(DATA_ROOT / 'symbolic.json') as f:
    SYMBOLIC_RAW = json.load(f)
with open(DATA_ROOT / 'audio.json') as f:
    AUDIO_RAW = json.load(f)

# Keep only the keys present in all four inputs; sorting fixes the row order
# shared by the three arrays below.
# NOTE(review): this assumes every input is a mapping keyed by the same sample
# ids. If FOLD_ASSIGNMENTS is keyed differently (e.g. by fold name), the
# intersection is empty — confirm the schema of folds.json.
SAMPLE_KEYS = sorted(set(LABELS) & set(SYMBOLIC_RAW) & set(AUDIO_RAW) & set(FOLD_ASSIGNMENTS))
print(f"Aligned: {len(SAMPLE_KEYS)} samples")

# Fail fast: with no common keys every array below would be empty and the
# experiments would run on nothing.
if not SAMPLE_KEYS:
    raise ValueError(
        "No sample keys are shared by labels, symbolic predictions, "
        "audio predictions and fold assignments"
    )

# One row per aligned sample. Label columns follow PERCEPIANO_DIMENSIONS;
# prediction rows are taken as stored in the prediction files.
LABELS_ARR = np.array([[LABELS[k][d] for d in PERCEPIANO_DIMENSIONS] for k in SAMPLE_KEYS])
SYMBOLIC_ARR = np.array([SYMBOLIC_RAW[k] for k in SAMPLE_KEYS])
AUDIO_ARR = np.array([AUDIO_RAW[k] for k in SAMPLE_KEYS])

print(f"Shapes: labels={LABELS_ARR.shape}, audio={AUDIO_ARR.shape}, symbolic={SYMBOLIC_ARR.shape}")

In [None]:
# In-memory store of experiment results, keyed by experiment id.
ALL_RESULTS = dict()

# Every experiment id this notebook knows about, grouped by section.
ALL_EXPERIMENT_IDS = [
    # Statistical tests
    'S0_bootstrap',
    'S1_paired_tests',
    'S2_multiple_correction',
    # Fusion strategies
    'F0_simple',
    'F1_weighted',
    'F2_ridge',
    'F3_confidence',
    # Ablations
    'A0_stability',
    'A1_category',
    'A2_error_corr',
]

# Look up what is already recorded under GDRIVE_RESULTS and print the status
# of each experiment id.
COMPLETED_CACHE = get_completed_experiments(GDRIVE_RESULTS)
print_experiment_status(ALL_EXPERIMENT_IDS, COMPLETED_CACHE)

---
## Statistical Tests

In [None]:
# S0: Bootstrap CIs
# Runs only when should_run_experiment returns true for this id.
# NOTE(review): presumably that means "no existing result locally or on
# Drive" — confirm in audio_experiments.training.
if should_run_experiment('S0_bootstrap', CHECKPOINT_ROOT, RESULTS_DIR, GDRIVE_RESULTS, COMPLETED_CACHE):
    # n_bootstrap=10000 is passed to the helper. The results summary later
    # reads the audio/symbolic `overall` r2 and CI bounds from this result.
    ALL_RESULTS['S0_bootstrap'] = run_bootstrap_experiment(
        'S0_bootstrap', AUDIO_ARR, SYMBOLIC_ARR, LABELS_ARR, n_bootstrap=10000
    )
    # Hand the result to the save helper (RESULTS_DIR), then to the Drive sync helper.
    save_fusion_experiment('S0_bootstrap', ALL_RESULTS['S0_bootstrap'], RESULTS_DIR, ALL_RESULTS)
    sync_experiment_to_gdrive('S0_bootstrap', ALL_RESULTS['S0_bootstrap'], RESULTS_DIR, CHECKPOINT_ROOT, GDRIVE_RESULTS, ALL_RESULTS)

In [None]:
# S1: Paired Tests
# Runs only when should_run_experiment returns true for this id.
# The S2 cell consumes this result (from ALL_RESULTS or from
# RESULTS_DIR / 'S1_paired_tests.json').
if should_run_experiment('S1_paired_tests', CHECKPOINT_ROOT, RESULTS_DIR, GDRIVE_RESULTS, COMPLETED_CACHE):
    ALL_RESULTS['S1_paired_tests'] = run_paired_tests_experiment(
        'S1_paired_tests', AUDIO_ARR, SYMBOLIC_ARR, LABELS_ARR
    )
    # Hand the result to the save helper (RESULTS_DIR), then to the Drive sync helper.
    save_fusion_experiment('S1_paired_tests', ALL_RESULTS['S1_paired_tests'], RESULTS_DIR, ALL_RESULTS)
    sync_experiment_to_gdrive('S1_paired_tests', ALL_RESULTS['S1_paired_tests'], RESULTS_DIR, CHECKPOINT_ROOT, GDRIVE_RESULTS, ALL_RESULTS)

In [None]:
# S2: Multiple Correction
# Runs only when should_run_experiment returns true for this id.
if should_run_experiment('S2_multiple_correction', CHECKPOINT_ROOT, RESULTS_DIR, GDRIVE_RESULTS, COMPLETED_CACHE):
    # Requires S1 results: if S1 was not run in this session, fall back to its
    # JSON file under RESULTS_DIR.
    # NOTE(review): open() raises FileNotFoundError if that file is absent —
    # confirm the file is always present locally when S1 is skipped.
    if 'S1_paired_tests' not in ALL_RESULTS:
        with open(RESULTS_DIR / 'S1_paired_tests.json') as f:
            ALL_RESULTS['S1_paired_tests'] = json.load(f)

    # The correction operates on the S1 result only; no prediction arrays are passed.
    ALL_RESULTS['S2_multiple_correction'] = run_multiple_correction_experiment(
        'S2_multiple_correction', ALL_RESULTS['S1_paired_tests']
    )
    # Hand the result to the save helper (RESULTS_DIR), then to the Drive sync helper.
    save_fusion_experiment('S2_multiple_correction', ALL_RESULTS['S2_multiple_correction'], RESULTS_DIR, ALL_RESULTS)
    sync_experiment_to_gdrive('S2_multiple_correction', ALL_RESULTS['S2_multiple_correction'], RESULTS_DIR, CHECKPOINT_ROOT, GDRIVE_RESULTS, ALL_RESULTS)

---
## Fusion Strategies

In [None]:
# F0: Simple Average
# Runs only when should_run_experiment returns true for this id.
# The results summary reads `overall_r2` and `bootstrap.overall` CI bounds
# from this result.
if should_run_experiment('F0_simple', CHECKPOINT_ROOT, RESULTS_DIR, GDRIVE_RESULTS, COMPLETED_CACHE):
    ALL_RESULTS['F0_simple'] = run_simple_fusion_experiment(
        'F0_simple', AUDIO_ARR, SYMBOLIC_ARR, LABELS_ARR
    )
    # Hand the result to the save helper (RESULTS_DIR), then to the Drive sync helper.
    save_fusion_experiment('F0_simple', ALL_RESULTS['F0_simple'], RESULTS_DIR, ALL_RESULTS)
    sync_experiment_to_gdrive('F0_simple', ALL_RESULTS['F0_simple'], RESULTS_DIR, CHECKPOINT_ROOT, GDRIVE_RESULTS, ALL_RESULTS)

In [None]:
# F1: Weighted Fusion (CV)
# Runs only when should_run_experiment returns true for this id.
# Unlike F0, the helper also receives the fold assignments and the aligned
# sample keys. The A0 cell reads `fold_weights` from this result.
if should_run_experiment('F1_weighted', CHECKPOINT_ROOT, RESULTS_DIR, GDRIVE_RESULTS, COMPLETED_CACHE):
    ALL_RESULTS['F1_weighted'] = run_weighted_fusion_experiment(
        'F1_weighted', AUDIO_ARR, SYMBOLIC_ARR, LABELS_ARR,
        FOLD_ASSIGNMENTS, SAMPLE_KEYS
    )
    # Hand the result to the save helper (RESULTS_DIR), then to the Drive sync helper.
    save_fusion_experiment('F1_weighted', ALL_RESULTS['F1_weighted'], RESULTS_DIR, ALL_RESULTS)
    sync_experiment_to_gdrive('F1_weighted', ALL_RESULTS['F1_weighted'], RESULTS_DIR, CHECKPOINT_ROOT, GDRIVE_RESULTS, ALL_RESULTS)

In [None]:
# F2: Ridge Stacking
# Runs only when should_run_experiment returns true for this id.
# The helper receives the fold assignments and aligned sample keys in
# addition to the prediction and label arrays.
if should_run_experiment('F2_ridge', CHECKPOINT_ROOT, RESULTS_DIR, GDRIVE_RESULTS, COMPLETED_CACHE):
    ALL_RESULTS['F2_ridge'] = run_ridge_fusion_experiment(
        'F2_ridge', AUDIO_ARR, SYMBOLIC_ARR, LABELS_ARR,
        FOLD_ASSIGNMENTS, SAMPLE_KEYS
    )
    # Hand the result to the save helper (RESULTS_DIR), then to the Drive sync helper.
    save_fusion_experiment('F2_ridge', ALL_RESULTS['F2_ridge'], RESULTS_DIR, ALL_RESULTS)
    sync_experiment_to_gdrive('F2_ridge', ALL_RESULTS['F2_ridge'], RESULTS_DIR, CHECKPOINT_ROOT, GDRIVE_RESULTS, ALL_RESULTS)

In [None]:
# F3: Confidence Weighted
# Runs only when should_run_experiment returns true for this id.
# Only the prediction and label arrays are passed (no fold assignments).
if should_run_experiment('F3_confidence', CHECKPOINT_ROOT, RESULTS_DIR, GDRIVE_RESULTS, COMPLETED_CACHE):
    ALL_RESULTS['F3_confidence'] = run_confidence_fusion_experiment(
        'F3_confidence', AUDIO_ARR, SYMBOLIC_ARR, LABELS_ARR
    )
    # Hand the result to the save helper (RESULTS_DIR), then to the Drive sync helper.
    save_fusion_experiment('F3_confidence', ALL_RESULTS['F3_confidence'], RESULTS_DIR, ALL_RESULTS)
    sync_experiment_to_gdrive('F3_confidence', ALL_RESULTS['F3_confidence'], RESULTS_DIR, CHECKPOINT_ROOT, GDRIVE_RESULTS, ALL_RESULTS)

---
## Ablations

In [None]:
# A0: Weight Stability
# Runs only when should_run_experiment returns true for this id.
if should_run_experiment('A0_stability', CHECKPOINT_ROOT, RESULTS_DIR, GDRIVE_RESULTS, COMPLETED_CACHE):
    # Requires F1 results: if F1 was not run in this session, fall back to its
    # JSON file under RESULTS_DIR.
    # NOTE(review): open() raises FileNotFoundError if that file is absent —
    # confirm the file is always present locally when F1 is skipped.
    if 'F1_weighted' not in ALL_RESULTS:
        with open(RESULTS_DIR / 'F1_weighted.json') as f:
            ALL_RESULTS['F1_weighted'] = json.load(f)

    # Only F1's `fold_weights` entry is analysed; no prediction arrays are passed.
    ALL_RESULTS['A0_stability'] = run_weight_stability_experiment(
        'A0_stability', ALL_RESULTS['F1_weighted']['fold_weights']
    )
    # Hand the result to the save helper (RESULTS_DIR), then to the Drive sync helper.
    save_fusion_experiment('A0_stability', ALL_RESULTS['A0_stability'], RESULTS_DIR, ALL_RESULTS)
    sync_experiment_to_gdrive('A0_stability', ALL_RESULTS['A0_stability'], RESULTS_DIR, CHECKPOINT_ROOT, GDRIVE_RESULTS, ALL_RESULTS)

In [None]:
# A1: Category Fusion
# Runs only when should_run_experiment returns true for this id.
# The helper receives the fold assignments and aligned sample keys in
# addition to the prediction and label arrays.
if should_run_experiment('A1_category', CHECKPOINT_ROOT, RESULTS_DIR, GDRIVE_RESULTS, COMPLETED_CACHE):
    ALL_RESULTS['A1_category'] = run_category_fusion_experiment(
        'A1_category', AUDIO_ARR, SYMBOLIC_ARR, LABELS_ARR,
        FOLD_ASSIGNMENTS, SAMPLE_KEYS
    )
    # Hand the result to the save helper (RESULTS_DIR), then to the Drive sync helper.
    save_fusion_experiment('A1_category', ALL_RESULTS['A1_category'], RESULTS_DIR, ALL_RESULTS)
    sync_experiment_to_gdrive('A1_category', ALL_RESULTS['A1_category'], RESULTS_DIR, CHECKPOINT_ROOT, GDRIVE_RESULTS, ALL_RESULTS)

In [None]:
# A2: Error Correlation
# Runs only when should_run_experiment returns true for this id.
# Only the prediction and label arrays are passed (no fold assignments).
if should_run_experiment('A2_error_corr', CHECKPOINT_ROOT, RESULTS_DIR, GDRIVE_RESULTS, COMPLETED_CACHE):
    ALL_RESULTS['A2_error_corr'] = run_error_correlation_experiment(
        'A2_error_corr', AUDIO_ARR, SYMBOLIC_ARR, LABELS_ARR
    )
    # Hand the result to the save helper (RESULTS_DIR), then to the Drive sync helper.
    save_fusion_experiment('A2_error_corr', ALL_RESULTS['A2_error_corr'], RESULTS_DIR, ALL_RESULTS)
    sync_experiment_to_gdrive('A2_error_corr', ALL_RESULTS['A2_error_corr'], RESULTS_DIR, CHECKPOINT_ROOT, GDRIVE_RESULTS, ALL_RESULTS)

---
## Results Summary

In [None]:
# Print results table
print("="*80)
print("PHASE 3 FUSION RESULTS")
print("="*80)

# Load any missing results from disk, so experiments skipped in this session
# still appear in the table when their JSON file exists under RESULTS_DIR.
for exp_id in ALL_EXPERIMENT_IDS:
    if exp_id not in ALL_RESULTS:
        result_file = RESULTS_DIR / f"{exp_id}.json"
        if result_file.exists():
            with open(result_file) as f:
                ALL_RESULTS[exp_id] = json.load(f)

# Get baseline R2s. A missing baseline is None rather than 0, so the
# "vs Best" column is never silently computed against a made-up 0.
audio_r2 = ALL_RESULTS.get('S0_bootstrap', {}).get('audio', {}).get('overall', {}).get('r2')
symbolic_r2 = ALL_RESULTS.get('S0_bootstrap', {}).get('symbolic', {}).get('overall', {}).get('r2')
available_r2 = [v for v in (audio_r2, symbolic_r2) if v is not None]
best_single = max(available_r2) if available_r2 else None

print(f"\n{'Model':<25} {'R2':>10} {'95% CI':>25} {'vs Best':>12}")
print("-"*75)

# Baselines (single-modality rows come from the S0 bootstrap result)
if 'S0_bootstrap' in ALL_RESULTS:
    s0 = ALL_RESULTS['S0_bootstrap']
    for label, key in (('Audio', 'audio'), ('Symbolic', 'symbolic')):
        overall = s0[key]['overall']
        # Pad the CI to the header's column width so the rows line up.
        ci = f"[{overall['ci_lower']:.3f}, {overall['ci_upper']:.3f}]"
        print(f"{label:<25} {overall['r2']:>10.4f} {ci:>25} {'---':>12}")

print("-"*75)

# Fusion strategies
fusion_exps = [('F0_simple', 'Simple Average'), ('F1_weighted', 'Weighted CV'),
               ('F2_ridge', 'Ridge Stacking'), ('F3_confidence', 'Confidence')]
for exp_id, name in fusion_exps:
    if exp_id in ALL_RESULTS:
        r = ALL_RESULTS[exp_id]
        r2 = r['overall_r2']
        b = r['bootstrap']['overall']
        ci = f"[{b['ci_lower']:.3f}, {b['ci_upper']:.3f}]"
        # Improvement over the better single-modality model, or "n/a" when no
        # baseline is available to compare against.
        diff = f"{r2 - best_single:+.4f}" if best_single is not None else "n/a"
        print(f"{name:<25} {r2:>10.4f} {ci:>25} {diff:>12}")

print("="*75)

In [None]:
# Final sync
with open(RESULTS_DIR / 'phase3_all_results.json', 'w') as f:
    json.dump(ALL_RESULTS, f, indent=2, default=str)

subprocess.run(['rclone', 'copy', str(RESULTS_DIR), GDRIVE_RESULTS], capture_output=True)
print(f"Done! Results at: {GDRIVE_RESULTS}")