# Bridge, Validation, and Distillation

Computes PercePiano composite labels, runs all 5 validation gates, and
executes the LLM distillation pilot with calibration analysis.

In [None]:
# The cuBLAS workspace setting is written to the environment before the
# remaining imports run, so it is already in place when any later import
# pulls in a CUDA-backed library.
# NOTE(review): presumably needed for deterministic cuBLAS kernels in
# PyTorch -- confirm against the training/evaluation code.
import os
os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'

import sys
# Put the relative 'src' directory first on the import path.
# NOTE(review): this resolves against the current working directory, so the
# notebook presumably has to be run from the project root -- confirm.
sys.path.insert(0, 'src')

import json
import numpy as np
from pathlib import Path

# All notebook inputs and outputs live under one relative data directory.
_DATA_ROOT = Path("data")

# Taxonomy definitions, quote bank, and the artifacts this notebook writes.
COMPOSITE_DIR = _DATA_ROOT / "composite_labels"
# Cached PercePiano labels (labels.json).
PP_CACHE = _DATA_ROOT / "percepiano_cache"
# Masterclass pipeline outputs (open_moments.jsonl).
MC_DIR = _DATA_ROOT / "masterclass_pipeline"

## Load taxonomy and PercePiano labels

In [None]:
from audio_experiments.constants import PERCEPIANO_DIMENSIONS

taxonomy_path = COMPOSITE_DIR / "dimension_definitions.json"
labels_path = PP_CACHE / "labels.json"

# Taxonomy definitions; the "dimensions" entry is used below and by the
# distillation rubric.
with open(taxonomy_path) as taxonomy_file:
    taxonomy = json.load(taxonomy_file)

# Raw per-segment label lists keyed by segment id.
with open(labels_path) as labels_file:
    raw_labels = json.load(labels_file)

# Keep only the first 19 values of every label list.
# NOTE(review): 19 presumably equals len(PERCEPIANO_DIMENSIONS) -- confirm.
pp_labels = {
    segment_id: np.array(values[:19])
    for segment_id, values in raw_labels.items()
}

# Dimension name -> position in the label vector.
dim_index = {
    dim_name: position
    for position, dim_name in enumerate(PERCEPIANO_DIMENSIONS)
}

print(f"Loaded {len(pp_labels)} PercePiano segments, {len(taxonomy['dimensions'])} taxonomy dims")

## Define dimension mapping and compute bridge

In [None]:
from masterclass_experiments.bridge import (
    compute_composite_labels,
    compute_weights,
    save_composite_labels,
)
from masterclass_experiments.scoring import PERCEPIANO_MUQ_R2

# Manual mapping: taxonomy dimension -> list of PercePiano dimensions.
# Based on config.rs percepiano_dims and cluster analysis.
# NOTE(review): the mapping is still an empty placeholder; until it is filled
# in, the composite labels written below are computed from no dimensions.
DIM_MAPPING = {
    # "dynamics": ["dynamic_range"],
    # "pedaling": ["pedal_amount", "pedal_clarity"],
    # ... fill in from taxonomy analysis
}

composite_labels_path = COMPOSITE_DIR / "composite_labels.json"

# Derive weights from the mapping and PERCEPIANO_MUQ_R2, then combine the
# PercePiano labels into composite labels per segment.
weights = compute_weights(DIM_MAPPING, PERCEPIANO_MUQ_R2)
composites = compute_composite_labels(pp_labels, weights, dim_index)

# Persist the composites for later cells and notebooks.
save_composite_labels(composites, composite_labels_path)
print(f"Computed composite labels for {len(composites)} segments")

## Run validation gates

In [None]:
from masterclass_experiments.validation import run_all_gates
from masterclass_experiments.clustering import load_open_descriptions

_, descriptions = load_open_descriptions(MC_DIR / "open_moments.jsonl")
# Reload labels from taxonomy derivation
# ... (load cluster labels from saved artifacts)

# `cluster_labels` is produced by the taxonomy notebook and is not defined
# anywhere in this one. Fail fast with an actionable message instead of
# hitting a bare NameError only after the matrix and quote bank are built.
if "cluster_labels" not in globals():
    raise NameError(
        "cluster_labels is not defined in this notebook; load the cluster "
        "labels saved by the taxonomy notebook (see the placeholder above) "
        "before running the validation gates"
    )

# Build composite matrix [N_segments, N_dims] for independence check.
# Column order follows DIM_MAPPING's key order; row order follows the
# iteration order of `composites`.
dim_names = list(DIM_MAPPING.keys())
composite_matrix = np.array([
    [composites[seg][d] for d in dim_names]
    for seg in composites
])

# Load quote bank
with open(COMPOSITE_DIR / "quote_bank.json") as f:
    quote_bank = json.load(f)

# STOP AUC from re-running masterclass evaluation with composite dims.
# NOTE(review): 0.0 is a placeholder; the gate that consumes stop_auc is
# evaluated against this value until it is replaced with the measured AUC.
stop_auc = 0.0  # Fill in after running STOP experiment below

report = run_all_gates(
    labels=cluster_labels,  # from taxonomy notebook
    stop_auc=stop_auc,
    composite_matrix=composite_matrix,
    quote_bank=quote_bank,
    dim_names=dim_names,
)

# Summarize the overall verdict followed by one PASS/FAIL line per gate.
print(f"\nAll gates passed: {report['all_passed']}")
for gate in report['gates']:
    status = 'PASS' if gate['passed'] else 'FAIL'
    print(f"  [{status}] {gate['gate']}")

## STOP prediction with composite dimensions

In [None]:
# Re-run STOP classifier using composite dimension scores instead of raw MuQ
from masterclass_experiments.evaluation import leave_one_video_out_cv

# Build feature matrix from composite labels of masterclass segments
# ... (similar to existing masterclass experiment but using composite scores)

## Distillation pilot (if all gates pass)

In [None]:
from masterclass_experiments.distillation import (
    build_rubric,
    build_scoring_prompt,
    parse_scores,
    calibration_analysis,
    go_no_go,
)

quote_bank_path = COMPOSITE_DIR / "quote_bank.json"
rubric_path = COMPOSITE_DIR / "teacher_rubric.json"

# Read the quote bank here so this cell does not depend on the validation
# cell having been run first.
with open(quote_bank_path) as quote_file:
    quote_bank = json.load(quote_file)

# Build the teacher rubric from the taxonomy dimensions and the quote bank,
# then persist it next to the other composite-label artifacts.
rubric = build_rubric(taxonomy["dimensions"], quote_bank)
with open(rubric_path, "w") as rubric_file:
    json.dump(rubric, rubric_file, indent=2)
print("Rubric built for dimensions:", list(rubric.keys()))

# Score T1 segments (requires API calls -- ~$36 at $0.03/segment)
# See distillation.py for the scoring loop using openai SDK

## Calibration analysis

In [None]:
# After scoring all 1,202 T1 segments, run calibration
# per_dim_results = {}
# for dim in dim_names:
#     teacher = (np.array([scores[seg][dim] for seg in scores]) - 1.0) / 4.0  # normalize 1-5 to 0-1
#     composite = np.array([composites[seg][dim] for seg in scores])
#     per_dim_results[dim] = calibration_analysis(teacher, composite, dim)

# decision = go_no_go(per_dim_results, stop_auc=..., spot_check_accuracy=...)
# print(f"Go/No-Go: {'GO' if decision['go'] else 'NO-GO'}")

## Save final report

In [None]:
# Write the validation gate report produced by run_all_gates to disk.
# NOTE(review): json.dump fails on non-JSON-native values; confirm that
# `report` holds only plain Python types.
validation_report_path = COMPOSITE_DIR / "validation_report.json"
with open(validation_report_path, "w") as report_file:
    json.dump(report, report_file, indent=2)

# if distillation ran:
# with open(COMPOSITE_DIR / "distillation_report.json", "w") as f:
#     json.dump({"per_dim": per_dim_results, "decision": decision}, f, indent=2)

print("Reports saved to", COMPOSITE_DIR)