# CAPOPM Master Experiment (Synthetic)

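This notebook performs a partial validation of CAPOPM under controlled synthetic assumptions: it runs the synthetic experiment, compares CAPOPM against the baseline models, and saves the results as JSON.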

In [8]:
import os, sys
# REPO_ROOT is the parent of the current working directory; this is the
# repository root only when the notebook is run from a sub-folder such as
# notebooks/.
_cwd_parent = os.path.join(os.getcwd(), os.pardir)
REPO_ROOT = os.path.abspath(_cwd_parent)
if REPO_ROOT not in sys.path:
    # Prepend so this directory is searched first when importing.
    sys.path.insert(0, REPO_ROOT)


In [9]:
import json
import os

from src.capopm.experiments.runner import run_experiment
# Scalar metrics reported for each model, in display order.
SUMMARY_METRIC_KEYS = (
    'brier',
    'log_score',
    'mae_prob',
    'calibration_ece',
    'coverage_90_ptrue',
    'coverage_95_ptrue',
)


def print_summary(label, results, metric_keys=SUMMARY_METRIC_KEYS):
    """Print a per-model digest of an experiment result dictionary.

    Args:
        label: Heading text printed in front of the word "summary".
        results: Mapping with an 'aggregated_metrics' entry (model name ->
            metrics mapping) and, optionally, a 'warnings' iterable.
        metric_keys: Names of the scalar metrics to print for every model, in
            display order. Defaults to SUMMARY_METRIC_KEYS. A metric that a
            model does not report is skipped silently.

    Raises:
        KeyError: If ``results`` has no 'aggregated_metrics' entry.
    """
    print(f'\n{label} summary:')
    for model, metrics in results['aggregated_metrics'].items():
        print(f'  {model}:')
        for key in metric_keys:
            if key in metrics:
                print(f'    {key}: {metrics[key]}')
        # Calibration diagnostics are printed as-is (whatever object was stored).
        diag = metrics.get('calibration_diagnostics')
        if diag is not None:
            print(f'    calibration_diagnostics: {diag}')
    # A missing or empty 'warnings' entry produces no warnings section at all.
    warnings = results.get('warnings')
    if warnings:
        print('  warnings:')
        for w in warnings:
            print(f'    {w}')


# Baseline experiment configuration, assembled section by section.
# Key order matches the order in which the sections are listed below; it is
# preserved when the configuration or results are serialized.

# Trader population: head-count, per-type proportions and per-type parameters.
# In this baseline all three trader types carry the same parameter values.
_traders = dict(
    n_traders=50,
    proportions=dict(informed=0.4, noise=0.5, adversarial=0.1),
    params=dict(
        informed=dict(signal_quality=0.7, noise_yes_prob=0.5, herding_intensity=0.0),
        noise=dict(signal_quality=0.7, noise_yes_prob=0.5, herding_intensity=0.0),
        adversarial=dict(signal_quality=0.7, noise_yes_prob=0.5, herding_intensity=0.0),
    ),
)

# Market simulation settings.
_market = dict(
    n_steps=25,
    arrivals_per_step=3,
    fee_rate=0.0,
    initial_yes_pool=1.0,
    initial_no_pool=1.0,
    signal_model="conditional_on_state",
    use_realized_state_for_signals=True,
    herding_enabled=False,
    size_dist="fixed",
    size_dist_params=dict(size=1.0),
)

# Structural-model parameters. Written as a literal because "lambda" is a
# Python keyword and cannot be passed as a keyword argument to dict().
_structural = {
    "T": 1.0,
    "K": 1.0,
    "S0": 1.0,
    "V0": 0.04,
    "kappa": 1.0,
    "theta": 0.04,
    "xi": 0.2,
    "rho": -0.3,
    "alpha": 0.7,
    "lambda": 0.1,
}

config = {
    "seed": 123,
    "n_runs": 200,
    "p_true_dist": dict(type="fixed", value=0.55),
    "traders": _traders,
    "market": _market,
    "structural_cfg": _structural,
    "ml_cfg": dict(base_prob=0.5, bias=0.0, noise_std=0.02, calibration=1.0, r_ml=0.8),
    "prior_cfg": dict(n_str=10.0, n_ml_eff=5.0, n_ml_scale=1.0),
    # Both correction stages are switched off for the baseline run.
    "stage1_cfg": dict(enabled=False),
    "stage2_cfg": dict(enabled=False),
    "ece_bins": 10,
    "include_outcome_coverage": False,
}

# Run the baseline configuration.
results = run_experiment(config)

# Console report: per-model metrics first, then the paired-test results.
print_summary('Baseline', results)
print("\nPaired tests (CAPOPM vs baselines):")
paired_tests_text = json.dumps(results["tests"], indent=2)
print(paired_tests_text)

# Persist the full result dictionary as JSON; the output directory is created
# on demand (relative to the current working directory).
baseline_summary_path = "results/master_experiment_summary.json"
os.makedirs(os.path.dirname(baseline_summary_path), exist_ok=True)
with open(baseline_summary_path, mode="w", encoding="utf-8") as summary_file:
    json.dump(results, summary_file, indent=2)

print(f"\nSaved results to {baseline_summary_path}")



Baseline summary:
  capopm:
    brier: 0.006514944410564751
    log_score: -0.7099463393823712
    mae_prob: 0.06645594510106426
    calibration_ece: 0.07988453018154136
    coverage_90_ptrue: 0.725
    coverage_95_ptrue: 0.795
    calibration_diagnostics: {'n_unique_predictions': 200, 'n_nonempty_bins': 4, 'degenerate_binning': False}
  raw_parimutuel:
    brier: 0.00807648844661832
    log_score: -0.7145341629769678
    mae_prob: 0.0746948051948052
    calibration_ece: 0.10142857142857148
    calibration_diagnostics: {'n_unique_predictions': 28, 'n_nonempty_bins': 6, 'degenerate_binning': False}
  structural_only:
    brier: 0.006966200704332167
    log_score: -0.711218736356168
    mae_prob: 0.06899529416313721
    calibration_ece: 0.09568235305098008
    coverage_90_ptrue: 0.725
    coverage_95_ptrue: 0.795
    calibration_diagnostics: {'n_unique_predictions': 28, 'n_nonempty_bins': 4, 'degenerate_binning': False}
  ml_only:
    brier: 0.007753649754253184
    log_score: -0.713668

## Phase 6 stress run (Stage 1 enabled)


In [10]:
# Stress variant: a shallow copy of `config` in which only the 'stage1_cfg'
# entry is replaced (Stage 1 enabled). All other sections are the very same
# objects as in `config`, and the key order of `config` is retained.
stress_config = dict(
    config,
    stage1_cfg=dict(
        enabled=True,
        w_min=0.3,
        w_max=1.0,
        longshot_ref_p=0.5,
        longshot_gamma=1.5,
        herding_lambda=0.8,
        herding_window=20,
    ),
)

# Run the stress configuration and print its summary and paired tests.
stress_results = run_experiment(stress_config)
print_summary('Stress', stress_results)
print("\nPaired tests (CAPOPM vs baselines):")
print(json.dumps(stress_results["tests"], indent=2))

# Ensure the output directory exists before writing, so this cell does not
# fail with FileNotFoundError when nothing has created results/ beforehand.
os.makedirs("results", exist_ok=True)
with open("results/master_experiment_stress_summary.json", "w", encoding="utf-8") as f:
    json.dump(stress_results, f, indent=2)

print("\nSaved results to results/master_experiment_stress_summary.json")



Stress summary:
  capopm:
    brier: 0.004842679433794546
    log_score: -0.7019065876557901
    mae_prob: 0.05684925091248223
    calibration_ece: 0.062041203339886566
    coverage_90_ptrue: 0.93
    coverage_95_ptrue: 0.995
    calibration_diagnostics: {'n_unique_predictions': 200, 'n_nonempty_bins': 3, 'degenerate_binning': False}
  raw_parimutuel:
    brier: 0.00807648844661832
    log_score: -0.7145341629769678
    mae_prob: 0.0746948051948052
    calibration_ece: 0.10142857142857148
    calibration_diagnostics: {'n_unique_predictions': 28, 'n_nonempty_bins': 6, 'degenerate_binning': False}
  structural_only:
    brier: 0.006966200704332167
    log_score: -0.711218736356168
    mae_prob: 0.06899529416313721
    calibration_ece: 0.09568235305098008
    coverage_90_ptrue: 0.725
    coverage_95_ptrue: 0.795
    calibration_diagnostics: {'n_unique_predictions': 28, 'n_nonempty_bins': 4, 'degenerate_binning': False}
  ml_only:
    brier: 0.007753649754253184
    log_score: -0.71366841