# Experiment 004: Optimize Safe Ensemble with C++ Tools

Strategy: Run C++ optimizers on the SAFE ENSEMBLE (70.615788), not the baseline.
The baseline is at a local optimum, but the ensemble has different configurations that might be improvable.

In [1]:
# Put /home/code at the front of the module search path before importing
# `utils` below (presumably that is where the project-local `utils` module
# lives -- TODO confirm).
import sys
sys.path.insert(0, '/home/code')

import pandas as pd
import numpy as np
import subprocess
import shutil
import os
# Project-local helpers for loading, scoring and validating submissions.
from utils import (
    load_submission, load_trees_for_n, get_trees_data_for_n,
    has_overlap_strict, get_bounding_box_side, calculate_score_for_n,
    score_submission, verify_submission_no_overlaps
)
import json

# Reaching this line means every import above succeeded.
print("Utilities loaded successfully!")

Utilities loaded successfully!


In [2]:
# Switch into this experiment's directory so the relative file names used
# below (and by later cells) resolve against it.
work_dir = '/home/code/experiments/004_optimize_ensemble'
os.chdir(work_dir)

# --- Safe ensemble: load, then score with overlap checking switched off ---
ensemble_path = 'submission_best.csv'
ensemble_df = load_submission(ensemble_path)
print(f"Safe ensemble loaded: {ensemble_df.shape}")

ensemble_score, ensemble_scores_by_n, _ = score_submission(
    ensemble_df,
    check_overlaps=False,
)
print(f"Safe ensemble score: {ensemble_score:.6f}")

# --- Baseline: scored the same way, kept as the reference point ---
baseline_path = '/home/code/experiments/000_baseline/submission.csv'
baseline_df = load_submission(baseline_path)
baseline_score, _, _ = score_submission(
    baseline_df,
    check_overlaps=False,
)
print(f"Baseline score: {baseline_score:.6f}")

Safe ensemble loaded: (20100, 4)


Safe ensemble score: 70.615788


Baseline score: 70.676102


In [3]:
# Locations of the prebuilt C++ optimizer binaries used by this experiment.
bbox3_path = '/home/nonroot/snapshots/santa-2025/21329069570/code/code/bbox3'
sa_path = '/home/nonroot/snapshots/santa-2025/21329069570/code/code/sa_v1_parallel'

# Report, one line per tool, whether each binary is present on disk.
for tool_name, tool_path in (("bbox3", bbox3_path), ("sa_v1_parallel", sa_path)):
    print(f"{tool_name} exists: {os.path.exists(tool_path)}")

bbox3 exists: True
sa_v1_parallel exists: True


In [4]:
# Run bbox3 optimizer on the SAFE ENSEMBLE (not baseline!)
print("Running bbox3 optimizer on SAFE ENSEMBLE...")
print("Command: bbox3 -i submission_best.csv -n 50000 -r 10")
print()

# A run longer than the timeout raises subprocess.TimeoutExpired, which is
# deliberately left to propagate so the notebook stops instead of continuing
# with no optimizer output.
result = subprocess.run(
    [bbox3_path, '-i', 'submission_best.csv', '-n', '50000', '-r', '10'],
    capture_output=True,
    text=True,
    timeout=600  # 10 minute timeout
)

# Show only the tail of each stream; slicing with a negative start already
# returns the whole string when it is shorter than the limit.
print("STDOUT:")
print(result.stdout[-3000:])
print("\nSTDERR:")
print(result.stderr[-1000:])

# Fail loudly (after the logs above are visible) if the optimizer exited with
# a non-zero status; otherwise later cells would silently score and ship
# whatever files happen to be on disk from an earlier run.
result.check_returncode()

Running bbox3 optimizer on SAFE ENSEMBLE...
Command: bbox3 -i submission_best.csv -n 50000 -r 10



STDOUT:
Loading submission_best.csv...
Initial score: 70.615788
Round 1/10
Score after round 1: 70.615788
Round 2/10
Score after round 2: 70.615788
Round 3/10
Score after round 3: 70.615788
Round 4/10
Score after round 4: 70.615788
Round 5/10
Score after round 5: 70.615788
Round 6/10
Score after round 6: 70.615788
Round 7/10
Score after round 7: 70.615788
Round 8/10
Score after round 8: 70.615788
Round 9/10
Score after round 9: 70.615788
Round 10/10
Score after round 10: 70.615788
Saving to submission.csv...
Final score: 70.615788


STDERR:



In [5]:
# Load the optimized submission and check score.
# bbox3 reports "Saving to submission.csv..." in its stdout, so its result is
# read from that file. Reloading 'submission_best.csv' would only re-read the
# unmodified input and always show an improvement of zero.
optimized_df = load_submission('submission.csv')
optimized_score, optimized_scores_by_n, _ = score_submission(optimized_df, check_overlaps=False)

# Differences are printed as (before - after), so a positive number means the
# score went down.
print(f"\nSafe ensemble score (before): {ensemble_score:.6f}")
print(f"Optimized score (after bbox3): {optimized_score:.6f}")
print(f"Improvement from bbox3: {ensemble_score - optimized_score:.6f}")
print(f"\nBaseline score: {baseline_score:.6f}")
print(f"Total improvement over baseline: {baseline_score - optimized_score:.6f}")


Safe ensemble score (before): 70.615788
Optimized score (after bbox3): 70.615788
Improvement from bbox3: 0.000000

Baseline score: 70.676102
Total improvement over baseline: 0.060314


In [6]:
# Overlap validation of the optimized submission: the helper returns a
# validity flag plus the list of N values it flagged.
print("\nVerifying no overlaps with strict tolerance...")
overlap_check = verify_submission_no_overlaps(optimized_df)
is_valid, overlapping_ns = overlap_check
print(f"Is valid: {is_valid}")
print(f"Overlapping N values: {overlapping_ns}")


Verifying no overlaps with strict tolerance...


Is valid: True
Overlapping N values: []


In [7]:
# Fallback repair: every N flagged as overlapping has its rows swapped for the
# baseline's rows for that N. Nothing happens when the list is empty.
if overlapping_ns:
    print(f"\nRepairing {len(overlapping_ns)} overlapping N values with baseline...")

    for n in overlapping_ns:
        # Rows belonging to this N are identified by their zero-padded id prefix.
        prefix = f"{n:03d}_"
        baseline_n_data = get_trees_data_for_n(baseline_df, n)
        keep_mask = ~optimized_df['id'].str.startswith(prefix)
        optimized_df = pd.concat(
            [optimized_df[keep_mask], baseline_n_data],
            ignore_index=True,
        )

    # Restore (n, tree index) ordering using temporary numeric sort keys parsed
    # from the id; the helper columns are dropped again afterwards.
    optimized_df = (
        optimized_df
        .assign(
            n=optimized_df['id'].apply(lambda tree_id: int(tree_id.split('_')[0])),
            tree_idx=optimized_df['id'].apply(lambda tree_id: int(tree_id.split('_')[1])),
        )
        .sort_values(['n', 'tree_idx'])
        .drop(columns=['n', 'tree_idx'])
        .reset_index(drop=True)
    )

    # Re-score the patched submission.
    repaired_score, _, _ = score_submission(optimized_df, check_overlaps=False)
    print(f"Repaired score: {repaired_score:.6f}")

    # Re-run the overlap validation on the patched submission.
    is_valid, remaining_overlaps = verify_submission_no_overlaps(optimized_df)
    print(f"Is valid after repair: {is_valid}")
    print(f"Remaining overlaps: {remaining_overlaps}")

    # From here on the repaired score is the one reported and saved.
    optimized_score = repaired_score

In [8]:
# Persist the final submission and a small metrics record for this experiment.
print("\nSaving optimized submission...")

# 1) Write into the experiment folder (the current working directory).
optimized_df.to_csv('submission.csv', index=False)
print(f"Saved to {work_dir}/submission.csv")

# 2) Mirror the file into the shared submission folder.
shutil.copy('submission.csv', '/home/submission/submission.csv')
print("Copied to /home/submission/submission.csv")

# 3) Collect the headline numbers; gains are (before - after), so positive
#    values mean the score decreased.
bbox3_gain = ensemble_score - optimized_score
baseline_gain = baseline_score - optimized_score
metrics = dict(
    cv_score=optimized_score,
    ensemble_score_before=ensemble_score,
    baseline_score=baseline_score,
    improvement_from_bbox3=bbox3_gain,
    total_improvement_over_baseline=baseline_gain,
    is_valid=is_valid,
    overlaps_repaired=overlapping_ns or [],
)
with open('metrics.json', 'w') as metrics_file:
    json.dump(metrics, metrics_file, indent=2)
print(f"\nMetrics saved: {metrics}")


Saving optimized submission...
Saved to /home/code/experiments/004_optimize_ensemble/submission.csv
Copied to /home/submission/submission.csv

Metrics saved: {'cv_score': 70.61578810306398, 'ensemble_score_before': 70.61578810306398, 'baseline_score': 70.67610239809187, 'improvement_from_bbox3': 0.0, 'total_improvement_over_baseline': 0.06031429502789365, 'is_valid': True, 'overlaps_repaired': []}


In [9]:
# Summary of the experiment's headline numbers.
# The target score is defined once here instead of being repeated as a bare
# literal in several places (including inside a string), so the printed
# target and the gap computed from it cannot drift apart.
TARGET_SCORE = 68.888293
gap_to_target = optimized_score - TARGET_SCORE
gap_to_target_pct = gap_to_target / TARGET_SCORE * 100

banner = "=" * 60
print(banner)
print("EXPERIMENT 004: OPTIMIZE ENSEMBLE SUMMARY")
print(banner)
print(f"Baseline score: {baseline_score:.6f}")
print(f"Safe ensemble score (before): {ensemble_score:.6f}")
print(f"Optimized score (after bbox3): {optimized_score:.6f}")
# Improvements are (before - after): positive means the score decreased.
print(f"Improvement from bbox3: {ensemble_score - optimized_score:.6f}")
print(f"Total improvement over baseline: {baseline_score - optimized_score:.6f}")
print(f"Is valid: {is_valid}")
print(f"\nTarget: {TARGET_SCORE:.6f}")
print(f"Gap to target: {gap_to_target:.6f} ({gap_to_target_pct:.2f}%)")
print(banner)

EXPERIMENT 004: OPTIMIZE ENSEMBLE SUMMARY
Baseline score: 70.676102
Safe ensemble score (before): 70.615788
Optimized score (after bbox3): 70.615788
Improvement from bbox3: 0.000000
Total improvement over baseline: 0.060314
Is valid: True

Target: 68.888293
Gap to target: 1.727495 (2.51%)
