# Experiment 003: Optimize Individual N Values with C++ Tools

Strategy: The baseline (70.676102) is at a local optimum. Ensemble approaches fail Kaggle validation.

Approach:
1. Start from the baseline (which passes Kaggle)
2. Run C++ optimizer (bbox3) on the FULL baseline with many iterations
3. Check if any improvement is found
4. If improved, verify no overlaps and submit

In [1]:
# Put the project root first on the import path so the local `utils` import below resolves.
# NOTE(review): presumably utils.py lives directly under /home/code — confirm.
import sys
sys.path.insert(0, '/home/code')

# Data handling, process launching and filesystem helpers.
import pandas as pd
import numpy as np
import subprocess
import shutil
import os
# Project-local helpers; this notebook calls load_submission, score_submission
# and verify_submission_no_overlaps, the remaining names are imported but not
# used in the cells shown here.
from utils import (
    load_submission, load_trees_for_n, get_trees_data_for_n,
    has_overlap_strict, get_bounding_box_side, calculate_score_for_n,
    score_submission, verify_submission_no_overlaps
)
import json

# Sanity marker: reaching this line means every import above succeeded.
print("Utilities loaded successfully!")

Utilities loaded successfully!


In [2]:
# Read the baseline submission produced by experiment 000.
baseline_path = '/home/code/experiments/000_baseline/submission.csv'
baseline_df = load_submission(baseline_path)
print(f"Baseline loaded: {baseline_df.shape}")

# Establish the reference score that later cells compare against.
# Overlap checking is switched off for this scoring pass.
baseline_score, baseline_scores_by_n, _ = score_submission(
    baseline_df,
    check_overlaps=False,
)
print(f"Baseline score: {baseline_score:.6f}")

Baseline loaded: (20100, 4)


Baseline score: 70.676102


In [3]:
# Copy baseline to working directory
work_dir = '/home/code/experiments/003_cpp_optimization'
# Create the experiment folder if it is missing so a fresh checkout can
# Restart & Run All without os.chdir raising FileNotFoundError.
os.makedirs(work_dir, exist_ok=True)
# Later cells refer to 'submission_best.csv', 'submission.csv' and
# 'metrics.json' by relative path, so the working directory must be work_dir.
os.chdir(work_dir)

# The optimizer is pointed at this copy, leaving the original baseline file untouched.
shutil.copy(baseline_path, 'submission_best.csv')
print(f"Copied baseline to {work_dir}/submission_best.csv")

Copied baseline to /home/code/experiments/003_cpp_optimization/submission_best.csv


In [4]:
# Locations of the prebuilt C++ optimizer binaries shipped with the snapshot.
bbox3_path = '/home/nonroot/snapshots/santa-2025/21329069570/code/code/bbox3'
sa_path = '/home/nonroot/snapshots/santa-2025/21329069570/code/code/sa_v1_parallel'

# Report up front whether each binary is present on disk.
bbox3_found = os.path.exists(bbox3_path)
sa_found = os.path.exists(sa_path)
print(f"bbox3 exists: {bbox3_found}")
print(f"sa_v1_parallel exists: {sa_found}")

bbox3 exists: True
sa_v1_parallel exists: True


In [None]:
# Run bbox3 optimizer on the baseline with many iterations
# This will try to improve the submission in-place
BBOX3_ARGS = ['-i', 'submission_best.csv', '-n', '50000', '-r', '10']
BBOX3_TIMEOUT_S = 600  # 10 minute timeout


def _tail(captured, limit):
    """Return the last `limit` characters of captured process output as text.

    Args:
        captured: Output captured from the child process. May be a str,
            bytes, or None; the output attached to subprocess.TimeoutExpired
            is bytes (or None) even when text=True was requested.
        limit: Maximum number of trailing characters to keep.

    Returns:
        A str holding at most `limit` trailing characters ("" for None).
    """
    if captured is None:
        return ""
    if isinstance(captured, bytes):
        captured = captured.decode(errors="replace")
    # Negative slicing already returns the whole string when it is shorter
    # than `limit`, so no explicit length check is needed.
    return captured[-limit:]


print("Running bbox3 optimizer...")
# Build the echoed command from the real argument list so the log cannot
# drift from what is actually executed.
print("Command: bbox3 " + " ".join(BBOX3_ARGS))
print()

try:
    result = subprocess.run(
        [bbox3_path, *BBOX3_ARGS],
        capture_output=True,
        text=True,
        timeout=BBOX3_TIMEOUT_S,
    )
    bbox3_stdout, bbox3_stderr = result.stdout, result.stderr
    if result.returncode != 0:
        # Surface failures explicitly; otherwise a crashed optimizer looks
        # exactly like "no improvement" in the cells below.
        print(f"WARNING: bbox3 exited with return code {result.returncode}; "
              "submission_best.csv may be unchanged or incomplete.")
except subprocess.TimeoutExpired as exc:
    # subprocess.run has already killed the child at this point. Keep whatever
    # output was captured so the run can still be diagnosed.
    result = None
    bbox3_stdout, bbox3_stderr = exc.stdout, exc.stderr
    print(f"WARNING: bbox3 timed out after {BBOX3_TIMEOUT_S} seconds and was killed; "
          "submission_best.csv may be unchanged or incomplete.")

print("STDOUT:")
print(_tail(bbox3_stdout, 3000))
print("\nSTDERR:")
print(_tail(bbox3_stderr, 1000))

In [None]:
# Re-read the working copy that the optimizer was run against and score it
# the same way as the baseline (overlap checking off for this pass).
optimized_df = load_submission('submission_best.csv')
optimized_score, optimized_scores_by_n, _ = score_submission(
    optimized_df,
    check_overlaps=False,
)

# A positive improvement means the optimized score is lower than the baseline.
print(
    f"\nBaseline score: {baseline_score:.6f}",
    f"Optimized score: {optimized_score:.6f}",
    f"Improvement: {baseline_score - optimized_score:.6f}",
    sep="\n",
)

In [None]:
# Validate the optimized submission before it is considered for saving.
print("\nVerifying no overlaps...")
is_valid, overlapping_ns = verify_submission_no_overlaps(optimized_df)
print(
    f"Is valid: {is_valid}",
    f"Overlapping N values: {overlapping_ns}",
    sep="\n",
)

In [None]:
# If improved and valid, save; otherwise fall back to the baseline.
# The decision is computed once so the metrics and the branch below cannot disagree.
improved = bool(optimized_score < baseline_score and is_valid)

# Build the metrics once with the same keys in both outcomes.
# Values are cast to builtin float/bool so json.dump also works if the
# scoring helpers return numpy scalar types (e.g. np.bool_), which the json
# module cannot serialize.
metrics = {
    # cv_score is the score of whichever submission is actually shipped.
    'cv_score': float(optimized_score if improved else baseline_score),
    'optimized_score': float(optimized_score),
    'baseline_score': float(baseline_score),
    'improvement': float(baseline_score - optimized_score),
    'is_valid': bool(is_valid),
}

if improved:
    print("\nImprovement found! Saving...")

    # Save to experiment folder
    # NOTE(review): this re-serializes the parsed frame rather than copying
    # submission_best.csv; confirm load_submission + to_csv round-trips the
    # exact file format the competition expects.
    optimized_df.to_csv('submission.csv', index=False)

    # Copy to submission folder
    shutil.copy('submission.csv', '/home/submission/submission.csv')
    print("Copied to /home/submission/submission.csv")

    # Save metrics
    with open('metrics.json', 'w') as f:
        json.dump(metrics, f, indent=2)
    print(f"Metrics saved: {metrics}")
else:
    print("\nNo improvement or invalid submission.")
    print(f"Improvement: {baseline_score - optimized_score:.6f}")
    print(f"Is valid: {is_valid}")

    # Still save metrics for logging
    with open('metrics.json', 'w') as f:
        json.dump(metrics, f, indent=2)

    # Copy baseline to submission folder (fallback)
    shutil.copy(baseline_path, '/home/submission/submission.csv')
    print("Copied baseline to /home/submission/submission.csv (fallback)")

In [None]:
# Summary
# Target score for this experiment series, named once instead of being
# repeated as a literal in both the label and the gap computation.
TARGET_SCORE = 68.890873
RULE = "=" * 60

print(RULE)
print("EXPERIMENT 003: C++ OPTIMIZATION SUMMARY")
print(RULE)
print(f"Baseline score: {baseline_score:.6f}")
print(f"Optimized score: {optimized_score:.6f}")
print(f"Improvement: {baseline_score - optimized_score:.6f}")
print(f"Is valid: {is_valid}")
print(f"\nTarget: {TARGET_SCORE:.6f}")
# The gap is measured from the better (lower) of the two scores.
print(f"Gap to target: {min(baseline_score, optimized_score) - TARGET_SCORE:.6f}")
print(RULE)