# Experiment 004: 3-Phase bbox3 Optimization

Implementing the yongsukprasertsuk kernel's 3-phase optimization approach:
- Phase A: Short runs (2 min) with varied parameters to find promising (n, r) combinations
- Phase B: Medium runs (10 min) on top candidates
- Phase C: Long runs (20 min) on best candidates

In [3]:
import os
import subprocess
import shutil
import time
import re
import json
import pandas as pd
import numpy as np
from datetime import datetime

# Configuration
BBOX3_BIN = '/home/code/exploration/datasets/bbox3_compiled'
BASELINE_CSV = '/home/code/exploration/datasets/submission.csv'
WORK_DIR = '/home/code/experiments/004_bbox3_optimization'
WORK_CSV = os.path.join(WORK_DIR, 'submission.csv')

# Copy baseline to working directory
shutil.copy(BASELINE_CSV, WORK_CSV)
print(f"Copied baseline to {WORK_CSV}")

# Parse final score from bbox3 output
FINAL_SCORE_RE = re.compile(r'Final Score:\s*([0-9]+(?:\.[0-9]+)?)')

def parse_bbox3_score(stdout):
    m = FINAL_SCORE_RE.search(stdout or '')
    return float(m.group(1)) if m else None

def run_bbox3(n_iters, r_value, timeout_sec=120):
    """Run bbox3 optimizer with given parameters."""
    start = time.time()
    try:
        result = subprocess.run(
            [BBOX3_BIN, '-n', str(n_iters), '-r', str(r_value)],
            capture_output=True,
            text=True,
            timeout=timeout_sec,
            cwd=WORK_DIR
        )
        elapsed = time.time() - start
        score = parse_bbox3_score(result.stdout)
        return {'score': score, 'elapsed': elapsed, 'stdout': result.stdout, 'stderr': result.stderr}
    except subprocess.TimeoutExpired:
        return {'score': None, 'elapsed': timeout_sec, 'stdout': '', 'stderr': 'TIMEOUT'}

print("Functions defined. Ready to run optimization.")

Copied baseline to /home/code/experiments/004_bbox3_optimization/submission.csv
Functions defined. Ready to run optimization.


In [4]:
# Get initial score
print("Getting initial score...")
result = run_bbox3(100, 30, timeout_sec=60)  # Quick run to get initial score
print(f"stdout tail: {result['stdout'][-500:] if result['stdout'] else 'None'}")
initial_score = result['score']
if initial_score is None:
    initial_score = 70.647327  # Use known baseline
    print(f"Could not parse score, using known baseline: {initial_score}")
else:
    print(f"Initial score: {initial_score}")
print(f"Target: 68.919")
print(f"Gap: {initial_score - 68.919:.6f}")

Getting initial score...


stdout tail: .361339243106 -> 0.361339243106 (0.0000% better) üèÜ
[9] n= 60: 0.357258405133 -> 0.357258405133 (0.0000% better) üèÜ
[3] n=  3: 0.434745139035 -> 0.434745139035 (0.0000% better) üèÜ
[18] n=128: 0.340973666832 -> 0.340973578908 (0.0000% better) üèÜ

Optimization Complete
Initial Score: 70.647326897637
Final Score:   70.647326809705
Improvement:   0.000000087931 (0.00%)
Total Time:    37.6s
Saved results to: submission.csv

Initial score: 70.647326809705
Target: 68.919
Gap: 1.728327


In [5]:
# Phase A: Short runs with varied parameters
print("\n" + "="*60)
print("PHASE A: Short runs (2 min each) to find promising parameters")
print("="*60)

# Reset to baseline before Phase A
shutil.copy(BASELINE_CSV, WORK_CSV)

phase_a_params = {
    'n_values': [1000, 1500, 2000, 3000],
    'r_values': [30, 60, 90],
    'timeout': 120  # 2 minutes
}

phase_a_results = []
best_score = initial_score

for r in phase_a_params['r_values']:
    for n in phase_a_params['n_values']:
        print(f"\n[Phase A] Running n={n}, r={r}, timeout={phase_a_params['timeout']}s...")
        
        # Reset to baseline before each run
        shutil.copy(BASELINE_CSV, WORK_CSV)
        
        result = run_bbox3(n, r, timeout_sec=phase_a_params['timeout'])
        score = result['score']
        
        if score is not None:
            improvement = initial_score - score
            print(f"  Score: {score:.6f} (improvement: {improvement:.6f})")
            phase_a_results.append({
                'n': n, 'r': r, 'score': score, 
                'improvement': improvement, 'elapsed': result['elapsed']
            })
            if score < best_score:
                best_score = score
                print(f"  NEW BEST! Saving...")
                shutil.copy(WORK_CSV, os.path.join(WORK_DIR, f'best_a_n{n}_r{r}.csv'))
        else:
            print(f"  TIMEOUT or ERROR")

print(f"\nPhase A complete. Best score: {best_score:.6f}")


PHASE A: Short runs (2 min each) to find promising parameters

[Phase A] Running n=1000, r=30, timeout=120s...


  Score: 70.647327 (improvement: 0.000000)
  NEW BEST! Saving...

[Phase A] Running n=1500, r=30, timeout=120s...


  Score: 70.647327 (improvement: 0.000000)

[Phase A] Running n=2000, r=30, timeout=120s...


  Score: 70.647327 (improvement: 0.000000)
  NEW BEST! Saving...

[Phase A] Running n=3000, r=30, timeout=120s...


  Score: 70.647327 (improvement: 0.000000)

[Phase A] Running n=1000, r=60, timeout=120s...


  Score: 70.647326 (improvement: 0.000001)
  NEW BEST! Saving...

[Phase A] Running n=1500, r=60, timeout=120s...


  Score: 70.647327 (improvement: 0.000000)

[Phase A] Running n=2000, r=60, timeout=120s...


  Score: 70.647326 (improvement: 0.000000)

[Phase A] Running n=3000, r=60, timeout=120s...


  Score: 70.647327 (improvement: 0.000000)

[Phase A] Running n=1000, r=90, timeout=120s...


  Score: 70.647327 (improvement: 0.000000)

[Phase A] Running n=1500, r=90, timeout=120s...


  Score: 70.647327 (improvement: 0.000000)

[Phase A] Running n=2000, r=90, timeout=120s...


  TIMEOUT or ERROR

[Phase A] Running n=3000, r=90, timeout=120s...


  TIMEOUT or ERROR

Phase A complete. Best score: 70.647326


In [6]:
# Sort Phase A results by score
phase_a_results.sort(key=lambda x: x['score'])
print("\nPhase A Results (sorted by score):")
for i, r in enumerate(phase_a_results[:10]):
    print(f"  {i+1}. n={r['n']}, r={r['r']}: score={r['score']:.6f}, improvement={r['improvement']:.6f}")

# Select top candidates for Phase B
top_candidates = phase_a_results[:5] if len(phase_a_results) >= 5 else phase_a_results
print(f"\nSelected {len(top_candidates)} candidates for Phase B")


Phase A Results (sorted by score):
  1. n=1000, r=60: score=70.647326, improvement=0.000001
  2. n=2000, r=60: score=70.647326, improvement=0.000000
  3. n=1500, r=90: score=70.647327, improvement=0.000000
  4. n=2000, r=30: score=70.647327, improvement=0.000000
  5. n=3000, r=30: score=70.647327, improvement=0.000000
  6. n=1000, r=30: score=70.647327, improvement=0.000000
  7. n=3000, r=60: score=70.647327, improvement=0.000000
  8. n=1500, r=30: score=70.647327, improvement=0.000000
  9. n=1500, r=60: score=70.647327, improvement=0.000000
  10. n=1000, r=90: score=70.647327, improvement=0.000000

Selected 5 candidates for Phase B


In [7]:
# Phase B: Medium runs on top candidates
print("\n" + "="*60)
print("PHASE B: Medium runs (5 min each) on top candidates")
print("="*60)

phase_b_results = []

for candidate in top_candidates:
    n, r = candidate['n'], candidate['r']
    print(f"\n[Phase B] Running n={n*2}, r={r}, timeout=300s...")
    
    # Reset to baseline
    shutil.copy(BASELINE_CSV, WORK_CSV)
    
    result = run_bbox3(n*2, r, timeout_sec=300)  # 5 minutes, double iterations
    score = result['score']
    
    if score is not None:
        improvement = initial_score - score
        print(f"  Score: {score:.6f} (improvement: {improvement:.6f})")
        phase_b_results.append({
            'n': n*2, 'r': r, 'score': score,
            'improvement': improvement, 'elapsed': result['elapsed']
        })
        if score < best_score:
            best_score = score
            print(f"  NEW BEST! Saving...")
            shutil.copy(WORK_CSV, os.path.join(WORK_DIR, f'best_b_n{n*2}_r{r}.csv'))
    else:
        print(f"  TIMEOUT or ERROR")

print(f"\nPhase B complete. Best score: {best_score:.6f}")


PHASE B: Medium runs (5 min each) on top candidates

[Phase B] Running n=2000, r=60, timeout=300s...


  Score: 70.647327 (improvement: 0.000000)

[Phase B] Running n=4000, r=60, timeout=300s...


  Score: 70.647326 (improvement: 0.000001)
  NEW BEST! Saving...

[Phase B] Running n=3000, r=90, timeout=300s...


  Score: 70.647327 (improvement: 0.000000)

[Phase B] Running n=4000, r=30, timeout=300s...


  Score: 70.647327 (improvement: 0.000000)

[Phase B] Running n=6000, r=30, timeout=300s...


  Score: 70.647327 (improvement: 0.000000)

Phase B complete. Best score: 70.647326


In [None]:
# Phase C: Long run on best candidate
print("\n" + "="*60)
print("PHASE C: Long run (10 min) on best candidate")
print("="*60)

# Find best from Phase B
if phase_b_results:
    phase_b_results.sort(key=lambda x: x['score'])
    best_candidate = phase_b_results[0]
    n, r = best_candidate['n'], best_candidate['r']
    
    print(f"\n[Phase C] Running n={n*2}, r={r}, timeout=600s...")
    
    # Reset to baseline
    shutil.copy(BASELINE_CSV, WORK_CSV)
    
    result = run_bbox3(n*2, r, timeout_sec=600)  # 10 minutes
    score = result['score']
    
    if score is not None:
        improvement = initial_score - score
        print(f"  Score: {score:.6f} (improvement: {improvement:.6f})")
        if score < best_score:
            best_score = score
            print(f"  NEW BEST! Saving...")
            shutil.copy(WORK_CSV, os.path.join(WORK_DIR, 'best_final.csv'))

print(f"\nPhase C complete. Final best score: {best_score:.6f}")

In [None]:
# Summary and save final submission
print("\n" + "="*60)
print("OPTIMIZATION SUMMARY")
print("="*60)

print(f"Initial score: {initial_score:.6f}")
print(f"Final best score: {best_score:.6f}")
print(f"Total improvement: {initial_score - best_score:.6f}")
print(f"Target: 68.919")
print(f"Gap to target: {best_score - 68.919:.6f}")

# Find the best submission file
best_files = [f for f in os.listdir(WORK_DIR) if f.startswith('best_') and f.endswith('.csv')]
if best_files:
    # Use the most recent best file
    best_file = max(best_files, key=lambda f: os.path.getmtime(os.path.join(WORK_DIR, f)))
    print(f"\nBest submission file: {best_file}")
    
    # Copy to submission folder
    os.makedirs('/home/submission', exist_ok=True)
    shutil.copy(os.path.join(WORK_DIR, best_file), '/home/submission/submission.csv')
    print("Copied to /home/submission/submission.csv")
else:
    # Use baseline if no improvement
    print("\nNo improvement found. Using baseline.")
    shutil.copy(BASELINE_CSV, '/home/submission/submission.csv')

In [None]:
# Save metrics
metrics = {
    'cv_score': best_score,
    'initial_score': initial_score,
    'improvement': initial_score - best_score,
    'phase_a_results': phase_a_results,
    'phase_b_results': phase_b_results if 'phase_b_results' in dir() else [],
    'target': 68.919,
    'gap_to_target': best_score - 68.919
}

with open(os.path.join(WORK_DIR, 'metrics.json'), 'w') as f:
    json.dump(metrics, f, indent=2)

print(f"\nSaved metrics to {WORK_DIR}/metrics.json")
print(f"\nFinal CV Score: {best_score:.6f}")