# Experiment 007: Safe Optimization

Start from the ONLY validated submission (candidate_003.csv, LB=70.622435) and run sa_fast_v2.
This ensures we don't introduce overlaps from external sources.

**Key insight**: Previous failures were caused by replacing the solutions for individual N values with "better" ones from external sources that contained overlaps Kaggle detected but our local checks did not.

In [1]:
import pandas as pd
import numpy as np
import math
from numba import njit
import subprocess
import os
import time
import json
import shutil

# Tree geometry: TX[j] / TY[j] are the x / y coordinates of vertex j of the
# tree shape (15 vertices), given before any rotation or translation is applied.
TX = np.array([
    0, 0.125, 0.0625, 0.2, 0.1,
    0.35, 0.075, 0.075, -0.075, -0.075,
    -0.35, -0.1, -0.2, -0.0625, -0.125,
])
TY = np.array([
    0.8, 0.5, 0.5, 0.25, 0.25,
    0, 0, -0.2, -0.2, 0,
    0, 0.25, 0.25, 0.5, 0.5,
])

@njit
def score_group(xs, ys, degs, tx, ty):
    """Score one group of placed shapes.

    Each placement i rotates the template vertices (tx, ty) by degs[i] degrees
    and shifts them by (xs[i], ys[i]). The score is the square of the longer
    side of the axis-aligned bounding box around all transformed vertices,
    divided by the number of placements.

    Args:
        xs, ys: per-placement translation offsets.
        degs: per-placement rotation angles in degrees.
        tx, ty: template vertex coordinates, paired by index.

    Returns:
        side ** 2 / number_of_placements.
    """
    count = xs.size
    n_vertices = tx.size

    # Sentinels so that the first real vertex always replaces them.
    lo_x = 1e300
    lo_y = 1e300
    hi_x = -1e300
    hi_y = -1e300

    for k in range(count):
        theta = degs[k] * math.pi / 180.0
        cos_t = math.cos(theta)
        sin_t = math.sin(theta)
        for v in range(n_vertices):
            # Rotate the template vertex, then translate it to the placement.
            px = cos_t * tx[v] - sin_t * ty[v] + xs[k]
            py = sin_t * tx[v] + cos_t * ty[v] + ys[k]
            lo_x = min(lo_x, px)
            hi_x = max(hi_x, px)
            lo_y = min(lo_y, py)
            hi_y = max(hi_y, py)

    extent = max(hi_x - lo_x, hi_y - lo_y)
    return extent * extent / count

def calculate_total_score(df):
    """Sum the per-group scores of a submission for N = 1..200.

    Rows belong to group N when their 'id' starts with the zero-padded prefix
    'NNN_'. The 'x', 'y' and 'deg' columns are strings whose first character
    is stripped before the remainder is parsed as a float.

    A group is only scored when it contains exactly N rows. Groups that do not
    are left out of the total, which makes the total *smaller*; because a
    smaller total looks like a better result, every skipped group is reported
    through a warning instead of being dropped silently.

    Args:
        df: submission DataFrame with string columns 'id', 'x', 'y', 'deg'.

    Returns:
        The sum of score_group(...) over all groups with the expected size.
    """
    import warnings  # local import: the notebook's import cell does not provide it

    total_score = 0.0
    skipped = []  # (n, actual_row_count) for every group left out of the total
    for n in range(1, 201):
        mask = df['id'].str.startswith(f'{n:03d}_')
        group = df[mask]
        if len(group) != n:
            skipped.append((n, len(group)))
            continue
        xs = group['x'].str[1:].astype(float).values
        ys = group['y'].str[1:].astype(float).values
        degs = group['deg'].str[1:].astype(float).values
        score = score_group(xs, ys, degs, TX, TY)
        total_score += score

    if skipped:
        preview = ', '.join(f'N={n} has {rows} rows' for n, rows in skipped[:5])
        more = '' if len(skipped) <= 5 else f' (+{len(skipped) - 5} more)'
        warnings.warn(
            f'calculate_total_score skipped {len(skipped)} group(s) with an '
            f'unexpected row count, so the total is understated: {preview}{more}',
            stacklevel=2,
        )
    return total_score

print("Scoring functions defined")

Scoring functions defined


In [2]:
# Verify starting point is the validated submission.
# The reference score and tolerance are named once so the printed expectation
# and the actual check cannot drift apart.
VALIDATED_LB_SCORE = 70.622435
SCORE_TOLERANCE = 0.0001

# All later cells use relative file names, so work inside the experiment folder.
os.chdir('/home/code/experiments/007_safe_optimization')
df_initial = pd.read_csv('submission.csv')
initial_score = calculate_total_score(df_initial)
score_matches = abs(initial_score - VALIDATED_LB_SCORE) < SCORE_TOLERANCE
print(f"Initial Score: {initial_score:.6f}")
print(f"Expected (validated LB): {VALIDATED_LB_SCORE:.6f}")
print(f"Match: {score_matches}")

# Stop here if submission.csv is not the validated baseline: everything below
# assumes it is, and the saved metrics record it as the source. If this fires
# on a re-run, restore the validated file as submission.csv first.
assert score_matches, (
    f"submission.csv scores {initial_score:.6f}, expected "
    f"{VALIDATED_LB_SCORE:.6f} (tolerance {SCORE_TOLERANCE})"
)

Initial Score: 70.622435
Expected (validated LB): 70.622435
Match: True


In [3]:
# Run sa_fast_v2 optimization
# sa_fast_v2 expects submission1.csv as input and outputs to submission2.csv

def run_sa_fast_v2(n_iter, rounds, timeout=600):
    """Run one sa_fast_v2 pass on submission.csv and keep its output only if it is safe.

    The current submission.csv is copied to submission1.csv (the optimizer's
    input, per the note above this function) and the binary is started with
    ``-n n_iter -r rounds``. Its output file submission2.csv replaces
    submission.csv and submission1.csv only when all of the following hold:

    * the file was produced by this run (any leftover file is deleted first),
    * it can be read and scored,
    * it contains exactly the same row ids as the input,
    * its score is not worse than the input's score.

    Output left behind by a run that hit the timeout is still considered,
    subject to the same checks.

    Args:
        n_iter: value passed to the optimizer's ``-n`` option.
        rounds: value passed to the optimizer's ``-r`` option.
        timeout: seconds to wait before the optimizer process is stopped.

    Returns:
        (score_after, improvement) where improvement = score_before - score_after.
        When the output is missing or rejected, score_after equals the input
        score and improvement is 0.
    """
    start_time = time.time()

    # Copy current to submission1.csv
    shutil.copy('submission.csv', 'submission1.csv')
    df_before = pd.read_csv('submission1.csv')
    score_before = calculate_total_score(df_before)

    # A leftover submission2.csv from an earlier run must not be mistaken for
    # the result of this run if the optimizer fails before writing anything.
    if os.path.exists('submission2.csv'):
        os.remove('submission2.csv')

    cmd = f'./sa_fast_v2 -n {n_iter} -r {rounds}'
    print(f"Running: {cmd}")

    try:
        result = subprocess.run(cmd.split(), capture_output=True, text=True, timeout=timeout)
        print(f"stdout: {result.stdout[:300]}...")
        if result.returncode != 0:
            print(f"Exit code {result.returncode}; stderr: {result.stderr[:300]}")
    except subprocess.TimeoutExpired:
        print(f"Timeout after {timeout}s")

    score_after = score_before
    if not os.path.exists('submission2.csv'):
        print("No output!")
    else:
        rejection = None
        candidate_score = None
        try:
            df_after = pd.read_csv('submission2.csv')
            ids_before = sorted(df_before['id'].astype(str))
            ids_after = sorted(df_after['id'].astype(str))
            if ids_after != ids_before:
                # Missing or extra rows would be skipped by the scorer and
                # show up as a bogus improvement.
                rejection = "row ids differ from the input"
            else:
                candidate_score = calculate_total_score(df_after)
        except (ValueError, KeyError, AttributeError) as exc:
            # Covers empty/garbled CSV, missing columns and unparsable values,
            # e.g. a file that was only partially written.
            rejection = f"output could not be read or scored ({exc})"

        # `not <=` (instead of `>`) also rejects a NaN score.
        if rejection is None and not candidate_score <= score_before:
            rejection = f"score got worse ({candidate_score:.6f})"

        if rejection is None:
            score_after = candidate_score
            shutil.copy('submission2.csv', 'submission.csv')
            shutil.copy('submission2.csv', 'submission1.csv')
        else:
            print(f"Rejected submission2.csv: {rejection}")

    improvement = score_before - score_after
    print(f"Score: {score_before:.6f} -> {score_after:.6f} (improvement: {improvement:.6f})")
    print(f"Time: {time.time() - start_time:.1f}s")
    return score_after, improvement

print("Optimizer defined")

Optimizer defined


In [None]:
# Run optimization
banner = "=" * 60
print(banner)
print("Running sa_fast_v2 on VALIDATED baseline")
print(banner)

results = []
best_score = initial_score

# Every (n_iter, rounds) combination, iterating rounds fastest. Runs are
# sequential: each call works on whatever submission.csv the previous one left.
schedule = [
    (n_iter, rounds)
    for n_iter in [5000, 10000, 15000]
    for rounds in [50, 80]
]
for n_iter, rounds in schedule:
    score, improvement = run_sa_fast_v2(n_iter, rounds, timeout=300)
    results.append({'n': n_iter, 'r': rounds, 'score': score, 'improvement': improvement})
    best_score = min(best_score, score)
    print()

In [None]:
# Summary: assemble the whole report first, then emit it with a single print.
rule = "=" * 60
summary_lines = [
    "\n" + rule,
    "OPTIMIZATION SUMMARY",
    rule,
    f"Initial score (validated LB): {initial_score:.6f}",
    f"Final score: {best_score:.6f}",
    f"Total improvement: {initial_score - best_score:.6f}",
    f"\nAll runs:",
]
# One line per optimizer run, in execution order.
summary_lines.extend(
    f"  n={run['n']:5d}, r={run['r']:3d}: score={run['score']:.6f}, improvement={run['improvement']:.6f}"
    for run in results
)
print("\n".join(summary_lines))

In [None]:
# Save final submission: re-score the file on disk, then export a copy of it.
working_csv = 'submission.csv'
export_dir = '/home/submission'
export_csv = '/home/submission/submission.csv'

# Score what is actually in the file rather than trusting the values tracked
# during the optimization loop.
df_final = pd.read_csv(working_csv)
final_score = calculate_total_score(df_final)
print(f"\nFinal verified score: {final_score:.6f}")

os.makedirs(export_dir, exist_ok=True)
shutil.copy(working_csv, export_csv)
print(f"Copied to {export_csv}")

In [None]:
# Save metrics: record the scores, the per-run history and the provenance of
# the starting file next to the experiment's outputs.
metrics = dict(
    cv_score=final_score,
    initial_score=initial_score,
    improvement=initial_score - final_score,
    optimization_runs=results,
    source='candidate_003.csv (validated LB: 70.622435)',
    note='Safe optimization - started from ONLY validated submission to avoid overlap issues',
)

# Serialize first, then write the text in one go.
metrics_text = json.dumps(metrics, indent=2)
with open('metrics.json', 'w') as metrics_file:
    metrics_file.write(metrics_text)

print(f"Metrics saved. Final CV Score: {final_score:.6f}")