# Experiment 004: sa_fast_v2 Optimization

Try the sa_fast_v2 optimizer (Simulated Annealing with fractional translation) on the VALIDATED baseline.

**Starting point**: candidate_024.csv (LB: 70.626088) - Kaggle validated

In [1]:
import pandas as pd
import numpy as np
import math
from numba import njit
import subprocess
import os
import time
import json

# Tree geometry: 15 outline points stored as parallel coordinate arrays,
# so (TX[k], TY[k]) is the k-th point of the unrotated, untranslated tree.
TX = np.array([
    0, 0.125, 0.0625, 0.2, 0.1,
    0.35, 0.075, 0.075, -0.075, -0.075,
    -0.35, -0.1, -0.2, -0.0625, -0.125,
])
TY = np.array([
    0.8, 0.5, 0.5, 0.25, 0.25,
    0, 0, -0.2, -0.2, 0,
    0, 0.25, 0.25, 0.5, 0.5,
])

@njit
def score_group(xs, ys, degs, tx, ty):
    """Score one group of placed shapes by its square bounding box.

    Every shape i is the template (tx, ty) rotated by degs[i] degrees about
    the origin and then shifted by (xs[i], ys[i]). The axis-aligned bounding
    box over all transformed points is taken, and the result is the square of
    its longer side divided by the number of shapes (xs.size).
    """
    count = xs.size
    n_points = tx.size
    # Sentinels far outside any real coordinate so the first point always wins.
    min_x = 1e300
    min_y = 1e300
    max_x = -1e300
    max_y = -1e300
    for k in range(count):
        theta = degs[k] * math.pi / 180.0
        cos_t = math.cos(theta)
        sin_t = math.sin(theta)
        for p in range(n_points):
            # 2-D rotation followed by translation.
            px = cos_t * tx[p] - sin_t * ty[p] + xs[k]
            py = sin_t * tx[p] + cos_t * ty[p] + ys[k]
            min_x = min(min_x, px)
            max_x = max(max_x, px)
            min_y = min(min_y, py)
            max_y = max(max_y, py)
    extent = max(max_x - min_x, max_y - min_y)
    return extent * extent / count

def calculate_total_score(df):
    """Sum the per-group scores of a submission frame.

    For each n from 1 to 200, the rows whose ``id`` starts with the
    zero-padded prefix ``f'{n:03d}_'`` form group n. The ``x``, ``y`` and
    ``deg`` columns hold strings; their first character is dropped before the
    remainder is parsed as float.

    A group whose row count is not exactly n is skipped silently: it adds
    nothing to the total and has no entry in the returned dict.

    Returns:
        (total_score, scores_by_n) where scores_by_n maps n to that group's
        score_group(...) value.
    """
    def _as_floats(column):
        # Strip the one-character prefix, then convert the rest to float.
        return column.str[1:].astype(float).values

    per_group = {}
    running_total = 0.0
    ids = df['id']
    for n in range(1, 201):
        rows = df[ids.str.startswith(f'{n:03d}_')]
        if len(rows) == n:
            group_score = score_group(
                _as_floats(rows['x']),
                _as_floats(rows['y']),
                _as_floats(rows['deg']),
                TX,
                TY,
            )
            per_group[n] = group_score
            running_total += group_score
    return running_total, per_group


print("Scoring functions defined")

Scoring functions defined


In [2]:
# Calculate initial score of VALIDATED submission
os.chdir('/home/code/experiments/004_sa_fast_v2_optimization')
df_initial = pd.read_csv('submission.csv')
initial_score, initial_scores_by_n = calculate_total_score(df_initial)

# calculate_total_score silently skips any group n whose row count is not n,
# which would make the baseline look better than it is. Fail loudly instead,
# because every later comparison in this notebook is made against this score.
missing_groups = [n for n in range(1, 201) if n not in initial_scores_by_n]
if missing_groups:
    raise ValueError(
        f"submission.csv is incomplete: {len(missing_groups)} group(s) could not be scored "
        f"(first missing n: {missing_groups[0]})"
    )

print(f"Initial Score: {initial_score:.6f}")
print(f"Expected LB: 70.626088")
print(f"Rows: {len(df_initial)}")
print(f"\nThis is the VALIDATED submission (candidate_024.csv)")

Initial Score: 70.626088
Expected LB: 70.626088
Rows: 20100

This is the VALIDATED submission (candidate_024.csv)


In [3]:
# Check sa_fast_v2 help to understand parameters
# NOTE: in the recorded run the binary printed no usage text for --help; it
# started a normal run and tried to load submission1.csv. Once that file exists
# (run_sa_fast_v2 creates it), this probe may run the optimizer until the 10s
# timeout, so TimeoutExpired is handled here rather than aborting the notebook.
try:
    result = subprocess.run(['./sa_fast_v2', '--help'], capture_output=True, text=True, timeout=10)
except subprocess.TimeoutExpired:
    print("sa_fast_v2 --help did not exit within 10s (it appears to ignore --help and start a normal run)")
else:
    print("stdout:", result.stdout)
    print("stderr:", result.stderr)

stdout: Loading csv: submission1.csv

stderr: Error: Could not open input CSV: submission1.csv



In [4]:
# Run sa_fast_v2 optimizer
# NOTE: sa_fast_v2 expects input as "submission1.csv" and outputs to "submission2.csv"

def run_sa_fast_v2(n_iter, rounds, timeout=600,
                   work_dir='/home/code/experiments/004_sa_fast_v2_optimization'):
    """Run one sa_fast_v2 pass and keep its output only if it is a valid improvement.

    The optimizer is invoked as ``./sa_fast_v2 -n <n_iter> -r <rounds>`` inside
    ``work_dir``. It reads ``submission1.csv`` and writes ``submission2.csv``
    (names fixed by the binary), so the current ``submission.csv`` is copied to
    ``submission1.csv`` before the run.

    The output replaces ``submission.csv`` only when all of the following hold:
      * it was written by this invocation (a leftover ``submission2.csv`` is
        removed before the run),
      * it can be read and scored,
      * it has the same row count and the same set of scored groups as the input,
      * its total score is strictly lower than the input's.
    Otherwise ``submission.csv`` is left untouched.

    Args:
        n_iter: value passed to the optimizer's ``-n`` flag.
        rounds: value passed to the optimizer's ``-r`` flag.
        timeout: seconds before the optimizer process is killed.
        work_dir: directory holding the binary and the submission CSV files.

    Returns:
        (score_after, improvement): the score of ``submission.csv`` after this
        call, and ``score_before - score_after`` (0.0 when the output was
        rejected or missing).
    """
    import shutil

    start_time = time.time()

    # Resolve every file against work_dir so the Python side and the
    # subprocess (which runs with cwd=work_dir) always see the same files.
    current_path = os.path.join(work_dir, 'submission.csv')
    input_path = os.path.join(work_dir, 'submission1.csv')
    output_path = os.path.join(work_dir, 'submission2.csv')

    # Copy current submission to submission1.csv (input)
    shutil.copy(current_path, input_path)

    # Drop any output left by an earlier invocation; otherwise a run that
    # produces nothing would be credited with the stale file.
    if os.path.exists(output_path):
        os.remove(output_path)

    # Read initial score
    df_before = pd.read_csv(input_path)
    score_before, scores_before = calculate_total_score(df_before)

    # Run sa_fast_v2
    cmd = ['./sa_fast_v2', '-n', str(n_iter), '-r', str(rounds)]
    print(f"Running: {' '.join(cmd)}")

    try:
        result = subprocess.run(
            cmd,
            cwd=work_dir,
            capture_output=True,
            text=True,
            timeout=timeout
        )
        print(f"stdout: {result.stdout[:500]}...")
        if result.stderr:
            print(f"stderr: {result.stderr[:200]}")
        if result.returncode != 0:
            print(f"sa_fast_v2 exited with code {result.returncode}")
    except subprocess.TimeoutExpired:
        # subprocess.run kills the child on timeout, so whatever is on disk
        # may be an intermediate or partially written result.
        print(f"Timeout after {timeout}s")

    score_after = score_before
    if not os.path.exists(output_path):
        print("No output file generated!")
    else:
        try:
            df_after = pd.read_csv(output_path)
            candidate_score, candidate_scores = calculate_total_score(df_after)
        except (ValueError, KeyError, AttributeError) as exc:
            # pandas parser errors derive from ValueError; a damaged file can
            # also surface as missing columns or non-string columns.
            print(f"Rejected output: submission2.csv could not be read or scored ({exc})")
        else:
            # calculate_total_score skips groups with a wrong row count, which
            # LOWERS the total. A truncated file would therefore look like an
            # improvement unless completeness is checked explicitly.
            complete = (
                len(df_after) == len(df_before)
                and candidate_scores.keys() == scores_before.keys()
            )
            if not complete:
                print("Rejected output: submission2.csv is incomplete "
                      f"({len(df_after)} rows, {len(candidate_scores)} scored groups)")
            elif candidate_score < score_before:
                # Copy improved result back to submission.csv
                shutil.copy(output_path, current_path)
                shutil.copy(output_path, input_path)  # For next iteration
                score_after = candidate_score
            else:
                print(f"Rejected output: candidate score {candidate_score:.6f} "
                      f"is not better than {score_before:.6f}")

    elapsed = time.time() - start_time
    improvement = score_before - score_after

    print(f"Score: {score_before:.6f} -> {score_after:.6f} (improvement: {improvement:.6f})")
    print(f"Time: {elapsed:.1f}s")

    return score_after, improvement

print("sa_fast_v2 runner defined")

sa_fast_v2 runner defined


In [5]:
# Sweep sa_fast_v2 over a small grid of (iterations, rounds) settings.
# Each call starts from whatever submission.csv the previous call left behind.
banner = "=" * 60
print(banner)
print("Running sa_fast_v2 optimization")
print(banner)

results = []
best_score = initial_score

# Same visiting order as two nested loops: n_iter outermost, rounds innermost.
param_grid = [(n, r) for n in [5000, 10000] for r in [50, 80]]
for n_iter, rounds in param_grid:
    score, improvement = run_sa_fast_v2(n_iter, rounds, timeout=300)
    results.append({'n': n_iter, 'r': rounds, 'score': score, 'improvement': improvement})
    best_score = min(best_score, score)
    print()

Running sa_fast_v2 optimization


Running: ./sa_fast_v2 -n 5000 -r 50


Timeout after 300s


Score: 70.626088 -> 70.623148 (improvement: 0.002940)
Time: 301.1s



Running: ./sa_fast_v2 -n 5000 -r 80


Timeout after 300s


Score: 70.623148 -> 70.622889 (improvement: 0.000260)
Time: 301.2s



Running: ./sa_fast_v2 -n 10000 -r 50


Timeout after 300s


Score: 70.622889 -> 70.622844 (improvement: 0.000044)
Time: 301.2s



Running: ./sa_fast_v2 -n 10000 -r 80


Timeout after 300s


Score: 70.622844 -> 70.622842 (improvement: 0.000002)
Time: 301.2s



In [None]:
# Follow-up sweep: longer runs, recorded in the same `results` list and
# tracked against the same `best_score` as the first sweep.
banner = "=" * 60
print(banner)
print("CONTINUING sa_fast_v2 optimization")
print(banner)

# Run more iterations to squeeze out more improvement
for n_iter, rounds in [(15000, 80), (15000, 100)]:
    score, improvement = run_sa_fast_v2(n_iter, rounds, timeout=400)
    results.append({'n': n_iter, 'r': rounds, 'score': score, 'improvement': improvement})
    best_score = min(best_score, score)
    print()

In [None]:
# Summary: headline numbers first, then one line per optimizer run.
summary_lines = [
    "\n" + "=" * 60,
    "OPTIMIZATION SUMMARY",
    "=" * 60,
    f"Initial score (validated): {initial_score:.6f}",
    f"Final score: {best_score:.6f}",
    f"Total improvement: {initial_score - best_score:.6f}",
    f"\nAll runs:",
]
summary_lines += [
    f"  n={r['n']:5d}, r={r['r']:3d}: score={r['score']:.6f}, improvement={r['improvement']:.6f}"
    for r in results
]
# One joined print emits the same text as printing each line separately.
print("\n".join(summary_lines))

In [None]:
# Calculate final score and copy to submission folder
df_final = pd.read_csv('submission.csv')
final_score, final_scores_by_n = calculate_total_score(df_final)

# Refuse to publish an incomplete file. calculate_total_score skips any group
# whose row count is wrong, so a damaged submission.csv would otherwise show a
# deceptively low "verified" score and still be copied out.
expected_rows = sum(range(1, 201))  # group n contributes n rows, n = 1..200
unscored_groups = [n for n in range(1, 201) if n not in final_scores_by_n]
if unscored_groups or len(df_final) != expected_rows:
    raise ValueError(
        f"submission.csv is not a complete submission: {len(df_final)} rows "
        f"(expected {expected_rows}), {len(unscored_groups)} group(s) unscored"
    )

print(f"\nFinal verified score: {final_score:.6f}")

import shutil
os.makedirs('/home/submission', exist_ok=True)
shutil.copy('submission.csv', '/home/submission/submission.csv')
print("Copied to /home/submission/submission.csv")

In [None]:
# Persist the run summary as metrics.json in the current working directory.
total_gain = initial_score - final_score
metrics = {
    'cv_score': final_score,
    'initial_score': initial_score,
    'improvement': total_gain,
    'optimization_runs': results,
    'source': 'candidate_024.csv (LB validated: 70.626088)',
}

with open('metrics.json', 'w') as metrics_file:
    json.dump(metrics, metrics_file, indent=2)

print(f"Metrics saved. Final CV Score: {final_score:.6f}")