# Experiment 007: SA Optimization with Different Initial Configurations

The key insight: Generate DIFFERENT initial configurations (zaburo grid-based), then OPTIMIZE them with sa_v1_parallel. The raw score doesn't matter - what matters is where they end up AFTER optimization.

In [1]:
import numpy as np
import pandas as pd
from decimal import Decimal, getcontext
from shapely.geometry import Polygon
from shapely.affinity import rotate, translate
from shapely.strtree import STRtree
import subprocess
import json
import os
import shutil

getcontext().prec = 25

# Tree shape coordinates
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]

WORK_DIR = '/home/code/experiments/007_sa_optimization'
SA_BIN = '/home/code/exploration/datasets/sa_v1_parallel'
BASELINE_CSV = '/home/code/exploration/datasets/submission.csv'

print("Setup complete")

Setup complete


In [2]:
# Generate zaburo grid-based solutions and save to CSV
print("Generating zaburo grid-based solutions...")

def find_best_trees_simple(n):
    """Simplified zaburo grid generator."""
    best_score, best_config = float("inf"), None
    
    for n_even in range(1, n + 1):
        for n_odd in [n_even, n_even - 1]:
            if n_odd < 0:
                continue
            
            trees = []
            rest = n
            r = 0
            
            while rest > 0:
                m = min(rest, n_even if r % 2 == 0 else n_odd)
                if m <= 0:
                    break
                rest -= m
                
                angle = 0 if r % 2 == 0 else 180
                x_offset = 0 if r % 2 == 0 else 0.35  # Half tree width
                y = r // 2 * 1.0 if r % 2 == 0 else (0.8 + (r - 1) // 2 * 1.0)
                
                for i in range(m):
                    trees.append((0.7 * i + x_offset, y, angle))
                r += 1
            
            if len(trees) != n:
                continue
            
            # Calculate bounding box
            all_x, all_y = [], []
            for x, y, deg in trees:
                # Simple bounding box approximation
                all_x.extend([x - 0.35, x + 0.35])
                all_y.extend([y - 0.2, y + 0.8])
            
            side = max(max(all_x) - min(all_x), max(all_y) - min(all_y))
            score = side ** 2
            
            if score < best_score:
                best_score = score
                best_config = trees
    
    return best_score, best_config

# Generate for all N
zaburo_configs = {}
for n in range(1, 201):
    score, config = find_best_trees_simple(n)
    zaburo_configs[n] = config
    if n <= 5 or n % 50 == 0:
        print(f"N={n}: {len(config)} trees")

print(f"Generated {len(zaburo_configs)} configurations")

Generating zaburo grid-based solutions...
N=1: 1 trees
N=2: 2 trees
N=3: 3 trees
N=4: 4 trees
N=5: 5 trees
N=50: 50 trees
N=100: 100 trees


N=150: 150 trees


N=200: 200 trees
Generated 200 configurations


In [3]:
# Save zaburo solutions to CSV format
print("Saving zaburo solutions to CSV...")

rows = []
for n in range(1, 201):
    config = zaburo_configs[n]
    for i, (x, y, deg) in enumerate(config):
        rows.append({
            'id': f"{n:03d}_{i}",
            'x': f"s{x}",
            'y': f"s{y}",
            'deg': f"s{deg}"
        })

zaburo_df = pd.DataFrame(rows)
zaburo_path = os.path.join(WORK_DIR, 'zaburo_initial.csv')
zaburo_df.to_csv(zaburo_path, index=False)
print(f"Saved {len(zaburo_df)} rows to {zaburo_path}")

Saving zaburo solutions to CSV...
Saved 20100 rows to /home/code/experiments/007_sa_optimization/zaburo_initial.csv


In [4]:
# Now optimize the zaburo solutions with sa_v1_parallel
print("\nOptimizing zaburo solutions with sa_v1_parallel...")
print("This may take a few minutes...")

# Run sa_v1_parallel on zaburo solutions
result = subprocess.run(
    [SA_BIN, '-i', 'zaburo_initial.csv', '-n', '10000', '-r', '5'],
    capture_output=True,
    text=True,
    timeout=600,  # 10 minute timeout
    cwd=WORK_DIR
)

print("\nOptimization output (last 30 lines):")
print('\n'.join(result.stdout.split('\n')[-30:]))


Optimizing zaburo solutions with sa_v1_parallel...
This may take a few minutes...



Optimization output (last 30 lines):
n= 11  0.4881 → 0.4749  (+2.7163%)
n= 13  0.4733 → 0.4729  (+0.0884%)
n= 14  0.4608 → 0.4550  (+1.2591%)
n= 16  0.5106 → 0.5089  (+0.3318%)
n= 21  0.5563 → 0.5068  (+8.8913%)
n= 28  0.4985 → 0.4605  (+7.6091%)
n= 31  0.4854 → 0.4824  (+0.6111%)
n= 32  0.4683 → 0.4663  (+0.4403%)

NEW GLOBAL BEST! → 88.3587   saved as  solutions/submission_88.358711.csv

=== Generation 3 ===
n=  2  0.4536 → 0.4508  (+0.6183%)
n=  3  0.4368 → 0.4363  (+0.1097%)
n=  7  0.5187 → 0.5140  (+0.8917%)
n= 14  0.4550 → 0.4544  (+0.1382%)
n= 16  0.5089 → 0.5035  (+1.0542%)
n= 21  0.5068 → 0.5062  (+0.1216%)
n= 31  0.4824 → 0.4815  (+0.1878%)

NEW GLOBAL BEST! → 88.3433   saved as  solutions/submission_88.343287.csv

=== Generation 4 ===
n=  7  0.5140 → 0.5140  (+0.0174%)
n= 14  0.4544 → 0.4538  (+0.1197%)
n= 16  0.5035 → 0.4948  (+1.7334%)
n= 21  0.5062 → 0.4990  (+1.4103%)

NEW GLOBAL BEST! → 88.3268   saved as  solutions/submission_88.326787.csv



In [6]:
# Check if optimized file was created
print(f"Files in work dir: {os.listdir(WORK_DIR)}")

# The optimizer saves to solutions/ subdirectory
solutions_dir = os.path.join(WORK_DIR, 'solutions')
if os.path.exists(solutions_dir):
    solution_files = sorted(os.listdir(solutions_dir))
    print(f"Solution files: {solution_files}")
    
    if solution_files:
        # Get the best (lowest score) file
        best_file = min(solution_files, key=lambda f: float(f.split('_')[1].replace('.csv', '')))
        optimized_path = os.path.join(solutions_dir, best_file)
        print(f"Best optimized file: {optimized_path}")
    else:
        optimized_path = None
else:
    print("No solutions directory found")
    optimized_path = None

Files in work dir: ['sa_opt.ipynb', 'baseline.csv', 'solutions', 'zaburo_initial.csv']
Solution files: ['submission_88.326787.csv', 'submission_88.343287.csv', 'submission_88.358711.csv', 'submission_88.618650.csv']
Best optimized file: /home/code/experiments/007_sa_optimization/solutions/submission_88.326787.csv


In [7]:
# Calculate scores for comparison
def parse_submission(filepath):
    df = pd.read_csv(filepath)
    def parse_val(v):
        if isinstance(v, str) and v.startswith('s'):
            return float(v[1:])
        return float(v)
    df['x_val'] = df['x'].apply(parse_val)
    df['y_val'] = df['y'].apply(parse_val)
    df['deg_val'] = df['deg'].apply(parse_val)
    df['N'] = df['id'].apply(lambda x: int(x.split('_')[0]))
    result = {}
    for n, group in df.groupby('N'):
        trees = list(zip(group['x_val'], group['y_val'], group['deg_val']))
        result[n] = trees
    return result

def create_tree_polygon(x, y, deg):
    coords = list(zip(TX, TY))
    poly = Polygon(coords)
    poly = rotate(poly, deg, origin=(0, 0))
    poly = translate(poly, x, y)
    return poly

def calculate_bounding_box_side(trees):
    all_x, all_y = [], []
    for x, y, deg in trees:
        poly = create_tree_polygon(x, y, deg)
        bounds = poly.bounds
        all_x.extend([bounds[0], bounds[2]])
        all_y.extend([bounds[1], bounds[3]])
    return max(max(all_x) - min(all_x), max(all_y) - min(all_y))

def calculate_total_score(trees_by_n):
    total = 0
    for n, trees in trees_by_n.items():
        side = calculate_bounding_box_side(trees)
        total += side**2 / n
    return total

# Load and score baseline
baseline_trees = parse_submission(BASELINE_CSV)
baseline_score = calculate_total_score(baseline_trees)
print(f"Baseline score: {baseline_score:.6f}")

# Load and score zaburo initial
zaburo_initial_trees = parse_submission(zaburo_path)
zaburo_initial_score = calculate_total_score(zaburo_initial_trees)
print(f"Zaburo initial score: {zaburo_initial_score:.6f}")

# Load and score optimized zaburo (if exists)
if optimized_path:
    optimized_trees = parse_submission(optimized_path)
    optimized_score = calculate_total_score(optimized_trees)
    print(f"Optimized zaburo score: {optimized_score:.6f}")
    print(f"\nImprovement from optimization: {zaburo_initial_score - optimized_score:.6f}")
    print(f"Comparison to baseline: {baseline_score - optimized_score:.6f}")

Baseline score: 70.647327


Zaburo initial score: 91.650373


Optimized zaburo score: 88.326787

Improvement from optimization: 3.323587
Comparison to baseline: -17.679460


In [8]:
# Check for overlaps in optimized solution
def has_overlap(trees):
    if len(trees) <= 1:
        return False
    polygons = [create_tree_polygon(x, y, deg) for x, y, deg in trees]
    tree_index = STRtree(polygons)
    for i, poly in enumerate(polygons):
        indices = tree_index.query(poly)
        for idx in indices:
            if idx == i:
                continue
            if poly.intersects(polygons[idx]) and not poly.touches(polygons[idx]):
                return True
    return False

if optimized_path:
    print("\nChecking optimized solution for overlaps...")
    overlap_n = []
    for n in range(1, 201):
        if n in optimized_trees:
            if has_overlap(optimized_trees[n]):
                overlap_n.append(n)
    
    if len(overlap_n) == 0:
        print("All configurations are valid (no overlaps)!")
    else:
        print(f"Overlapping N values: {overlap_n[:20]}... (total: {len(overlap_n)})")


Checking optimized solution for overlaps...


Overlapping N values: [15, 17, 18, 19, 20, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 35, 36, 37]... (total: 183)


In [9]:
# Create ensemble: pick best per N from baseline and optimized zaburo
if optimized_path and len(overlap_n) == 0:
    print("\nCreating ensemble...")
    
    ensemble_source = {}
    ensemble_trees = {}
    
    baseline_wins = 0
    optimized_wins = 0
    
    for n in range(1, 201):
        base_side = calculate_bounding_box_side(baseline_trees[n])
        opt_side = calculate_bounding_box_side(optimized_trees[n])
        
        if opt_side < base_side:
            ensemble_source[n] = 'optimized'
            ensemble_trees[n] = optimized_trees[n]
            optimized_wins += 1
        else:
            ensemble_source[n] = 'baseline'
            ensemble_trees[n] = baseline_trees[n]
            baseline_wins += 1
    
    ensemble_score = calculate_total_score(ensemble_trees)
    print(f"Ensemble score: {ensemble_score:.6f}")
    print(f"Baseline wins: {baseline_wins}, Optimized wins: {optimized_wins}")
    print(f"Improvement over baseline: {baseline_score - ensemble_score:.6f}")
else:
    print("\nUsing baseline (optimized solution has overlaps or doesn't exist)")
    ensemble_trees = baseline_trees
    ensemble_score = baseline_score


Using baseline (optimized solution has overlaps or doesn't exist)


In [10]:
# Save final submission
print("\nSaving final submission...")

rows = []
for n in range(1, 201):
    trees = ensemble_trees[n]
    for i, (x, y, deg) in enumerate(trees):
        rows.append({
            'id': f"{n:03d}_{i}",
            'x': f"s{x}",
            'y': f"s{y}",
            'deg': f"s{deg}"
        })

final_df = pd.DataFrame(rows)
os.makedirs('/home/submission', exist_ok=True)
final_df.to_csv('/home/submission/submission.csv', index=False)
print(f"Saved {len(final_df)} rows to /home/submission/submission.csv")

# Save metrics
metrics = {
    'cv_score': ensemble_score,
    'baseline_score': baseline_score,
    'zaburo_initial_score': zaburo_initial_score,
    'optimized_score': optimized_score if optimized_path else None,
    'improvement': baseline_score - ensemble_score
}

with open(os.path.join(WORK_DIR, 'metrics.json'), 'w') as f:
    json.dump(metrics, f, indent=2)

print(f"\nFinal Score: {ensemble_score:.6f}")
print(f"Target: 68.919")
print(f"Gap to target: {ensemble_score - 68.919:.6f}")


Saving final submission...
Saved 20100 rows to /home/submission/submission.csv

Final Score: 70.647327
Target: 68.919
Gap to target: 1.728327
