# sa_fast_v2 Optimization with REPAIR Strategy

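Run the sa_fast_v2 optimizer, validate its output, and apply the REPAIR strategy: for each N, keep the optimized layout only if it is overlap-free and strictly smaller than the baseline; otherwise fall back to the baseline layout.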

In [1]:
import numpy as np
import pandas as pd
import subprocess
import os
import shutil
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union
from shapely.strtree import STRtree

# Decimal arithmetic in this notebook is carried out with 25 significant digits.
getcontext().prec = 25

# Coordinates are multiplied by this factor before polygons are built and
# divided by it again when bounding-box sizes are read back.
scale_factor = Decimal('1e15')

# Working directory of this experiment and the two CSV files read from it.
WORK_DIR = '/home/code/experiments/003_sa_fast_v2'
BASELINE_CSV, OPTIMIZED_CSV = (
    os.path.join(WORK_DIR, file_name)
    for file_name in ('submission1.csv', 'submission2.csv')
)

In [2]:
class ChristmasTree:
    """A tree-shaped polygon placed at a centre point with a rotation.

    The constructor arguments are converted with ``Decimal`` and kept as
    ``center_x``, ``center_y`` and ``angle``.  ``polygon`` holds the shapely
    outline, built in coordinates multiplied by ``scale_factor``, rotated
    about the origin by ``angle`` (shapely's default unit, degrees) and then
    translated to the centre.
    """

    def __init__(self, center_x='0', center_y='0', angle='0'):
        self.center_x = Decimal(center_x)
        self.center_y = Decimal(center_y)
        self.angle = Decimal(angle)

        half, quarter = Decimal('2'), Decimal('4')

        # Widths of the trunk and of the three tiers.
        trunk_w, trunk_h = Decimal('0.15'), Decimal('0.2')
        base_w, mid_w, top_w = Decimal('0.7'), Decimal('0.4'), Decimal('0.25')

        # Heights at which the outline changes direction.
        tip_y = Decimal('0.8')
        tier_1_y = Decimal('0.5')
        tier_2_y = Decimal('0.25')
        base_y = Decimal('0.0')
        trunk_bottom_y = -trunk_h

        # Right-hand half of the outline, from just below the tip down to the
        # bottom of the trunk, in unscaled units.
        right_half = [
            (top_w / half, tier_1_y),
            (top_w / quarter, tier_1_y),
            (mid_w / half, tier_2_y),
            (mid_w / quarter, tier_2_y),
            (base_w / half, base_y),
            (trunk_w / half, base_y),
            (trunk_w / half, trunk_bottom_y),
        ]

        # Tip, then the right half going down, then its mirror image going
        # back up the left-hand side.
        outline = [(Decimal('0.0') * scale_factor, tip_y * scale_factor)]
        outline.extend(
            (x * scale_factor, y * scale_factor) for x, y in right_half
        )
        outline.extend(
            (-x * scale_factor, y * scale_factor) for x, y in reversed(right_half)
        )

        turned = affinity.rotate(Polygon(outline), float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(
            turned,
            xoff=float(self.center_x * scale_factor),
            yoff=float(self.center_y * scale_factor),
        )

In [3]:
def has_overlap(trees):
    """Return True when any two trees' polygons overlap.

    Two polygons count as overlapping when they intersect without merely
    touching.  Fewer than two trees can never overlap.  Candidate pairs come
    from an ``STRtree`` query on each polygon, skipping the polygon itself.
    """
    if len(trees) <= 1:
        return False

    shapes = [tree.polygon for tree in trees]
    spatial_index = STRtree(shapes)
    return any(
        shape.intersects(shapes[hit]) and not shape.touches(shapes[hit])
        for position, shape in enumerate(shapes)
        for hit in spatial_index.query(shape)
        if hit != position
    )

def get_side_length(trees):
    """Return the larger bounding-box dimension of ``trees`` as a Decimal.

    The bounding box is taken from the union of all tree polygons and
    divided by ``scale_factor`` to undo the scaling applied when the
    polygons were built.  An empty collection yields ``Decimal('0')``.
    """
    if not trees:
        return Decimal('0')

    merged = unary_union([tree.polygon for tree in trees])
    left, bottom, right, top = (
        Decimal(edge) / scale_factor for edge in merged.bounds
    )
    width = right - left
    height = top - bottom
    return max(width, height)

def calculate_score(side_lengths):
    """Return the total score for a mapping ``{n: side}`` as a float.

    Each entry contributes ``side ** 2 / n``; the contributions are summed
    in Decimal arithmetic and converted to ``float`` once at the end.
    """
    total = sum(
        (side ** 2 / Decimal(str(n)) for n, side in side_lengths.items()),
        Decimal('0'),
    )
    return float(total)

def load_submission(csv_path):
    """Read a submission CSV and build the trees for N = 1..200.

    The ``x``, ``y`` and ``deg`` columns are converted to strings and any
    leading ``s`` characters are stripped.  Rows are grouped by the
    zero-padded ``NNN_`` prefix of their ``id``.

    Returns:
        A pair ``(tree_lists, side_lengths)`` of dicts keyed by N, holding
        the list of ``ChristmasTree`` objects and the value of
        ``get_side_length`` for that list.
    """
    frame = pd.read_csv(csv_path)
    for column in ('x', 'y', 'deg'):
        frame[column] = frame[column].astype(str).str.lstrip('s')

    tree_lists, side_lengths = {}, {}
    for n in range(1, 201):
        members = frame[frame['id'].str.startswith(f'{n:03d}_')]
        placed = [
            ChristmasTree(x, y, deg)
            for x, y, deg in zip(members['x'], members['y'], members['deg'])
        ]
        tree_lists[n] = placed
        side_lengths[n] = get_side_length(placed)

    return tree_lists, side_lengths

In [4]:
# Load baseline and optimized
# Both files are parsed into {N: trees} / {N: side length} dicts; the
# resulting names are reused by the cells that follow.
print("Loading submissions...")
baseline_trees, baseline_sides = load_submission(BASELINE_CSV)
baseline_score = calculate_score(baseline_sides)
print(f"Baseline score: {baseline_score:.6f}")

# The optimized layouts have not been checked for overlaps at this point, so
# this score may include invalid groups.
optimized_trees, optimized_sides = load_submission(OPTIMIZED_CSV)
optimized_score = calculate_score(optimized_sides)
print(f"Optimized score (before repair): {optimized_score:.6f}")

Loading submissions...


Baseline score: 70.627582


Optimized score (before repair): 70.625972


In [5]:
# Apply REPAIR strategy: for every N keep the optimized layout only when it is
# complete, overlap-free and strictly smaller than the baseline layout;
# otherwise fall back to the baseline layout.
print("\nApplying REPAIR strategy...")

repaired_trees = {}
repaired_sides = {}
stats = {'improved': 0, 'invalid': 0, 'worse': 0, 'same': 0}
improvements = []

for n in range(1, 201):
    opt_trees = optimized_trees[n]
    opt_side = optimized_sides[n]
    base_trees = baseline_trees[n]
    base_side = baseline_sides[n]

    # A candidate that lost rows has a smaller (possibly zero) bounding square
    # and no overlaps, so without the length check it would be accepted as an
    # "improvement".  Only layouts with the same number of trees as the
    # baseline are comparable.
    if len(opt_trees) != len(base_trees) or has_overlap(opt_trees):
        outcome = 'invalid'
    elif opt_side < base_side:
        outcome = 'improved'
    elif opt_side > base_side:
        outcome = 'worse'
    else:
        outcome = 'same'
    stats[outcome] += 1

    if outcome == 'improved':
        repaired_trees[n] = opt_trees
        repaired_sides[n] = opt_side
        improvements.append((n, float(base_side - opt_side)))
    else:
        # Invalid, worse and unchanged candidates all keep the baseline.
        repaired_trees[n] = base_trees
        repaired_sides[n] = base_side

repaired_score = calculate_score(repaired_sides)
print(f"\nRepair stats: {stats}")
print(f"Repaired score: {repaired_score:.6f}")
print(f"Improvement over baseline: {baseline_score - repaired_score:.6f}")

if improvements:
    # Show the ten largest side-length reductions.
    print("\nValid improvements:")
    for n, imp in sorted(improvements, key=lambda x: -x[1])[:10]:
        print(f"  N={n}: improved by {imp:.6f}")


Applying REPAIR strategy...



Repair stats: {'improved': 14, 'invalid': 116, 'worse': 2, 'same': 68}
Repaired score: 70.625972
Improvement over baseline: 0.001611

Valid improvements:
  N=88: improved by 0.005223
  N=35: improved by 0.001598
  N=63: improved by 0.001499
  N=101: improved by 0.001085
  N=76: improved by 0.000531
  N=64: improved by 0.000414
  N=91: improved by 0.000330
  N=94: improved by 0.000272
  N=36: improved by 0.000105
  N=54: improved by 0.000103


In [6]:
# Save repaired submission
print("\nSaving repaired submission...")

# Coordinates are written with the Decimal fixed-point format ("f" without a
# precision), which reproduces every stored digit.  Going through float and
# rounding to 12 decimals would save a layout that differs from the one that
# was checked for overlaps and scored above.
rows = []
for n in range(1, 201):
    trees = repaired_trees[n]
    for t_idx, tree in enumerate(trees):
        rows.append({
            'id': f'{n:03d}_{t_idx}',
            'x': f's{tree.center_x:f}',
            'y': f's{tree.center_y:f}',
            'deg': f's{tree.angle:f}',
        })

submission_df = pd.DataFrame(rows)
# One copy stays with the experiment, one goes to the submission location.
submission_df.to_csv(os.path.join(WORK_DIR, 'submission_repaired.csv'), index=False)
submission_df.to_csv('/home/submission/submission.csv', index=False)
print("Saved submission")
print(f"\nFinal score: {repaired_score:.6f}")
print("Target: 68.901319")
print(f"Gap: {repaired_score - 68.901319:.6f}")


Saving repaired submission...
Saved submission

Final score: 70.625972
Target: 68.901319
Gap: 1.724653


In [8]:
# Run multiple iterations with different seeds.
# Each iteration writes the current best layouts to submission1.csv, runs
# sa_fast_v2 in WORK_DIR, reads submission2.csv back and keeps every group
# that is complete, overlap-free and strictly smaller than the current best.
# NOTE: submission1.csv is also BASELINE_CSV, so the on-disk baseline is
# overwritten here; baseline_trees / baseline_sides in memory are unaffected.
print("Running multiple sa_fast_v2 iterations...")

current_best_trees = repaired_trees.copy()
current_best_sides = repaired_sides.copy()
current_best_score = repaired_score

seeds = [123, 456, 789, 1234, 5678]
total_improvements = 0

optimizer_input = os.path.join(WORK_DIR, 'submission1.csv')
optimizer_output = os.path.join(WORK_DIR, 'submission2.csv')


def _mtime_ns(path):
    """Return the modification time of ``path`` in nanoseconds, or None if missing."""
    try:
        return os.stat(path).st_mtime_ns
    except FileNotFoundError:
        return None


for seed in seeds:
    print(f"\n--- Seed {seed} ---")

    # Save current best as input.  The Decimal "f" format keeps every stored
    # digit, so the optimizer starts from exactly the validated layout rather
    # than a copy rounded to 12 decimals.
    rows = []
    for n in range(1, 201):
        trees = current_best_trees[n]
        for t_idx, tree in enumerate(trees):
            rows.append({
                'id': f'{n:03d}_{t_idx}',
                'x': f's{tree.center_x:f}',
                'y': f's{tree.center_y:f}',
                'deg': f's{tree.angle:f}',
            })
    pd.DataFrame(rows).to_csv(optimizer_input, index=False)

    # Run sa_fast_v2 with this seed
    os.chdir(WORK_DIR)
    stamp_before = _mtime_ns(optimizer_output)
    try:
        result = subprocess.run(
            ['/home/code/sa_fast_v2'],
            capture_output=True,
            text=True,
            timeout=600,
            env={**os.environ, 'SEED': str(seed)}
        )
    except subprocess.TimeoutExpired:
        # One slow seed should not abort the remaining seeds.
        print("  sa_fast_v2 timed out; skipping this seed")
        continue
    if result.returncode != 0:
        print(f"  sa_fast_v2 exited with code {result.returncode}: "
              f"{result.stderr.strip()[-500:]}")

    # Without this check a failed run would silently re-read the output of
    # an earlier run.
    stamp_after = _mtime_ns(optimizer_output)
    if stamp_after is None or stamp_after == stamp_before:
        print("  No fresh submission2.csv was produced; skipping this seed")
        continue

    # Load and repair
    opt_trees, opt_sides = load_submission(optimizer_output)

    iter_improvements = 0
    for n in range(1, 201):
        # A group with missing rows has a smaller bounding square and must
        # not be mistaken for an improvement.
        if len(opt_trees[n]) != len(current_best_trees[n]):
            continue
        if not has_overlap(opt_trees[n]) and opt_sides[n] < current_best_sides[n]:
            improvement = float(current_best_sides[n] - opt_sides[n])
            current_best_trees[n] = opt_trees[n]
            current_best_sides[n] = opt_sides[n]
            iter_improvements += 1
            total_improvements += 1
            if improvement > 0.0001:
                print(f"  N={n}: improved by {improvement:.6f}")

    new_score = calculate_score(current_best_sides)
    print(f"  Improvements this iteration: {iter_improvements}")
    print(f"  Score: {new_score:.6f} (delta: {current_best_score - new_score:.6f})")
    current_best_score = new_score

print("\n=== Final Results ===")
print(f"Total improvements across all seeds: {total_improvements}")
print(f"Final score: {current_best_score:.6f}")
print(f"Total improvement: {baseline_score - current_best_score:.6f}")

Running multiple sa_fast_v2 iterations...

--- Seed 123 ---


  N=36: improved by 0.000278
  N=76: improved by 0.000217
  N=91: improved by 0.000157
  N=94: improved by 0.000470
  N=98: improved by 0.000263
  N=101: improved by 0.000152


  Improvements this iteration: 9
  Score: 70.625730 (delta: 0.000242)

--- Seed 456 ---


  N=35: improved by 0.000945
  N=36: improved by 0.001332
  N=94: improved by 0.000239


  Improvements this iteration: 8
  Score: 70.625227 (delta: 0.000503)

--- Seed 789 ---


  Improvements this iteration: 3
  Score: 70.625224 (delta: 0.000002)

--- Seed 1234 ---


  Improvements this iteration: 1
  Score: 70.625220 (delta: 0.000004)

--- Seed 5678 ---


  Improvements this iteration: 1
  Score: 70.625220 (delta: 0.000000)

=== Final Results ===
Total improvements across all seeds: 22
Final score: 70.625220
Total improvement: 0.002362


In [10]:
# Save the best result so far
print("Saving best result...")

# Coordinates are written with the Decimal fixed-point format ("f" without a
# precision), which reproduces every stored digit.  Going through float and
# rounding to 12 decimals would save a layout that differs from the one that
# was checked for overlaps and scored.
rows = []
for n in range(1, 201):
    trees = current_best_trees[n]
    for t_idx, tree in enumerate(trees):
        rows.append({
            'id': f'{n:03d}_{t_idx}',
            'x': f's{tree.center_x:f}',
            'y': f's{tree.center_y:f}',
            'deg': f's{tree.angle:f}',
        })

submission_df = pd.DataFrame(rows)
# One copy stays with the experiment, one goes to the submission location.
submission_df.to_csv(os.path.join(WORK_DIR, 'submission_best.csv'), index=False)
submission_df.to_csv('/home/submission/submission.csv', index=False)
print("Saved submission")
print(f"\nFinal score: {current_best_score:.6f}")
print("Target: 68.901319")
print(f"Gap: {current_best_score - 68.901319:.6f}")

Saving best result...
Saved submission

Final score: 70.625220
Target: 68.901319
Gap: 1.723901


In [11]:
# Continue with more seeds.
# Same procedure as the first seed loop: write the current best layouts to
# submission1.csv, run sa_fast_v2 in WORK_DIR, read submission2.csv back and
# keep every group that is complete, overlap-free and strictly smaller.
print("Running more sa_fast_v2 iterations...")

more_seeds = [9999, 11111, 22222, 33333, 44444, 55555, 66666, 77777, 88888, 99999]

optimizer_input = os.path.join(WORK_DIR, 'submission1.csv')
optimizer_output = os.path.join(WORK_DIR, 'submission2.csv')


def _mtime_ns(path):
    """Return the modification time of ``path`` in nanoseconds, or None if missing."""
    try:
        return os.stat(path).st_mtime_ns
    except FileNotFoundError:
        return None


for seed in more_seeds:
    print(f"\n--- Seed {seed} ---")

    # Save current best as input.  The Decimal "f" format keeps every stored
    # digit, so the optimizer starts from exactly the validated layout rather
    # than a copy rounded to 12 decimals.
    rows = []
    for n in range(1, 201):
        trees = current_best_trees[n]
        for t_idx, tree in enumerate(trees):
            rows.append({
                'id': f'{n:03d}_{t_idx}',
                'x': f's{tree.center_x:f}',
                'y': f's{tree.center_y:f}',
                'deg': f's{tree.angle:f}',
            })
    pd.DataFrame(rows).to_csv(optimizer_input, index=False)

    # Run sa_fast_v2
    os.chdir(WORK_DIR)
    stamp_before = _mtime_ns(optimizer_output)
    try:
        result = subprocess.run(
            ['/home/code/sa_fast_v2'],
            capture_output=True,
            text=True,
            timeout=600,
            env={**os.environ, 'SEED': str(seed)}
        )
    except subprocess.TimeoutExpired:
        # One slow seed should not abort the remaining seeds.
        print("  sa_fast_v2 timed out; skipping this seed")
        continue
    if result.returncode != 0:
        print(f"  sa_fast_v2 exited with code {result.returncode}: "
              f"{result.stderr.strip()[-500:]}")

    # Without this check a failed run would silently re-read the output of
    # an earlier run.
    stamp_after = _mtime_ns(optimizer_output)
    if stamp_after is None or stamp_after == stamp_before:
        print("  No fresh submission2.csv was produced; skipping this seed")
        continue

    # Load and repair
    opt_trees, opt_sides = load_submission(optimizer_output)

    iter_improvements = 0
    for n in range(1, 201):
        # A group with missing rows has a smaller bounding square and must
        # not be mistaken for an improvement.
        if len(opt_trees[n]) != len(current_best_trees[n]):
            continue
        if not has_overlap(opt_trees[n]) and opt_sides[n] < current_best_sides[n]:
            improvement = float(current_best_sides[n] - opt_sides[n])
            current_best_trees[n] = opt_trees[n]
            current_best_sides[n] = opt_sides[n]
            iter_improvements += 1
            total_improvements += 1
            if improvement > 0.0001:
                print(f"  N={n}: improved by {improvement:.6f}")

    new_score = calculate_score(current_best_sides)
    print(f"  Improvements: {iter_improvements}, Score: {new_score:.6f} (delta: {current_best_score - new_score:.6f})")
    current_best_score = new_score

print("\n=== Final Results ===")
print(f"Total improvements: {total_improvements}")
print(f"Final score: {current_best_score:.6f}")
print(f"Total improvement from baseline: {baseline_score - current_best_score:.6f}")

In [13]:
# Save current best result
print("Saving current best result...")

# Recompute the score from the stored side lengths instead of trusting the
# running total kept by the seed loops.
final_score = calculate_score(current_best_sides)
print(f"Current best score: {final_score:.6f}")

# Coordinates are written with the Decimal fixed-point format ("f" without a
# precision), which reproduces every stored digit.  Going through float and
# rounding to 12 decimals would save a layout that differs from the one that
# was checked for overlaps and scored.
rows = []
for n in range(1, 201):
    trees = current_best_trees[n]
    for t_idx, tree in enumerate(trees):
        rows.append({
            'id': f'{n:03d}_{t_idx}',
            'x': f's{tree.center_x:f}',
            'y': f's{tree.center_y:f}',
            'deg': f's{tree.angle:f}',
        })

submission_df = pd.DataFrame(rows)
# One copy stays with the experiment, one goes to the submission location.
submission_df.to_csv(os.path.join(WORK_DIR, 'submission_final.csv'), index=False)
submission_df.to_csv('/home/submission/submission.csv', index=False)
print("Saved submission")
print(f"\nFinal score: {final_score:.6f}")
print(f"Baseline score: {baseline_score:.6f}")
print(f"Total improvement: {baseline_score - final_score:.6f}")
print("Target: 68.901319")
print(f"Gap: {final_score - 68.901319:.6f}")