# Experiment 005: Pure Python SA Optimizer with Shapely

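This experiment implements a pure-Python simulated annealing optimizer that uses Shapely for overlap detection.
Because the optimizer rejects moves with the same Shapely checks that are used for final validation, the configurations it returns are valid by construction (there is no overlap-detection mismatch with the C++ code).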

In [1]:
import math
import random
import numpy as np
import pandas as pd
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union
from shapely.strtree import STRtree
import time

# Carry 30 significant digits through all Decimal arithmetic in this notebook.
getcontext().prec = 30
# Multiplier applied to tree coordinates before they are turned into Shapely
# polygons; get_side_length divides by it again when reporting side lengths.
scale_factor = Decimal('1e15')

print("Libraries loaded")

Libraries loaded


In [2]:
class ChristmasTree:
    """A tree outline placed at a centre point and rotated by an angle.

    The centre coordinates and the angle are stored as ``Decimal`` values.
    ``polygon`` is the matching Shapely polygon whose coordinates are
    multiplied by the module-level ``scale_factor``.
    """

    def __init__(self, center_x='0', center_y='0', angle='0'):
        """Build the outline, rotate it about the origin, then translate it.

        Args:
            center_x: x offset of the tree; anything ``str()`` can render as a
                decimal number.
            center_y: y offset of the tree, same convention as ``center_x``.
            angle: rotation passed to ``affinity.rotate`` (degrees).
        """
        self.center_x = Decimal(str(center_x))
        self.center_y = Decimal(str(center_y))
        self.angle = Decimal(str(angle))

        # Outline dimensions in unscaled units.
        trunk_w, trunk_h = Decimal('0.15'), Decimal('0.2')
        base_w, mid_w, top_w = Decimal('0.7'), Decimal('0.4'), Decimal('0.25')
        tip_y, tier_1_y, tier_2_y, base_y = (
            Decimal('0.8'), Decimal('0.5'), Decimal('0.25'), Decimal('0.0')
        )
        trunk_bottom_y = -trunk_h
        two, four = Decimal('2'), Decimal('4')

        # Right-hand half of the outline, walked from just below the tip down
        # to the bottom of the trunk.
        right_half = [
            (top_w / two, tier_1_y),
            (top_w / four, tier_1_y),
            (mid_w / two, tier_2_y),
            (mid_w / four, tier_2_y),
            (base_w / two, base_y),
            (trunk_w / two, base_y),
            (trunk_w / two, trunk_bottom_y),
        ]
        # The left-hand half is the mirror image, walked from the trunk bottom
        # back up towards the tip so the ring stays in order.
        left_half = [(-x, y) for x, y in reversed(right_half)]
        outline = [(Decimal('0.0'), tip_y)] + right_half + left_half

        initial_polygon = Polygon(
            [(x * scale_factor, y * scale_factor) for x, y in outline]
        )
        rotated = affinity.rotate(initial_polygon, float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(
            rotated,
            xoff=float(self.center_x * scale_factor),
            yoff=float(self.center_y * scale_factor),
        )

    def clone(self):
        """Return a new, independent tree with the same position and angle."""
        return ChristmasTree(str(self.center_x), str(self.center_y), str(self.angle))

    def set_params(self, x, y, angle):
        """Move this tree in place by re-running ``__init__`` with new values."""
        self.__init__(str(x), str(y), str(angle))

print("ChristmasTree class defined")

ChristmasTree class defined


In [3]:
def get_side_length(trees):
    """Return the side of the smallest axis-aligned square enclosing all trees.

    The bounding box of a set of polygons is the envelope of their individual
    bounding boxes, so this takes min/max over each polygon's ``bounds``
    instead of building a full ``unary_union`` just to read its bounds.

    Args:
        trees: sequence of objects exposing a Shapely ``polygon`` attribute
            whose coordinates are multiplied by ``scale_factor``.

    Returns:
        The larger of the bounding-box width and height, divided by
        ``scale_factor``. Returns 0.0 for an empty sequence.
    """
    if not trees:
        return 0.0
    # Each entry is (minx, miny, maxx, maxy).
    boxes = [t.polygon.bounds for t in trees]
    width = max(b[2] for b in boxes) - min(b[0] for b in boxes)
    height = max(b[3] for b in boxes) - min(b[1] for b in boxes)
    return max(width, height) / float(scale_factor)

def get_score(trees, n):
    """Return the score S^2 / n, where S is the bounding-square side length.

    An empty ``trees`` sequence scores 0.0.
    """
    if trees:
        return get_side_length(trees) ** 2 / n
    return 0.0

def has_collision(trees):
    """Report whether any two trees overlap, as judged by Shapely.

    Two trees collide when their polygons intersect without merely touching.
    Each unordered pair is examined once, and the scan stops at the first
    colliding pair.
    """
    count = len(trees)
    if count <= 1:
        return False
    polygons = [t.polygon for t in trees]
    return any(
        polygons[a].intersects(polygons[b]) and not polygons[a].touches(polygons[b])
        for a in range(count)
        for b in range(a + 1, count)
    )

def has_collision_single(trees, idx):
    """Report whether the tree at position ``idx`` overlaps any other tree.

    A pair counts as colliding when the polygons intersect without merely
    touching. Only pairs that involve ``trees[idx]`` are examined.
    """
    return any(
        trees[idx].polygon.intersects(other.polygon)
        and not trees[idx].polygon.touches(other.polygon)
        for position, other in enumerate(trees)
        if idx != position
    )

def load_configuration_from_df(n, df):
    """Build the trees of puzzle size ``n`` from a submission-style frame.

    Rows are selected by an ``id`` that starts with the zero-padded size
    followed by an underscore (for example ``"007_"``). The ``x``, ``y`` and
    ``deg`` fields may carry a leading ``'s'``, which is removed before the
    values are passed to ``ChristmasTree``. Rows in which any of the three
    fields ends up empty are skipped.
    """
    def _without_s(value):
        # Drop a single leading 's' marker if present.
        text = str(value)
        return text[1:] if text.startswith('s') else text

    id_prefix = f"{n:03d}_"
    group_data = df[df["id"].str.startswith(id_prefix)]

    trees = []
    for _, row in group_data.iterrows():
        x, y, deg = (_without_s(row[field]) for field in ("x", "y", "deg"))
        if not (x and y and deg):
            continue
        trees.append(ChristmasTree(x, y, deg))
    return trees

print("Helper functions defined")

Helper functions defined


In [4]:
def simulated_annealing(trees, n_steps=5000, T_max=1.0, T_min=0.001, 
                        position_delta=0.05, angle_delta=5.0, seed=42):
    """
    Pure Python simulated annealing with Shapely collision detection.

    Each step perturbs one randomly chosen tree (position and angle), rejects
    the move outright if that tree then collides with another one, and
    otherwise accepts it with the Metropolis rule applied to the change in
    bounding-square side length. The temperature decays geometrically from
    ``T_max`` to ``T_min`` over ``n_steps`` steps, and the perturbation ranges
    shrink in proportion to ``T / T_max``.

    Args:
        trees: starting configuration; the input trees are cloned, never
            modified.
        n_steps: number of annealing steps. Values <= 0 perform no steps.
        T_max: starting temperature.
        T_min: final temperature (should be > 0).
        position_delta: maximum absolute x/y displacement at ``T_max``.
        angle_delta: maximum absolute angle change at ``T_max``.
        seed: seed for the private random number generator.

    Returns:
        ``(best_trees, best_side, accepted, improved)``: the best configuration
        seen, its side length, the number of accepted moves, and the number of
        times a new best was recorded. An empty input yields
        ``([], 0.0, 0, 0)``.
    """
    n = len(trees)
    if n == 0:
        # Nothing to move; this also avoids randint(0, -1) raising ValueError.
        return [], 0.0, 0, 0

    # Private generator: yields the same stream as random.seed(seed) followed
    # by module-level calls, without reseeding the process-wide generator.
    rng = random.Random(seed)

    # Clone trees to avoid modifying original
    current_trees = [t.clone() for t in trees]
    best_trees = [t.clone() for t in trees]

    current_side = get_side_length(current_trees)
    best_side = current_side

    accepted = 0
    improved = 0

    if n_steps <= 0:
        # No steps requested; skip the cooling-rate computation, which would
        # divide by zero for n_steps == 0.
        return best_trees, best_side, accepted, improved

    # Cooling schedule
    alpha = (T_min / T_max) ** (1.0 / n_steps)
    T = T_max

    for step in range(n_steps):
        # Select random tree
        i = rng.randint(0, n - 1)

        # Save old state
        old_x = current_trees[i].center_x
        old_y = current_trees[i].center_y
        old_angle = current_trees[i].angle

        # Perturb (scale by temperature)
        scale = T / T_max
        dx = Decimal(str(rng.uniform(-position_delta * scale, position_delta * scale)))
        dy = Decimal(str(rng.uniform(-position_delta * scale, position_delta * scale)))
        dangle = Decimal(str(rng.uniform(-angle_delta * scale, angle_delta * scale)))

        new_x = old_x + dx
        new_y = old_y + dy
        new_angle = (old_angle + dangle) % 360

        # Apply perturbation
        current_trees[i].set_params(new_x, new_y, new_angle)

        # Check collision using Shapely
        if has_collision_single(current_trees, i):
            # Revert
            current_trees[i].set_params(old_x, old_y, old_angle)
        else:
            # Calculate new side length
            new_side = get_side_length(current_trees)
            delta = new_side - current_side

            # Accept or reject (Metropolis criterion; the random draw is only
            # consumed when the move does not shrink the square)
            if delta < 0 or rng.random() < math.exp(-delta / T):
                current_side = new_side
                accepted += 1

                if new_side < best_side:
                    best_side = new_side
                    best_trees = [t.clone() for t in current_trees]
                    improved += 1
            else:
                # Revert
                current_trees[i].set_params(old_x, old_y, old_angle)

        # Cool down
        T *= alpha

    return best_trees, best_side, accepted, improved

print("SA function defined")

SA function defined


In [5]:
# Read the repaired baseline configurations from disk
print("Loading repaired baseline...")
df = pd.read_csv('/home/code/experiments/003_preoptimized/repaired_baseline.csv')

# Add up S^2 / N over every puzzle size; sizes with no rows contribute nothing
initial_total = 0
for n in range(1, 201):
    trees = load_configuration_from_df(n, df)
    if not trees:
        continue
    initial_total += get_score(trees, n)

print(f"Initial total score: {initial_total:.6f}")
print(f"Gap to target (68.92): {initial_total - 68.922808:.6f} points")

Loading repaired baseline...


Initial total score: 70.682741
Gap to target (68.92): 1.759933 points


In [6]:
# Smoke-test the optimizer on a handful of puzzle sizes
print("\nTesting SA on a few N values...")

test_ns = [10, 20, 50, 100]
for n in test_ns:
    trees = load_configuration_from_df(n, df)
    if not trees:
        # Size not present in the baseline frame
        continue

    old_score = get_score(trees, n)

    # Time a single annealing run with fixed parameters
    start = time.time()
    new_trees, new_side, accepted, improved = simulated_annealing(
        trees,
        n_steps=2000,
        T_max=0.5,
        T_min=0.001,
        position_delta=0.02,
        angle_delta=3.0,
        seed=42,
    )
    elapsed = time.time() - start

    # Re-score and re-validate the returned configuration from scratch
    new_score = get_score(new_trees, n)
    has_overlap = has_collision(new_trees)

    print(f"N={n}: {old_score:.6f} -> {new_score:.6f} (overlap={has_overlap}, accepted={accepted}, improved={improved}, time={elapsed:.1f}s)")


Testing SA on a few N values...


N=10: 0.376630 -> 0.376630 (overlap=False, accepted=1699, improved=0, time=1.1s)


N=20: 0.376057 -> 0.376057 (overlap=False, accepted=1278, improved=0, time=2.0s)


N=50: 0.360753 -> 0.360753 (overlap=False, accepted=559, improved=0, time=2.8s)


N=100: 0.345531 -> 0.345531 (overlap=False, accepted=113, improved=0, time=1.8s)


In [7]:
# Anneal every puzzle size, keeping the original whenever SA does not help
print("\nRunning SA on all N values...")

best_configs = {}
best_scores = {}

start_time = time.time()

for n in range(1, 201):
    trees = load_configuration_from_df(n, df)
    if not trees:
        continue

    old_score = get_score(trees, n)

    # Larger puzzles get fewer steps and smaller moves
    if n <= 20:
        n_steps, position_delta, angle_delta = 3000, 0.03, 5.0
    elif n <= 50:
        n_steps, position_delta, angle_delta = 2000, 0.02, 3.0
    else:
        n_steps, position_delta, angle_delta = 1000, 0.01, 2.0

    new_trees, new_side, accepted, improved = simulated_annealing(
        trees,
        n_steps=n_steps,
        T_max=0.5,
        T_min=0.001,
        position_delta=position_delta,
        angle_delta=angle_delta,
        seed=42 + n,
    )

    new_score = get_score(new_trees, n)

    # The annealed result replaces the original only if it scores strictly
    # better and passes a full pairwise overlap check
    keep_new = new_score < old_score and not has_collision(new_trees)
    best_configs[n] = new_trees if keep_new else trees
    best_scores[n] = new_score if keep_new else old_score

    # Report progress on every 20th size only
    if n % 20 == 0:
        if keep_new:
            print(f"N={n}: IMPROVED {old_score:.6f} -> {new_score:.6f}")
        else:
            print(f"N={n}: kept original {old_score:.6f}")

elapsed = time.time() - start_time
print(f"\nTotal time: {elapsed:.1f}s")

# Total score over all sizes after the run
new_total = sum(best_scores.values())
print(f"\nNew total score: {new_total:.6f}")
print(f"Improvement: {initial_total - new_total:.6f} points")
print(f"Gap to target: {new_total - 68.922808:.6f} points")


Running SA on all N values...


N=20: kept original 0.376057


N=40: kept original 0.362148


N=60: kept original 0.357258


N=80: kept original 0.344881


N=100: kept original 0.345531


N=120: kept original 0.337684


N=140: kept original 0.340098


N=160: kept original 0.339407


N=180: kept original 0.331002


N=200: kept original 0.337731

Total time: 358.2s

New total score: 70.682741
Improvement: 0.000000 points
Gap to target: 1.759933 points


In [None]:
# Re-check every stored configuration for overlapping trees
print("\nVerifying no overlaps...")
overlap_count = 0
for n, trees in best_configs.items():
    if not has_collision(trees):
        continue
    print(f"  N={n}: OVERLAP!")
    overlap_count += 1

if overlap_count:
    print(f"Found {overlap_count} configurations with overlaps")
else:
    print("All configurations are overlap-free!")

In [None]:
# Save the improved submission
print("\nSaving submission...")

# Every puzzle size must be present with exactly n trees; otherwise the ids
# generated below would not line up with the rows being written.
bad_sizes = [n for n in range(1, 201)
             if n not in best_configs or len(best_configs[n]) != n]
if bad_sizes:
    raise ValueError(f"Missing or wrongly sized configurations for N in {bad_sizes}")

index = [f'{n:03d}_{t}' for n in range(1, 201) for t in range(n)]

# Write the Decimal values verbatim in fixed-point notation with the 's'
# prefix. Converting to float and rounding to 6 decimals would write
# coordinates that differ from the ones the overlap check was run on.
tree_data = []
for n in range(1, 201):
    for tree in best_configs[n]:
        tree_data.append(['s' + format(value, 'f')
                          for value in (tree.center_x, tree.center_y, tree.angle)])

cols = ['x', 'y', 'deg']
submission = pd.DataFrame(index=index, columns=cols, data=tree_data).rename_axis('id')

submission.to_csv('/home/code/experiments/005_python_sa_optimizer/submission.csv')
submission.to_csv('/home/submission/submission.csv')
print("Submission saved!")
print(f"\nFinal score: {new_total:.6f}")