# Evolver Loop 2 Analysis

Analyzing the sa_fast_v2 results and planning next steps.

In [1]:
import numpy as np
import pandas as pd
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union
from shapely.strtree import STRtree

getcontext().prec = 25
scale_factor = Decimal('1e15')

In [2]:
class ChristmasTree:
    def __init__(self, center_x='0', center_y='0', angle='0'):
        self.center_x = Decimal(center_x)
        self.center_y = Decimal(center_y)
        self.angle = Decimal(angle)

        trunk_w = Decimal('0.15')
        trunk_h = Decimal('0.2')
        base_w = Decimal('0.7')
        mid_w = Decimal('0.4')
        top_w = Decimal('0.25')
        tip_y = Decimal('0.8')
        tier_1_y = Decimal('0.5')
        tier_2_y = Decimal('0.25')
        base_y = Decimal('0.0')
        trunk_bottom_y = -trunk_h

        initial_polygon = Polygon([
            (Decimal('0.0') * scale_factor, tip_y * scale_factor),
            (top_w / Decimal('2') * scale_factor, tier_1_y * scale_factor),
            (top_w / Decimal('4') * scale_factor, tier_1_y * scale_factor),
            (mid_w / Decimal('2') * scale_factor, tier_2_y * scale_factor),
            (mid_w / Decimal('4') * scale_factor, tier_2_y * scale_factor),
            (base_w / Decimal('2') * scale_factor, base_y * scale_factor),
            (trunk_w / Decimal('2') * scale_factor, base_y * scale_factor),
            (trunk_w / Decimal('2') * scale_factor, trunk_bottom_y * scale_factor),
            (-(trunk_w / Decimal('2')) * scale_factor, trunk_bottom_y * scale_factor),
            (-(trunk_w / Decimal('2')) * scale_factor, base_y * scale_factor),
            (-(base_w / Decimal('2')) * scale_factor, base_y * scale_factor),
            (-(mid_w / Decimal('4')) * scale_factor, tier_2_y * scale_factor),
            (-(mid_w / Decimal('2')) * scale_factor, tier_2_y * scale_factor),
            (-(top_w / Decimal('4')) * scale_factor, tier_1_y * scale_factor),
            (-(top_w / Decimal('2')) * scale_factor, tier_1_y * scale_factor),
        ])
        rotated = affinity.rotate(initial_polygon, float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(rotated,
                                          xoff=float(self.center_x * scale_factor),
                                          yoff=float(self.center_y * scale_factor))

In [3]:
def has_overlap(trees):
    if len(trees) <= 1:
        return False
    polygons = [t.polygon for t in trees]
    tree_index = STRtree(polygons)
    for i, poly in enumerate(polygons):
        indices = tree_index.query(poly)
        for idx in indices:
            if idx == i:
                continue
            if poly.intersects(polygons[idx]) and not poly.touches(polygons[idx]):
                return True
    return False

def get_side_length(trees):
    if not trees:
        return Decimal('0')
    all_polygons = [t.polygon for t in trees]
    bounds = unary_union(all_polygons).bounds
    minx = Decimal(bounds[0]) / scale_factor
    miny = Decimal(bounds[1]) / scale_factor
    maxx = Decimal(bounds[2]) / scale_factor
    maxy = Decimal(bounds[3]) / scale_factor
    width = maxx - minx
    height = maxy - miny
    return max(width, height)

def calculate_score(side_lengths):
    score = Decimal('0')
    for n, side in side_lengths.items():
        score += side ** 2 / Decimal(str(n))
    return float(score)

In [4]:
def load_submission(csv_path):
    df = pd.read_csv(csv_path)
    df['x'] = df['x'].astype(str).str.lstrip('s')
    df['y'] = df['y'].astype(str).str.lstrip('s')
    df['deg'] = df['deg'].astype(str).str.lstrip('s')
    
    tree_lists = {}
    side_lengths = {}
    
    for n in range(1, 201):
        prefix = f'{n:03d}_'
        group = df[df['id'].str.startswith(prefix)]
        trees = [ChristmasTree(row['x'], row['y'], row['deg']) for _, row in group.iterrows()]
        tree_lists[n] = trees
        side_lengths[n] = get_side_length(trees)
    
    return tree_lists, side_lengths

In [5]:
# Load baseline and sa_fast_v2 optimized submission
print("Loading baseline...")
baseline_trees, baseline_sides = load_submission('/home/submission/submission.csv')
baseline_score = calculate_score(baseline_sides)
print(f"Baseline score: {baseline_score:.6f}")

print("\nLoading sa_fast_v2 optimized...")
opt_trees, opt_sides = load_submission('/home/code/submission2.csv')
opt_score = calculate_score(opt_sides)
print(f"Optimized score: {opt_score:.6f}")

print(f"\nImprovement: {baseline_score - opt_score:.6f}")

Loading baseline...


Baseline score: 70.627582

Loading sa_fast_v2 optimized...


Optimized score: 70.625972

Improvement: 0.001611


In [6]:
# Check for overlaps in optimized submission
print("Checking for overlaps in optimized submission...")
overlap_count = 0
for n in range(1, 201):
    if has_overlap(opt_trees[n]):
        overlap_count += 1
        if overlap_count <= 10:
            print(f"  N={n}: has overlaps")

print(f"\nTotal N values with overlaps: {overlap_count}")
if overlap_count == 0:
    print("VALID submission - no overlaps!")

Checking for overlaps in optimized submission...
  N=2: has overlaps
  N=4: has overlaps
  N=6: has overlaps
  N=8: has overlaps
  N=9: has overlaps
  N=10: has overlaps
  N=11: has overlaps
  N=12: has overlaps
  N=13: has overlaps
  N=14: has overlaps



Total N values with overlaps: 116


In [7]:
# Compare side lengths for each N
print("Comparing side lengths...")
improvements = []
for n in range(1, 201):
    base_side = float(baseline_sides[n])
    opt_side = float(opt_sides[n])
    diff = base_side - opt_side
    if diff > 1e-8:
        improvements.append((n, base_side, opt_side, diff))

print(f"\nNumber of improved N values: {len(improvements)}")
print("\nTop 20 improvements:")
for n, base, opt, diff in sorted(improvements, key=lambda x: -x[3])[:20]:
    print(f"  N={n:3d}: {base:.6f} -> {opt:.6f} (improved by {diff:.6f})")

# Calculate total score improvement
total_improvement = sum(diff**2/n for n, _, _, diff in improvements)
print(f"\nTotal score improvement from these changes: {total_improvement:.6f}")

Comparing side lengths...

Number of improved N values: 13

Top 20 improvements:
  N= 88: 5.535470 -> 5.530247 (improved by 0.005223)
  N= 35: 3.582223 -> 3.580625 (improved by 0.001598)
  N= 63: 4.716055 -> 4.714556 (improved by 0.001499)
  N=101: 5.935396 -> 5.934311 (improved by 0.001085)
  N= 76: 5.155810 -> 5.155278 (improved by 0.000531)
  N= 64: 4.724561 -> 4.724147 (improved by 0.000414)
  N= 91: 5.619367 -> 5.619036 (improved by 0.000330)
  N= 94: 5.736474 -> 5.736202 (improved by 0.000272)
  N= 36: 3.591947 -> 3.591842 (improved by 0.000105)
  N= 54: 4.387253 -> 4.387150 (improved by 0.000103)
  N= 43: 3.973594 -> 3.973511 (improved by 0.000084)
  N= 98: 5.847808 -> 5.847740 (improved by 0.000068)
  N= 59: 4.612421 -> 4.612379 (improved by 0.000042)

Total score improvement from these changes: 0.000000


In [8]:
# What's the gap to target?
target = 68.901319
print(f"Target score: {target}")
print(f"Current best: {min(baseline_score, opt_score):.6f}")
print(f"Gap to target: {min(baseline_score, opt_score) - target:.6f}")

Target score: 68.901319
Current best: 70.625972
Gap to target: 1.724653


In [9]:
# Apply REPAIR strategy - keep only valid improvements
print("Applying REPAIR strategy...")

repaired_trees = {}
repaired_sides = {}
stats = {'improved': 0, 'invalid': 0, 'same': 0}

for n in range(1, 201):
    opt_t = opt_trees[n]
    opt_s = opt_sides[n]
    base_t = baseline_trees[n]
    base_s = baseline_sides[n]
    
    # Check if optimized is valid
    if has_overlap(opt_t):
        # Invalid - use baseline
        repaired_trees[n] = base_t
        repaired_sides[n] = base_s
        stats['invalid'] += 1
    elif opt_s < base_s:
        # Valid and better - use optimized
        repaired_trees[n] = opt_t
        repaired_sides[n] = opt_s
        stats['improved'] += 1
        print(f"  N={n}: improved {float(base_s):.6f} -> {float(opt_s):.6f}")
    else:
        # Same or worse - use baseline
        repaired_trees[n] = base_t
        repaired_sides[n] = base_s
        stats['same'] += 1

repaired_score = calculate_score(repaired_sides)
print(f"\nRepair stats: {stats}")
print(f"Repaired score: {repaired_score:.6f}")
print(f"Improvement over baseline: {baseline_score - repaired_score:.6f}")

Applying REPAIR strategy...
  N=35: improved 3.582223 -> 3.580625
  N=36: improved 3.591947 -> 3.591842
  N=43: improved 3.973594 -> 3.973511
  N=54: improved 4.387253 -> 4.387150
  N=59: improved 4.612421 -> 4.612379
  N=63: improved 4.716055 -> 4.714556
  N=64: improved 4.724561 -> 4.724147
  N=74: improved 5.108863 -> 5.108863
  N=76: improved 5.155810 -> 5.155278
  N=88: improved 5.535470 -> 5.530247
  N=91: improved 5.619367 -> 5.619036
  N=94: improved 5.736474 -> 5.736202
  N=98: improved 5.847808 -> 5.847740
  N=101: improved 5.935396 -> 5.934311



Repair stats: {'improved': 14, 'invalid': 116, 'same': 70}
Repaired score: 70.625972
Improvement over baseline: 0.001611


In [None]:
# Analyze where the biggest improvements could come from
# The score formula is: sum(side_length^2 / n) for n=1 to 200

print("Score contribution analysis:")
print("=" * 60)

# Calculate contribution per N
contributions = []
for n in range(1, 201):
    side = float(baseline_sides[n])
    contrib = side**2 / n
    contributions.append((n, side, contrib))

# Sort by contribution
contributions.sort(key=lambda x: -x[2])

print("\nTop 30 N values by score contribution:")
total_score = sum(c[2] for c in contributions)
cumulative = 0
for n, side, contrib in contributions[:30]:
    cumulative += contrib
    pct = contrib / total_score * 100
    cum_pct = cumulative / total_score * 100
    print(f"N={n:3d}: side={side:.6f}, contrib={contrib:.6f} ({pct:.2f}%), cumulative={cum_pct:.1f}%")

print(f"\nTotal score: {total_score:.6f}")
print(f"Target: 68.901319")
print(f"Gap: {total_score - 68.901319:.6f}")