# Loop 16 Analysis: Understanding the Gap and Finding New Approaches

## Current Status
- Best CV/LB: 70.630478
- Target: 68.919154
- Gap: 1.711 points (2.42% of the current score; 2.48% relative to the target)

## Key Insight from Evaluator
The baseline is at an EXTREMELY strong local optimum. All approaches tried (SA, GA, basin hopping, tree removal, cross-N extraction) find the SAME tiny improvement (0.00001345).

## What We Need
A FUNDAMENTALLY DIFFERENT approach that creates solutions in a DIFFERENT basin.

In [None]:
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union
import matplotlib.pyplot as plt

# Use 25 significant digits for Decimal arithmetic in the current context.
getcontext().prec = 25
# Tree coordinates are multiplied by scale_factor when a polygon is built and
# side lengths are divided by it again; a value of 1 leaves geometry unscaled.
scale_factor = Decimal("1")

class ChristmasTree:
    """A tree-shaped polygon placed at a centre point with a rotation.

    The position and angle are stored as ``Decimal`` values; the placed
    shape is available as the shapely ``polygon`` attribute.
    """

    def __init__(self, center_x='0', center_y='0', angle='0'):
        """Build the tree outline, rotate it about the origin, then move it.

        Args:
            center_x: x offset of the tree, anything ``Decimal`` accepts.
            center_y: y offset of the tree, anything ``Decimal`` accepts.
            angle: rotation passed to ``affinity.rotate`` (degrees, as that
                function's default), anything ``Decimal`` accepts.
        """
        self.center_x = Decimal(center_x)
        self.center_y = Decimal(center_y)
        self.angle = Decimal(angle)

        two = Decimal('2')
        four = Decimal('4')

        # Widths of the trunk and of the three foliage tiers.
        trunk_w = Decimal('0.15')
        base_w = Decimal('0.7')
        mid_w = Decimal('0.4')
        top_w = Decimal('0.25')

        # Heights of the outline's horizontal levels, top to bottom.
        tip_y = Decimal('0.8')
        tier_1_y = Decimal('0.5')
        tier_2_y = Decimal('0.25')
        base_y = Decimal('0.0')
        trunk_h = Decimal('0.2')
        trunk_bottom_y = -trunk_h

        # Right-hand half of the outline, walking down from just below the
        # tip to the bottom-right corner of the trunk.
        right_side = [
            (top_w / two * scale_factor, tier_1_y * scale_factor),
            (top_w / four * scale_factor, tier_1_y * scale_factor),
            (mid_w / two * scale_factor, tier_2_y * scale_factor),
            (mid_w / four * scale_factor, tier_2_y * scale_factor),
            (base_w / two * scale_factor, base_y * scale_factor),
            (trunk_w / two * scale_factor, base_y * scale_factor),
            (trunk_w / two * scale_factor, trunk_bottom_y * scale_factor),
        ]
        # The left-hand half is the mirror image, walked back up to the tip.
        left_side = [(-x, y) for x, y in reversed(right_side)]
        tip = (Decimal('0.0') * scale_factor, tip_y * scale_factor)

        outline = Polygon([tip, *right_side, *left_side])
        turned = affinity.rotate(outline, float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(
            turned,
            xoff=float(self.center_x * scale_factor),
            yoff=float(self.center_y * scale_factor),
        )

    def clone(self):
        """Return a new, independent tree with the same position and angle."""
        return ChristmasTree(
            center_x=str(self.center_x),
            center_y=str(self.center_y),
            angle=str(self.angle),
        )

def get_tree_list_side_length(tree_list):
    """Return the longer side of the trees' joint bounding box as a Decimal.

    The polygons of all trees are merged, the axis-aligned bounds of the
    result are taken, and the larger of width and height is divided by
    ``scale_factor`` to undo the coordinate scaling.
    """
    merged = unary_union([tree.polygon for tree in tree_list])
    box = merged.bounds  # (min_x, min_y, max_x, max_y)
    width = box[2] - box[0]
    height = box[3] - box[1]
    return Decimal(max(width, height)) / scale_factor

def get_total_score(dict_of_side_length):
    """Sum ``side ** 2 / N`` over every group.

    Args:
        dict_of_side_length: mapping from a group key (converted with
            ``Decimal``, e.g. ``'007'``) to that group's side length.

    Returns:
        The accumulated score; the integer ``0`` when the mapping is empty.
    """
    return sum(
        (side ** 2 / Decimal(group) for group, side in dict_of_side_length.items()),
        0,
    )

def parse_csv(csv_path):
    """Load a submission CSV and build the trees and side length per group.

    The ``x``, ``y`` and ``deg`` columns have ``'s'`` characters stripped
    from both ends of each value (presumably a prefix that keeps the numbers
    as text - confirm against the file format). The ``id`` column is split
    on ``'_'`` into a group id and an item id, and rows are grouped by the
    group id.

    Args:
        csv_path: path of the CSV file to read.

    Returns:
        A pair ``(dict_of_tree_list, dict_of_side_length)``, both keyed by
        group id: the list of ``ChristmasTree`` objects of each group and
        the side length computed for that list.
    """
    frame = pd.read_csv(csv_path)
    for column in ('x', 'y', 'deg'):
        frame[column] = frame[column].str.strip('s')
    frame[['group_id', 'item_id']] = frame['id'].str.split('_', n=2, expand=True)

    dict_of_tree_list = {}
    dict_of_side_length = {}
    for group_id, group_data in frame.groupby('group_id'):
        placements = zip(group_data['x'], group_data['y'], group_data['deg'])
        tree_list = [
            ChristmasTree(center_x=x, center_y=y, angle=deg)
            for x, y, deg in placements
        ]
        dict_of_tree_list[group_id] = tree_list
        dict_of_side_length[group_id] = get_tree_list_side_length(tree_list)

    return dict_of_tree_list, dict_of_side_length

# Confirms that the definitions in this cell ran to completion.
print('Functions loaded')

In [None]:
# Load our best solution and report its distance from the target score.
# The target is kept in one place so the three report lines cannot drift apart.
target_score = 68.919154

dict_of_tree_list, dict_of_side_length = parse_csv('/home/code/exploration/datasets/ensemble_best.csv')
current_score = get_total_score(dict_of_side_length)
# Gap is "current minus target"; the percentage is relative to the target.
gap = float(current_score) - target_score
print(f'Current best score: {current_score:.8f}')
print(f'Target: {target_score:.6f}')
print(f'Gap: {gap:.6f} ({gap / target_score * 100:.2f}%)')

In [None]:
# Score contribution of every N from 1 to 200: side ** 2 / N, as a float.
scores_by_n = {
    n: float(dict_of_side_length[f'{n:03d}'] ** 2 / Decimal(n))
    for n in range(1, 201)
}

# Rank the N values from the largest contribution to the smallest.
contributions = sorted(scores_by_n.items(), key=lambda item: item[1], reverse=True)
total_score = float(current_score)
print('Top 20 N values by score contribution:')
for n, score in contributions[:20]:
    share = score / total_score * 100
    print(f'  N={n}: {score:.6f} ({share:.2f}%)')

In [None]:
# Check all available snapshots for better solutions
import os
import glob

snapshot_dir = '/home/nonroot/snapshots/santa-2025/'
# os.listdir returns entries in arbitrary order; sorting makes "the first 20"
# below the same set of snapshots on every run.
snapshots = sorted(
    d for d in os.listdir(snapshot_dir)
    if d != '.git' and os.path.isdir(os.path.join(snapshot_dir, d))
)

print(f'Found {len(snapshots)} snapshots')

# Track the lowest-scoring submission.csv seen so far.
best_snapshot_score = float('inf')
best_snapshot = None

for snapshot in snapshots[:20]:  # Check first 20
    csv_path = os.path.join(snapshot_dir, snapshot, 'submission.csv')
    if not os.path.exists(csv_path):
        continue
    try:
        _, side_lengths = parse_csv(csv_path)
        score = float(get_total_score(side_lengths))
    except Exception as exc:
        # Still best effort - one unreadable snapshot must not stop the scan -
        # but report it instead of hiding the failure.
        print(f'  {snapshot}: skipped ({type(exc).__name__}: {exc})')
        continue
    if score < best_snapshot_score:
        best_snapshot_score = score
        best_snapshot = snapshot
        print(f'  {snapshot}: {score:.6f}')

print(f'\nBest snapshot: {best_snapshot} with score {best_snapshot_score:.6f}')

In [None]:
# Check the saspav_csv directory for individual N solutions
saspav_dir = '/home/code/exploration/datasets/saspav_csv/'
# isdir rather than exists: a regular file at this path would pass an
# existence check and then make os.listdir raise.
if os.path.isdir(saspav_dir):
    files = os.listdir(saspav_dir)
    print(f'Found {len(files)} files in saspav_csv')
    print('Sample files:', files[:5])

In [None]:
# Analyze the structure of our best solution for specific N values
# Focus on N values that egortrushin targets: 72, 100, 110, 144, 156, 196, 200

target_ns = [72, 100, 110, 144, 156, 196, 200]

print('Analysis of key N values (egortrushin targets):')
print('='*60)
for n in target_ns:
    key = f'{n:03d}'
    trees = dict_of_tree_list[key]
    side = dict_of_side_length[key]
    score = float(side ** 2 / Decimal(n))

    # Axis-aligned bounding box of all trees in this group.
    all_polygons = [t.polygon for t in trees]
    bounds = unary_union(all_polygons).bounds
    width = bounds[2] - bounds[0]
    height = bounds[3] - bounds[1]

    # Count distinct orientations at 0.1-degree resolution (few distinct
    # values hint at a grid pattern). The second "% 360" folds values that
    # round up to 360.0 back onto 0.0, so e.g. 359.97 and 0.02 count as one.
    angles = [float(t.angle) for t in trees]
    unique_angles = len({round(a % 360, 1) % 360 for a in angles})

    print(f'N={n}:')
    print(f'  Side: {float(side):.6f}')
    print(f'  Score contribution: {score:.6f}')
    print(f'  Bounding box: {width:.4f} x {height:.4f}')
    print(f'  Unique angles: {unique_angles}')
    print()

## Key Findings

1. **The baseline is at a strong local optimum** - All optimization approaches converge to the same score.

2. **The gap is 1.711 points (2.42% of the current score)** - This is too large to close with micro-optimizations.

3. **Egortrushin tessellation approach** creates fundamentally different configurations:
   - Uses translation-based SA (not just rotation)
   - Creates grid patterns that may be in a different basin
   - For N=200, optimizes 210 trees, then deletes the 10 worst

4. **We have NOT fully implemented the egortrushin approach** - Our previous tessellation attempts were different.

## Recommended Next Steps

1. **Implement FULL egortrushin tessellation SA** for N=72, 100, 110, 144, 156, 196, 200
2. **Submit current best** (70.63046501) to verify improvement
3. **Try asymmetric initial configurations** with high-temperature SA