# Loop 16 Analysis: Understanding the Gap

## Current Status
- Best CV/LB: 70.630465
- Target: 68.919154
- Gap: 1.711 points (2.42%)

## Key Question
Why is there such a large gap? What approaches haven't been tried?

In [1]:
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union
import matplotlib.pyplot as plt
import os

getcontext().prec = 25
scale_factor = Decimal("1")

class ChristmasTree:
    def __init__(self, center_x='0', center_y='0', angle='0'):
        self.center_x = Decimal(center_x)
        self.center_y = Decimal(center_y)
        self.angle = Decimal(angle)
        trunk_w = Decimal('0.15')
        trunk_h = Decimal('0.2')
        base_w = Decimal('0.7')
        mid_w = Decimal('0.4')
        top_w = Decimal('0.25')
        tip_y = Decimal('0.8')
        tier_1_y = Decimal('0.5')
        tier_2_y = Decimal('0.25')
        base_y = Decimal('0.0')
        trunk_bottom_y = -trunk_h
        initial_polygon = Polygon([
            (Decimal('0.0') * scale_factor, tip_y * scale_factor),
            (top_w / Decimal('2') * scale_factor, tier_1_y * scale_factor),
            (top_w / Decimal('4') * scale_factor, tier_1_y * scale_factor),
            (mid_w / Decimal('2') * scale_factor, tier_2_y * scale_factor),
            (mid_w / Decimal('4') * scale_factor, tier_2_y * scale_factor),
            (base_w / Decimal('2') * scale_factor, base_y * scale_factor),
            (trunk_w / Decimal('2') * scale_factor, base_y * scale_factor),
            (trunk_w / Decimal('2') * scale_factor, trunk_bottom_y * scale_factor),
            (-(trunk_w / Decimal('2')) * scale_factor, trunk_bottom_y * scale_factor),
            (-(trunk_w / Decimal('2')) * scale_factor, base_y * scale_factor),
            (-(base_w / Decimal('2')) * scale_factor, base_y * scale_factor),
            (-(mid_w / Decimal('4')) * scale_factor, tier_2_y * scale_factor),
            (-(mid_w / Decimal('2')) * scale_factor, tier_2_y * scale_factor),
            (-(top_w / Decimal('4')) * scale_factor, tier_1_y * scale_factor),
            (-(top_w / Decimal('2')) * scale_factor, tier_1_y * scale_factor),
        ])
        rotated = affinity.rotate(initial_polygon, float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(rotated,
                                          xoff=float(self.center_x * scale_factor),
                                          yoff=float(self.center_y * scale_factor))

def get_tree_list_side_length(tree_list):
    all_polygons = [t.polygon for t in tree_list]
    bounds = unary_union(all_polygons).bounds
    return Decimal(max(bounds[2] - bounds[0], bounds[3] - bounds[1])) / scale_factor

def get_total_score(dict_of_side_length):
    score = 0
    for k, v in dict_of_side_length.items():
        score += v ** 2 / Decimal(k)
    return score

def parse_csv(csv_path):
    result = pd.read_csv(csv_path)
    result['x'] = result['x'].str.strip('s')
    result['y'] = result['y'].str.strip('s')
    result['deg'] = result['deg'].str.strip('s')
    result[['group_id', 'item_id']] = result['id'].str.split('_', n=2, expand=True)
    dict_of_tree_list = {}
    dict_of_side_length = {}
    for group_id, group_data in result.groupby('group_id'):
        tree_list = [ChristmasTree(center_x=row['x'], center_y=row['y'], angle=row['deg']) for _, row in group_data.iterrows()]
        dict_of_tree_list[group_id] = tree_list
        dict_of_side_length[group_id] = get_tree_list_side_length(tree_list)
    return dict_of_tree_list, dict_of_side_length

print('Loaded helper functions')

Loaded helper functions


In [2]:
# Load current best
dict_of_tree_list, dict_of_side_length = parse_csv('/home/code/exploration/datasets/ensemble_best.csv')
current_score = get_total_score(dict_of_side_length)
print(f'Current score: {current_score:.8f}')
print(f'Target: 68.919154')
print(f'Gap: {float(current_score) - 68.919154:.6f} ({(float(current_score) - 68.919154) / 68.919154 * 100:.2f}%)')

Current score: 70.63047845
Target: 68.919154
Gap: 1.711324 (2.48%)


In [3]:
# Analyze per-N scores
per_n_scores = []
for n in range(1, 201):
    key = f'{n:03d}'
    side = dict_of_side_length[key]
    n_score = float(side ** 2 / Decimal(n))
    per_n_scores.append({'n': n, 'side': float(side), 'score': n_score})

df = pd.DataFrame(per_n_scores)
print('Score distribution by N:')
print(f'  N=1-10: {df[df.n <= 10].score.sum():.4f}')
print(f'  N=11-50: {df[(df.n > 10) & (df.n <= 50)].score.sum():.4f}')
print(f'  N=51-100: {df[(df.n > 50) & (df.n <= 100)].score.sum():.4f}')
print(f'  N=101-150: {df[(df.n > 100) & (df.n <= 150)].score.sum():.4f}')
print(f'  N=151-200: {df[(df.n > 150) & (df.n <= 200)].score.sum():.4f}')
print(f'  Total: {df.score.sum():.4f}')

Score distribution by N:
  N=1-10: 4.3291
  N=11-50: 14.7050
  N=51-100: 17.6170
  N=101-150: 17.1364
  N=151-200: 16.8430
  Total: 70.6305


In [4]:
# Check ALL snapshots for better solutions
snapshot_dir = '/home/nonroot/snapshots/santa-2025/'
snapshots = sorted(os.listdir(snapshot_dir))

print(f'Found {len(snapshots)} snapshots')
print('\nChecking ALL snapshots for scores...')

best_snapshot = None
best_snapshot_score = float('inf')

for snap_id in snapshots:
    snap_path = os.path.join(snapshot_dir, snap_id, 'submission.csv')
    if os.path.exists(snap_path):
        try:
            _, side_lengths = parse_csv(snap_path)
            score = float(get_total_score(side_lengths))
            if score < best_snapshot_score:
                best_snapshot_score = score
                best_snapshot = snap_id
                print(f'  NEW BEST: {snap_id}: {score:.6f}')
        except Exception as e:
            pass

print(f'\nBest snapshot: {best_snapshot} with score {best_snapshot_score:.6f}')
print(f'Current best: {float(current_score):.6f}')
print(f'Difference: {float(current_score) - best_snapshot_score:.6f}')

Found 67 snapshots

Checking ALL snapshots for scores...

Best snapshot: None with score inf
Current best: 70.630478
Difference: -inf


In [5]:
# Check ALL snapshots for better solutions - correct path
snapshot_dir = '/home/nonroot/snapshots/santa-2025/'
snapshots = sorted(os.listdir(snapshot_dir))

print(f'Found {len(snapshots)} snapshots')
print('\nChecking ALL snapshots for scores...')

best_snapshot = None
best_snapshot_score = float('inf')
snapshot_scores = []

for snap_id in snapshots:
    snap_path = os.path.join(snapshot_dir, snap_id, 'submission', 'submission.csv')
    if os.path.exists(snap_path):
        try:
            _, side_lengths = parse_csv(snap_path)
            score = float(get_total_score(side_lengths))
            snapshot_scores.append({'id': snap_id, 'score': score})
            if score < best_snapshot_score:
                best_snapshot_score = score
                best_snapshot = snap_id
                print(f'  NEW BEST: {snap_id}: {score:.6f}')
        except Exception as e:
            pass

print(f'\nBest snapshot: {best_snapshot} with score {best_snapshot_score:.6f}')
print(f'Current best: {float(current_score):.6f}')
print(f'Difference: {float(current_score) - best_snapshot_score:.6f}')

Found 67 snapshots

Checking ALL snapshots for scores...


  NEW BEST: 20952569566: 163.194569


  NEW BEST: 20971964134: 87.804045


  NEW BEST: 21016257921: 87.364112


  NEW BEST: 21090949260: 84.711359


  NEW BEST: 21104669204: 70.734327


  NEW BEST: 21105319338: 70.734327


  NEW BEST: 21116303805: 70.676102


  NEW BEST: 21121943993: 70.676102


  NEW BEST: 21123768399: 70.676102


  NEW BEST: 21145965159: 70.659944


  NEW BEST: 21145966992: 70.572798



Best snapshot: 21145966992 with score 70.572798
Current best: 70.630478
Difference: 0.057680


In [7]:
# Check if best snapshot has overlaps
def has_collision(trees):
    if len(trees) <= 1:
        return False
    for i in range(len(trees)):
        for j in range(i+1, len(trees)):
            if trees[i].polygon.intersects(trees[j].polygon) and not trees[i].polygon.touches(trees[j].polygon):
                return True
    return False

best_snap_path = '/home/nonroot/snapshots/santa-2025/21145966992/submission/submission.csv'
snap_trees, snap_sides = parse_csv(best_snap_path)

print('Checking best snapshot for overlaps...')
overlap_count = 0
for n in range(1, 201):
    key = f'{n:03d}'
    if has_collision(snap_trees[key]):
        overlap_count += 1
        if overlap_count <= 10:
            print(f'  N={n}: HAS OVERLAPS')

print(f'\nTotal N values with overlaps: {overlap_count}')
if overlap_count > 0:
    print('INVALID - cannot use this snapshot')
else:
    print('VALID - no overlaps!')

Checking best snapshot for overlaps...
  N=2: HAS OVERLAPS
  N=4: HAS OVERLAPS
  N=5: HAS OVERLAPS
  N=16: HAS OVERLAPS
  N=35: HAS OVERLAPS
  N=36: HAS OVERLAPS
  N=40: HAS OVERLAPS
  N=46: HAS OVERLAPS
  N=47: HAS OVERLAPS
  N=48: HAS OVERLAPS



Total N values with overlaps: 72
INVALID - cannot use this snapshot


In [8]:
# Find best VALID snapshot (no overlaps)
print('Finding best VALID snapshot...')

best_valid_snapshot = None
best_valid_score = float('inf')

for snap_id in snapshots:
    snap_path = os.path.join(snapshot_dir, snap_id, 'submission', 'submission.csv')
    if os.path.exists(snap_path):
        try:
            snap_trees, snap_sides = parse_csv(snap_path)
            score = float(get_total_score(snap_sides))
            
            # Check for overlaps
            has_overlaps = False
            for n in range(1, 201):
                key = f'{n:03d}'
                if has_collision(snap_trees[key]):
                    has_overlaps = True
                    break
            
            if not has_overlaps:
                if score < best_valid_score:
                    best_valid_score = score
                    best_valid_snapshot = snap_id
                    print(f'  NEW BEST VALID: {snap_id}: {score:.6f}')
        except Exception as e:
            pass

print(f'\nBest VALID snapshot: {best_valid_snapshot} with score {best_valid_score:.6f}')
print(f'Current best: {float(current_score):.6f}')
print(f'Difference: {float(current_score) - best_valid_score:.6f}')

Finding best VALID snapshot...


  NEW BEST VALID: 20952569566: 163.194569


  NEW BEST VALID: 21086827828: 114.587809


  NEW BEST VALID: 21104669204: 70.734327


  NEW BEST VALID: 21105319338: 70.734327


  NEW BEST VALID: 21116303805: 70.676102


  NEW BEST VALID: 21145968755: 70.659959


  NEW BEST VALID: 21156851249: 70.659437


  NEW BEST VALID: 21165872902: 70.647306


  NEW BEST VALID: 21165874980: 70.630478


  NEW BEST VALID: 21180223864: 70.630465

Best VALID snapshot: 21180223864 with score 70.630465
Current best: 70.630478
Difference: 0.000013
