# Loop 16 Analysis: Understanding the Gap

## Current Status
- Best score: 70.630465 (from exp_017; lower is better)
- Target: 68.919154
- Gap: 1.711 points (2.42% of the current score)

## Key Question: What would it take to close this gap?

In [1]:
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union
import matplotlib.pyplot as plt

# Use 25 significant digits for all Decimal arithmetic in this notebook.
getcontext().prec = 25
# Multiplier applied to every polygon coordinate and divided back out of the
# measured side length; at 1 the geometry is left unscaled.
scale_factor = Decimal("1")

class ChristmasTree:
    """A tree-shaped polygon placed at a given position and rotation.

    The silhouette has 15 vertices (a tip, three foliage tiers and a trunk)
    and is symmetric about the vertical axis. It is defined around the
    origin, rotated about the origin by ``angle`` and then translated to
    ``(center_x, center_y)``.

    Attributes:
        center_x: Horizontal placement, stored as ``Decimal``.
        center_y: Vertical placement, stored as ``Decimal``.
        angle: Rotation passed to ``affinity.rotate``, stored as ``Decimal``.
        polygon: The rotated and translated shapely ``Polygon``.
    """

    def __init__(self, center_x='0', center_y='0', angle='0'):
        """Build the polygon from values accepted by ``Decimal`` (e.g. strings)."""
        self.center_x = Decimal(center_x)
        self.center_y = Decimal(center_y)
        self.angle = Decimal(angle)

        two = Decimal('2')
        four = Decimal('4')

        # Full widths of the trunk and of each foliage tier.
        trunk_w = Decimal('0.15')
        base_w = Decimal('0.7')
        mid_w = Decimal('0.4')
        top_w = Decimal('0.25')

        # Heights of the outline's horizontal levels, top to bottom.
        tip_y = Decimal('0.8')
        tier_1_y = Decimal('0.5')
        tier_2_y = Decimal('0.25')
        base_y = Decimal('0.0')
        trunk_h = Decimal('0.2')
        trunk_bottom_y = -trunk_h

        # Right-hand half of the outline, walking down from just below the
        # tip to the bottom-right corner of the trunk (unscaled).
        right_profile = [
            (top_w / two, tier_1_y),
            (top_w / four, tier_1_y),
            (mid_w / two, tier_2_y),
            (mid_w / four, tier_2_y),
            (base_w / two, base_y),
            (trunk_w / two, base_y),
            (trunk_w / two, trunk_bottom_y),
        ]

        tip = (Decimal('0.0') * scale_factor, tip_y * scale_factor)
        right_side = [(x * scale_factor, y * scale_factor) for x, y in right_profile]
        # The left half mirrors the right one and is walked bottom-up so the
        # ring closes back at the tip.
        left_side = [(-x * scale_factor, y * scale_factor)
                     for x, y in reversed(right_profile)]

        outline = Polygon([tip, *right_side, *left_side])

        # Rotate about the origin first, then move the tree into place.
        turned = affinity.rotate(outline, float(self.angle), origin=(0, 0))
        shift_x = float(self.center_x * scale_factor)
        shift_y = float(self.center_y * scale_factor)
        self.polygon = affinity.translate(turned, xoff=shift_x, yoff=shift_y)

def get_tree_list_side_length(tree_list):
    """Return the side of the axis-aligned square that bounds all trees.

    The side is the larger of the width and height of the joint bounding box
    of every tree polygon, divided by ``scale_factor``.

    The joint bounding box is obtained directly from each polygon's own
    ``bounds`` instead of first merging the polygons: the box of a union is
    the box of its parts, so the costly overlay is unnecessary.

    Args:
        tree_list: Non-empty iterable of objects exposing ``polygon.bounds``
            as ``(minx, miny, maxx, maxy)``.

    Returns:
        Decimal: The bounding square's side length.

    Raises:
        ValueError: If ``tree_list`` is empty.
    """
    all_bounds = [t.polygon.bounds for t in tree_list]
    if not all_bounds:
        raise ValueError("tree_list must contain at least one tree")

    min_xs, min_ys, max_xs, max_ys = zip(*all_bounds)
    width = max(max_xs) - min(min_xs)
    height = max(max_ys) - min(min_ys)
    return Decimal(max(width, height)) / scale_factor

def parse_csv(csv_path):
    """Load a submission CSV and build the trees and side length per group.

    The file must have the columns ``id``, ``x``, ``y`` and ``deg``. The part
    of ``id`` before the first underscore is the group id. ``x``, ``y`` and
    ``deg`` may carry a leading ``s``, which is removed before the text is
    handed to ``ChristmasTree``.

    Args:
        csv_path: Anything ``pandas.read_csv`` accepts (path or file-like).

    Returns:
        tuple: ``(dict_of_tree_list, dict_of_side_length)``, both keyed by
        group id. The first maps to the list of ``ChristmasTree`` objects of
        the group, the second to ``get_tree_list_side_length`` of that list.
    """
    # Read everything as text: the values then reach Decimal verbatim and the
    # .str accessor also works for files whose numbers have no 's' prefix.
    result = pd.read_csv(csv_path, dtype=str)
    for column in ('x', 'y', 'deg'):
        # Only a leading marker is meant; never touch the end of the value.
        result[column] = result[column].str.lstrip('s')
    # Split once so extra underscores in an id cannot add further pieces.
    result['group_id'] = result['id'].str.split('_', n=1).str[0]

    dict_of_tree_list = {}
    dict_of_side_length = {}
    for group_id, group_data in result.groupby('group_id'):
        tree_list = [
            ChristmasTree(center_x=x, center_y=y, angle=deg)
            for x, y, deg in zip(group_data['x'], group_data['y'], group_data['deg'])
        ]
        dict_of_tree_list[group_id] = tree_list
        dict_of_side_length[group_id] = get_tree_list_side_length(tree_list)

    return dict_of_tree_list, dict_of_side_length

# Marker so the cell's output shows the helper definitions above were executed.
print("Loaded utilities")

Loaded utilities


In [2]:
# Parse the current best submission into per-group tree lists and side lengths.
dict_of_tree_list, dict_of_side_length = parse_csv('/home/code/exploration/datasets/ensemble_best.csv')

# A group of N trees scores side^2 / N; groups are keyed by zero-padded N
# ('001' .. '200').
per_n_scores = {
    tree_count: float(dict_of_side_length[f'{tree_count:03d}'] ** 2 / Decimal(tree_count))
    for tree_count in range(1, 201)
}

# The overall score is the sum of the per-N scores.
total_score = sum(per_n_scores.values())
print(
    f"Total score: {total_score:.6f}",
    "Target: 68.919154",
    f"Gap: {total_score - 68.919154:.6f}",
    sep="\n",
)

Total score: 70.630478
Target: 68.919154
Gap: 1.711324


In [3]:
# How is the total score distributed over N?
# Dense array of the per-N scores, ordered N = 1 .. 200.
scores_array = np.array(list(map(per_n_scores.__getitem__, range(1, 201))))

# Inclusive (first N, last N) buckets to report on.
ranges = [(1, 10), (11, 50), (51, 100), (101, 150), (151, 200)]
for first_n, last_n in ranges:
    bucket_total = sum(map(per_n_scores.__getitem__, range(first_n, last_n + 1)))
    bucket_share = bucket_total / total_score * 100
    print(f"N={first_n}-{last_n}: {bucket_total:.4f} ({bucket_share:.1f}%)")

print(f"\nTotal: {total_score:.6f}")

N=1-10: 4.3291 (6.1%)
N=11-50: 14.7050 (20.8%)
N=51-100: 17.6170 (24.9%)
N=101-150: 17.1364 (24.3%)
N=151-200: 16.8430 (23.8%)

Total: 70.630478


In [4]:
# How much relative improvement is needed to reach the target, depending on
# which part of the N range absorbs it?
target = 68.919154
gap = total_score - target

# Relative improvement if the whole gap is spread evenly over every N.
pct_needed = gap / total_score * 100
# Score mass available in the large-N (101-200) and small-N (1-50) groups.
large_n_score = sum(map(per_n_scores.__getitem__, range(101, 201)))
small_n_score = sum(map(per_n_scores.__getitem__, range(1, 51)))

print(f"Gap to close: {gap:.6f}")

print("\nIf we improved EVERY N by the same percentage:")
print(f"  Need {pct_needed:.2f}% improvement across all N")

print("\nIf we improved only large N (101-200):")
print(f"  Large N contributes: {large_n_score:.4f}")
print(f"  Would need {gap / large_n_score * 100:.2f}% improvement in large N only")

print("\nIf we improved only small N (1-50):")
print(f"  Small N contributes: {small_n_score:.4f}")
print(f"  Would need {gap / small_n_score * 100:.2f}% improvement in small N only")

Gap to close: 1.711324

If we improved EVERY N by the same percentage:
  Need 2.42% improvement across all N

If we improved only large N (101-200):
  Large N contributes: 33.9794
  Would need 5.04% improvement in large N only

If we improved only small N (1-50):
  Small N contributes: 19.0341
  Would need 8.99% improvement in small N only


In [5]:
# Key insight: What if we could match the PUBLIC LEADERBOARD leader?
# The only hand-entered number is the public leaderboard score; our score and
# the target are taken from `total_score` and `target` computed in the cells
# above, so this report cannot drift from the measured values.
PUBLIC_LB_BEST = 71.19  # Public LB #1: terry_u16

print("LEADERBOARD CONTEXT:")
print(f"  Public LB #1 (terry_u16): {PUBLIC_LB_BEST:.2f}")
print(f"  Our current score: {total_score:.3f}")
print(f"  Target: {target:.3f}")
print()
# Lower scores are better, so positive differences mean "ahead of the leader".
print(f"  We are BETTER than public leader by: {PUBLIC_LB_BEST - total_score:.3f} points")
print(f"  Target is BELOW public leader by: {PUBLIC_LB_BEST - target:.3f} points")
print()
# NOTE: the statements below are the author's interpretation of the numbers
# above; nothing in this notebook computes or verifies them.
print("This means:")
print(f"  1. The target ({target:.3f}) is NOT achievable with any PUBLIC technique")
print("  2. Top teams have PRIVATE techniques not shared publicly")
print("  3. We need to DISCOVER new techniques, not just implement existing ones")

LEADERBOARD CONTEXT:
  Public LB #1 (terry_u16): 71.19
  Our current score: 70.630
  Target: 68.919

  We are BETTER than public leader by: 0.560 points
  Target is BELOW public leader by: 2.271 points

This means:
  1. The target (68.919) is NOT achievable with any PUBLIC technique
  2. Top teams have PRIVATE techniques not shared publicly
  3. We need to DISCOVER new techniques, not just implement existing ones
