# Loop 8 Analysis: Understanding the Situation

## Key Observations:
1. Our baseline score: 70.676102 (CV and LB match)
2. Current #1 on LB: 71.19 (as of web search)
3. Target: 68.919154
4. Gap to target: 1.757 points (2.55% of the target score, or about 2.49% of our current score)

**CRITICAL INSIGHT**: Lower scores are better here, so our score of 70.676 is BETTER than the current #1 (71.19)!
This suggests either:
- The leaderboard figure found via web search is outdated
- Or we're already competitive and need to focus on closing the gap to the target

In [1]:
import pandas as pd
import numpy as np
import os
import glob

# Summarise where we stand: our baseline, the best leaderboard score found via
# web search, and the target. Lower scores are better (the target is below the
# baseline). Each figure is defined once so the printed gap cannot drift from
# the numbers it is derived from.
baseline_score = 70.676102
lb_top_score = 71.19  # from a web search; may be outdated
target_score = 68.919154
gap_to_target = baseline_score - target_score

print("=== SCORE ANALYSIS ===")
print(f"Our baseline: {baseline_score:.6f}")
print(f"Current #1 on LB: {lb_top_score:.2f} (from web search)")
print(f"Target: {target_score:.6f}")
# The percentage is expressed relative to the target score.
print(f"Gap to target: {gap_to_target:.6f} ({gap_to_target / target_score * 100:.2f}%)")
print()
print("If our score is better than #1, we're already competitive!")
print(f"The question is: how do we close the {gap_to_target:.3f} point gap to target?")

=== SCORE ANALYSIS ===
Our baseline: 70.676102
Current #1 on LB: 71.19 (from web search)
Target: 68.919154
Gap to target: 1.756948 (2.55%)

If our score is better than #1, we're already competitive!
The question is: how do we close the 1.757 point gap to target?


In [2]:
# Let's analyze per-N scores to find where improvements are possible
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon

getcontext().prec = 25
scale_factor = Decimal("1e15")

class ChristmasTree:
    """A tree outline placed at a given centre and rotation.

    The outline is a fixed 15-vertex polygon (tip, three tiers and a trunk)
    whose Decimal coordinates are multiplied by the module-level
    ``scale_factor``. It is rotated about the origin by ``angle`` and then
    translated to the centre. The resulting shapely polygon, still in scaled
    units, is stored in ``self.polygon``.
    """

    def __init__(self, center_x="0", center_y="0", angle="0"):
        """Store the pose as Decimals and build the transformed polygon.

        Args:
            center_x: x offset of the tree; converted with ``Decimal(str(...))``.
            center_y: y offset of the tree; converted the same way.
            angle: rotation handed to ``shapely.affinity.rotate`` (which
                interprets it as degrees by default).
        """
        self.center_x, self.center_y, self.angle = (
            Decimal(str(value)) for value in (center_x, center_y, angle)
        )

        two, four = Decimal("2"), Decimal("4")

        # Horizontal extents of each part of the tree.
        trunk_w = Decimal("0.15")
        base_w = Decimal("0.7")
        mid_w = Decimal("0.4")
        top_w = Decimal("0.25")

        # Vertical levels, from the tip down to the bottom of the trunk.
        tip_y = Decimal("0.8")
        tier_1_y = Decimal("0.5")
        tier_2_y = Decimal("0.25")
        base_y = Decimal("0.0")
        trunk_bottom_y = -Decimal("0.2")

        # Right-hand half of the outline, walking down from just below the tip.
        right_side = [
            (top_w / two, tier_1_y),
            (top_w / four, tier_1_y),
            (mid_w / two, tier_2_y),
            (mid_w / four, tier_2_y),
            (base_w / two, base_y),
            (trunk_w / two, base_y),
            (trunk_w / two, trunk_bottom_y),
        ]
        # The left-hand half is the mirror image, walked back up towards the tip.
        left_side = [(-x, y) for x, y in reversed(right_side)]

        outline = [(Decimal("0.0"), tip_y), *right_side, *left_side]
        scaled_outline = [(x * scale_factor, y * scale_factor) for x, y in outline]

        # Rotate about the origin first, then move the tree to its centre.
        spun = affinity.rotate(Polygon(scaled_outline), float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(
            spun,
            xoff=float(self.center_x * scale_factor),
            yoff=float(self.center_y * scale_factor),
        )

def calculate_score(trees):
    """Return the squared bounding-square side of all trees, divided by the tree count.

    Every tree must expose ``polygon.exterior.xy``. The coordinates are
    divided by 1e15 before the bounding box is measured. The longer side of
    the axis-aligned bounding box of all points is squared and divided by
    ``len(trees)``.
    """
    outlines = [np.asarray(tree.polygon.exterior.xy).T for tree in trees]
    points = np.concatenate(outlines) / 1e15
    lows = points.min(axis=0)
    highs = points.max(axis=0)
    width, height = highs - lows
    side = max(width, height)
    return side ** 2 / len(trees)

print("Functions defined!")

Functions defined!


In [3]:
# Score the baseline submission separately for every group size N = 1..200.
baseline_path = '/home/code/experiments/001_baseline/santa-2025.csv'
df = pd.read_csv(baseline_path, dtype=str)

per_n_scores = {}
for n in range(1, 201):
    # Rows of one group share an id prefix made of the zero-padded N, e.g. "007_".
    group = df[df['id'].str.startswith(f'{n:03d}_')]
    # All columns are read as strings; every 's' character is removed from
    # x, y and deg before the values are handed to ChristmasTree.
    trees = [
        ChristmasTree(*(str(field).replace('s', '') for field in record))
        for record in zip(group['x'], group['y'], group['deg'])
    ]
    per_n_scores[n] = calculate_score(trees)

print(f"Total score: {sum(per_n_scores.values()):.6f}")
print(f"\nTop 10 highest per-N scores (most room for improvement):")
# Largest per-N score first.
sorted_scores = sorted(per_n_scores.items(), key=lambda item: item[1], reverse=True)
for n, score in sorted_scores[:10]:
    print(f"  N={n}: {score:.6f}")

Total score: 70.676102

Top 10 highest per-N scores (most room for improvement):
  N=1: 0.661250
  N=2: 0.450779
  N=3: 0.434745
  N=5: 0.416850
  N=4: 0.416545
  N=7: 0.399897
  N=6: 0.399610
  N=9: 0.387415
  N=8: 0.385407
  N=15: 0.379203


In [4]:
# Rough size estimate for a single tree (N=1).
# The tree outline fits inside a 0.7 x 1.0 rectangle (base width 0.7, height
# from -0.2 to 0.8). Rotating that *rectangle* by 45 degrees gives an
# axis-aligned bounding box of side (width + height) / sqrt(2) ≈ 1.20.
# NOTE: this describes the enclosing rectangle, not the tree itself, so it is
# not a lower bound on the score. In the recorded run it printed 1.445 while
# the actual N=1 score was 0.661.

import math

# Tree dimensions
tree_width = 0.7  # base width
tree_height = 1.0  # from -0.2 to 0.8

# Diagonal of the enclosing rectangle (printed for reference only)
diagonal = math.sqrt(tree_width**2 + tree_height**2)

# For a rectangle rotated 45 degrees:
# The bounding box side = (width + height) / sqrt(2)
min_side_45_correct = (tree_width + tree_height) / math.sqrt(2)

print(f"Tree dimensions: width={tree_width}, height={tree_height}")
print(f"Diagonal: {diagonal:.6f}")
print(f"Min side at 45 degrees (approx): {min_side_45_correct:.6f}")
print(f"Min score for N=1 (approx): {min_side_45_correct**2:.6f}")
print(f"Actual N=1 score: {per_n_scores[1]:.6f}")

Tree dimensions: width=0.7, height=1.0
Diagonal: 1.220656
Min side at 45 degrees (approx): 1.202082
Min score for N=1 (approx): 1.445000
Actual N=1 score: 0.661250


In [5]:
# Quantify the distance between the current total and the target, both
# overall and per group size N.
target = 68.919154
current = 70.676102
gap = current - target

print(f"=== GAP ANALYSIS ===")
print(f"Current: {current:.6f}")
print(f"Target: {target:.6f}")
print(f"Gap: {gap:.6f}")
print()

# Spreading the gap evenly over the 200 values of N
avg_reduction_per_n = gap / 200
print(f"Average reduction needed per N: {avg_reduction_per_n:.6f}")
print()

# Spreading the gap in proportion to each N's current score instead:
# every N then needs the same relative reduction, gap / current.
relative_gap = gap / current
print("Proportional reduction needed per N:")
for n in (1, 2, 5, 10, 50, 100, 200):
    score_n = per_n_scores[n]
    needed = score_n * relative_gap
    print(f"  N={n}: current={score_n:.6f}, need reduction of {needed:.6f} ({needed/score_n*100:.2f}%)")

=== GAP ANALYSIS ===
Current: 70.676102
Target: 68.919154
Gap: 1.756948

Average reduction needed per N: 0.008785

Proportional reduction needed per N:
  N=1: current=0.661250, need reduction of 0.016438 (2.49%)
  N=2: current=0.450779, need reduction of 0.011206 (2.49%)
  N=5: current=0.416850, need reduction of 0.010363 (2.49%)
  N=10: current=0.376630, need reduction of 0.009363 (2.49%)
  N=50: current=0.360753, need reduction of 0.008968 (2.49%)
  N=100: current=0.345531, need reduction of 0.008590 (2.49%)
  N=200: current=0.337731, need reduction of 0.008396 (2.49%)


In [6]:
# Scan other submission CSVs for any per-N group that scores better than the
# baseline. The scan is best-effort: a file that cannot be read or parsed is
# skipped, but the reason is recorded and reported so that an empty result
# cannot be mistaken for "every file failed".
import glob

MAX_FILES_TO_CHECK = 50  # only a sample of the snapshot files is scanned
MIN_IMPROVEMENT = 0.0001  # ignore score differences smaller than this
REQUIRED_COLUMNS = {'id', 'x', 'y', 'deg'}

# Sorted so that the sampled subset is the same on every run.
csv_files = sorted(glob.glob('/home/nonroot/snapshots/santa-2025/**/*.csv', recursive=True))
print(f"Found {len(csv_files)} CSV files")

better_n_scores = {n: [] for n in range(1, 201)}
skipped_files = []  # (filepath, reason) for every file that could not be scored

files_to_check = csv_files[:MAX_FILES_TO_CHECK]
for filepath in files_to_check:
    try:
        df_check = pd.read_csv(filepath, dtype=str)
        if not REQUIRED_COLUMNS.issubset(df_check.columns):
            skipped_files.append((filepath, "missing one of the id/x/y/deg columns"))
            continue

        for n in range(1, 201):
            prefix = f'{n:03d}_'
            # na=False: rows with a missing id simply do not match any prefix.
            rows = df_check[df_check['id'].str.startswith(prefix, na=False)]
            if len(rows) != n:
                # Incomplete (or absent) group for this N; nothing to compare.
                continue
            trees = [
                ChristmasTree(*(str(field).replace('s', '') for field in record))
                for record in zip(rows['x'], rows['y'], rows['deg'])
            ]

            score = calculate_score(trees)
            if score < per_n_scores[n] - MIN_IMPROVEMENT:
                better_n_scores[n].append((filepath, score))
    except Exception as exc:
        # Keep scanning the remaining files, but remember what went wrong.
        skipped_files.append((filepath, f"{type(exc).__name__}: {exc}"))

# Report N values with better scores found
print("\nN values with better scores found in other files:")
for n in range(1, 201):
    if better_n_scores[n]:
        best = min(better_n_scores[n], key=lambda x: x[1])
        improvement = per_n_scores[n] - best[1]
        print(f"  N={n}: baseline={per_n_scores[n]:.6f}, best={best[1]:.6f}, improvement={improvement:.6f}")

# Surface skipped files instead of hiding them.
if skipped_files:
    print(f"\nSkipped {len(skipped_files)} of {len(files_to_check)} checked files (showing up to 10):")
    for skipped_path, reason in skipped_files[:10]:
        print(f"  {skipped_path}: {reason}")

Found 726 CSV files



N values with better scores found in other files:
