# Loop 12 Analysis: Understanding the Gap and Finding Better Solutions

## Key Findings from Research:
1. Best public LB score: 71.191427 (terry_u16)
2. Our target: 68.922808 (BETTER than current LB leader!)
3. Our baseline: 70.734327 (with overlap at N=9)
4. Best valid baseline: 70.750676 (bucket-of-chump)

## Critical Insight:
The target of 68.922808 is BELOW the current leaderboard leader (71.19). This means:
- The target IS achievable
- We need to find techniques that beat the current best public solutions
- Top teams use 'lattice packings of two-tree units alternating up-and-down orientations'

In [1]:
import pandas as pd
import numpy as np
import os

# Tree shape
TX = np.array([0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125])
TY = np.array([0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5])

def get_tree_polygon(cx, cy, deg):
    rad = np.radians(deg)
    c, s = np.cos(rad), np.sin(rad)
    x = TX * c - TY * s + cx
    y = TX * s + TY * c + cy
    return x, y

def score_n(df, n):
    rows = df[df['id'].str.startswith(f'{n:03d}_')]
    if len(rows) == 0:
        return 0
    
    all_x, all_y = [], []
    for _, row in rows.iterrows():
        x_val = float(str(row['x']).replace('s', ''))
        y_val = float(str(row['y']).replace('s', ''))
        deg = float(str(row['deg']).replace('s', ''))
        px, py = get_tree_polygon(x_val, y_val, deg)
        all_x.extend(px)
        all_y.extend(py)
    
    side = max(max(all_x) - min(all_x), max(all_y) - min(all_y))
    return side * side / n

# Load baseline
baseline_path = "/home/nonroot/snapshots/santa-2025/21105319338/code/datasets/santa-2025-csv/santa-2025.csv"
df_baseline = pd.read_csv(baseline_path)

# Calculate per-N scores
scores = []
for n in range(1, 201):
    score = score_n(df_baseline, n)
    scores.append({'n': n, 'score': score})

scores_df = pd.DataFrame(scores)
print(f"Total baseline score: {scores_df['score'].sum():.6f}")
print(f"Target score: 68.922808")
print(f"Gap: {scores_df['score'].sum() - 68.922808:.6f}")
print(f"\nTop 10 N values with highest scores (most room for improvement):")
print(scores_df.nlargest(10, 'score'))

Total baseline score: 70.734327
Target score: 68.922808
Gap: 1.811519

Top 10 N values with highest scores (most room for improvement):
     n     score
0    1  0.661250
1    2  0.450779
2    3  0.434745
4    5  0.416850
3    4  0.416545
6    7  0.399897
5    6  0.399610
8    9  0.387415
7    8  0.385407
14  15  0.379203


In [2]:
# Analyze the structure of the baseline for small N values
# to understand the packing patterns

def analyze_config(df, n):
    rows = df[df['id'].str.startswith(f'{n:03d}_')]
    if len(rows) == 0:
        return None
    
    trees = []
    for _, row in rows.iterrows():
        x = float(str(row['x']).replace('s', ''))
        y = float(str(row['y']).replace('s', ''))
        deg = float(str(row['deg']).replace('s', '')) % 360
        trees.append({'x': x, 'y': y, 'deg': deg})
    
    return trees

print("Analyzing baseline configurations for small N:")
print("="*70)
for n in range(1, 11):
    trees = analyze_config(df_baseline, n)
    score = score_n(df_baseline, n)
    
    # Analyze angle distribution
    angles = [t['deg'] for t in trees]
    up_count = sum(1 for a in angles if a < 90 or a > 270)  # pointing up
    down_count = sum(1 for a in angles if 90 <= a <= 270)  # pointing down
    
    print(f"\nN={n}: score={score:.6f}")
    print(f"  Angles: {[f'{a:.1f}' for a in angles]}")
    print(f"  Up/Down ratio: {up_count}/{down_count}")

Analyzing baseline configurations for small N:

N=1: score=0.661250
  Angles: ['45.0']
  Up/Down ratio: 1/0

N=2: score=0.450779
  Angles: ['203.6', '23.6']
  Up/Down ratio: 1/1

N=3: score=0.434745
  Angles: ['113.6', '66.4', '155.1']
  Up/Down ratio: 1/2

N=4: score=0.416545
  Angles: ['156.4', '156.4', '336.4', '336.4']
  Up/Down ratio: 2/2

N=5: score=0.416850
  Angles: ['293.6', '23.6', '112.6', '66.4', '207.5']
  Up/Down ratio: 3/2

N=6: score=0.399610
  Angles: ['293.6', '338.9', '338.9', '23.6', '246.4', '158.9']
  Up/Down ratio: 4/2

N=7: score=0.399897
  Angles: ['37.6', '252.8', '65.2', '336.4', '207.8', '213.5', '27.8']
  Up/Down ratio: 4/3

N=8: score=0.385407
  Angles: ['51.8', '113.6', '293.6', '203.6', '231.8', '293.6', '113.6', '23.6']
  Up/Down ratio: 4/4

N=9: score=0.387415
  Angles: ['154.9', '205.5', '24.1', '296.8', '249.9', '204.8', '336.4', '293.6', '113.6']
  Up/Down ratio: 4/5

N=10: score=0.376630
  Angles: ['290.2', '338.6', '21.4', '292.6', '70.4', '23.6',

In [3]:
# Calculate theoretical minimum scores for each N
# The theoretical minimum is achieved when trees are packed as tightly as possible

# Tree bounding box dimensions (at 0 degrees)
tree_width = 0.7  # from -0.35 to 0.35
tree_height = 1.0  # from -0.2 to 0.8

# For a single tree, the minimum bounding square is max(width, height) = 1.0
# So score for N=1 is 1.0^2 / 1 = 1.0

# For N trees, if we could pack them with no wasted space:
# Area needed = N * tree_area
# But we need a square, so side = sqrt(N * tree_area)
# Minimum score = side^2 / N = tree_area

tree_area = tree_width * tree_height  # 0.7
print(f"Tree dimensions: {tree_width} x {tree_height}")
print(f"Tree area: {tree_area}")
print(f"Theoretical minimum score per N (if perfect packing): {tree_area}")

# But the tree is not a rectangle, so we can do better!
# The actual tree polygon area is smaller
from shapely.geometry import Polygon
tree_poly = Polygon(list(zip(TX, TY)))
actual_area = tree_poly.area
print(f"\nActual tree polygon area: {actual_area:.6f}")
print(f"Theoretical minimum score (if perfect packing): {actual_area:.6f}")

# Compare with baseline
print(f"\nBaseline average score per N: {scores_df['score'].mean():.6f}")
print(f"Ratio to theoretical minimum: {scores_df['score'].mean() / actual_area:.2f}x")

Tree dimensions: 0.7 x 1.0
Tree area: 0.7
Theoretical minimum score per N (if perfect packing): 0.7

Actual tree polygon area: 0.245625
Theoretical minimum score (if perfect packing): 0.245625

Baseline average score per N: 0.353672
Ratio to theoretical minimum: 1.44x


In [4]:
# Let's look at what the gap looks like per N
# Target: 68.922808
# Baseline: 70.734327
# Gap: 1.811519

# If we could improve each N by the same percentage:
required_improvement_pct = (70.734327 - 68.922808) / 70.734327 * 100
print(f"Required improvement: {required_improvement_pct:.2f}%")

# But improvements are likely concentrated in small N values
# Let's see the cumulative contribution
scores_df['cumsum'] = scores_df['score'].cumsum()
scores_df['pct_of_total'] = scores_df['score'] / scores_df['score'].sum() * 100
scores_df['cumsum_pct'] = scores_df['cumsum'] / scores_df['score'].sum() * 100

print(f"\nCumulative score contribution:")
print(f"N=1-10: {scores_df[scores_df['n'] <= 10]['score'].sum():.3f} ({scores_df[scores_df['n'] <= 10]['pct_of_total'].sum():.1f}%)")
print(f"N=1-20: {scores_df[scores_df['n'] <= 20]['score'].sum():.3f} ({scores_df[scores_df['n'] <= 20]['pct_of_total'].sum():.1f}%)")
print(f"N=1-50: {scores_df[scores_df['n'] <= 50]['score'].sum():.3f} ({scores_df[scores_df['n'] <= 50]['pct_of_total'].sum():.1f}%)")
print(f"N=51-200: {scores_df[scores_df['n'] > 50]['score'].sum():.3f} ({scores_df[scores_df['n'] > 50]['pct_of_total'].sum():.1f}%)")

Required improvement: 2.56%

Cumulative score contribution:
N=1-10: 4.329 (6.1%)
N=1-20: 8.058 (11.4%)
N=1-50: 19.045 (26.9%)
N=51-200: 51.689 (73.1%)


In [5]:
# Key insight: We need to find BETTER solutions than the current baseline
# The baseline is already well-optimized, but there must be room for improvement

# Let's check if there are any public solutions that beat our baseline for specific N values

base_path = "/home/nonroot/snapshots/santa-2025/21105319338/code/datasets"

# Load all available CSVs
csvs = [
    ("bucket-of-chump", f"{base_path}/bucket-of-chump/submission.csv"),
    ("santa-2025.csv", f"{base_path}/santa-2025-csv/santa-2025.csv"),
    ("smartmanoj", f"{base_path}/smartmanoj_submission.csv"),
    ("70.926", f"{base_path}/santa25-public/submission_70_926149550346.csv"),
    ("70.937", f"{base_path}/santa25-public/submission_70_936673758122.csv"),
]

all_scores = {}
for name, path in csvs:
    try:
        df = pd.read_csv(path)
        scores = {}
        for n in range(1, 201):
            scores[n] = score_n(df, n)
        all_scores[name] = scores
    except:
        pass

# Find N values where any solution beats the baseline
print("N values where other solutions beat santa-2025.csv:")
print("="*70)
for n in range(1, 51):
    baseline_score = all_scores['santa-2025.csv'][n]
    for name, scores in all_scores.items():
        if name == 'santa-2025.csv':
            continue
        if scores[n] < baseline_score - 1e-9:
            print(f"N={n}: {name} = {scores[n]:.6f} vs baseline = {baseline_score:.6f} (diff: {baseline_score - scores[n]:.6f})")

N values where other solutions beat santa-2025.csv:
