# Loop 14 Analysis: Strategic Assessment

## Current Status
- Best CV: 70.659475 (exp_018 multi-source ensemble)
- Best LB: 70.6600 (exp_005/exp_006)
- Target: 68.919154
- Gap: 1.740321 (2.53% above target)

## Key Questions
1. What is the theoretical minimum score?
2. Where are the biggest opportunities for improvement?
3. What techniques haven't been tried?

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from shapely.geometry import Polygon
from shapely import affinity
from shapely.ops import unary_union

# Tree shape constants: dimensions of the tree outline in its local frame,
# consumed by get_tree_poly(). The *_W values are full widths; the outline
# places vertices at +/- half (and, for the tiers, +/- a quarter) of each
# width around x = 0.
TRUNK_W = 0.15  # trunk width
TRUNK_H = 0.2  # trunk height; the trunk hangs below BASE_Y by this much
BASE_W = 0.7  # width of the bottom tier, at y = BASE_Y
MID_W = 0.4  # width of the middle tier, at y = TIER_2_Y
TOP_W = 0.25  # width of the top tier, at y = TIER_1_Y
TIP_Y = 0.8  # y of the single tip vertex at x = 0
TIER_1_Y = 0.5  # y of the top tier's lower edge
TIER_2_Y = 0.25  # y of the middle tier's lower edge
BASE_Y = 0.0  # y of the bottom tier's lower edge / top of the trunk
TRUNK_BOTTOM_Y = -TRUNK_H  # y of the trunk's bottom edge

def get_tree_poly(x, y, deg):
    """Return the tree outline as a shapely Polygon rotated and moved to (x, y).

    The 15-vertex outline is described in a local frame: a tip at
    (0, TIP_Y), three tiers narrowing towards the tip, and a rectangular
    trunk between BASE_Y and TRUNK_BOTTOM_Y. The outline is mirror-symmetric
    about x = 0, so only the right-hand side is listed explicitly.

    Args:
        x: Horizontal offset applied after the rotation.
        y: Vertical offset applied after the rotation.
        deg: Rotation angle handed to ``shapely.affinity.rotate`` (which
            takes degrees by default), about the local origin (0, 0).

    Returns:
        The transformed polygon.
    """
    half_top, quarter_top = TOP_W / 2.0, TOP_W / 4.0
    half_mid, quarter_mid = MID_W / 2.0, MID_W / 4.0
    half_base = BASE_W / 2.0
    half_trunk = TRUNK_W / 2.0

    # Right-hand side, walking from just below the tip down to the trunk foot.
    right_side = [
        (half_top, TIER_1_Y),
        (quarter_top, TIER_1_Y),
        (half_mid, TIER_2_Y),
        (quarter_mid, TIER_2_Y),
        (half_base, BASE_Y),
        (half_trunk, BASE_Y),
        (half_trunk, TRUNK_BOTTOM_Y),
    ]
    # Left-hand side is the mirror image, walked back up towards the tip.
    left_side = [(-px, py) for px, py in reversed(right_side)]

    outline = Polygon([(0.0, TIP_Y)] + right_side + left_side)
    rotated = affinity.rotate(outline, deg, origin=(0, 0))
    return affinity.translate(rotated, x, y)

# Reference tree: unrotated, at the origin. Its area feeds the later
# "theoretical minimum" calculations; the bounding box is printed as a
# sanity check of the outline.
tree = get_tree_poly(0, 0, 0)
tree_area = tree.area
print(
    f"Tree area: {tree_area:.6f}",
    f"Tree bounding box at 0 deg: {tree.bounds}",
    f"Tree width at 0 deg: {tree.bounds[2] - tree.bounds[0]:.4f}",
    f"Tree height at 0 deg: {tree.bounds[3] - tree.bounds[1]:.4f}",
    sep="\n",
)

Tree area: 0.245625
Tree bounding box at 0 deg: (-0.35, -0.2, 0.35, 0.8)
Tree width at 0 deg: 0.7000
Tree height at 0 deg: 1.0000


In [2]:
# Load current best submission.
# The rest of the notebook reads the columns 'id', 'x', 'y' and 'angle';
# rows for puzzle size N are selected by the id prefix f"{N:03d}_".
df = pd.read_csv('/home/submission/submission.csv')
print(f"Submission rows: {len(df)}")
# Quick visual check of the column layout.
print(df.head())

# Calculate score per N
def calculate_score_for_n(df, n):
    """Score the n-tree configuration stored in ``df``.

    Rows belonging to the configuration are those whose ``id`` starts with
    ``f"{n:03d}_"``. Each row is turned into a tree polygon via
    ``get_tree_poly(x, y, angle)``; the score is ``side**2 / n`` where
    ``side`` is the longer edge of the axis-aligned box enclosing all trees.

    Args:
        df: DataFrame with string column ``id`` and numeric columns ``x``,
            ``y`` and ``angle``.
        n: Number of trees in the configuration to score.

    Returns:
        The score as a float, or ``None`` when the number of matching rows
        is not exactly ``n``.
    """
    rows = df[df['id'].str.startswith(f"{n:03d}_")]
    if len(rows) != n:
        return None

    # Running extremes of the union of all tree bounding boxes.
    left, bottom = float('inf'), float('inf')
    right, top = float('-inf'), float('-inf')

    placements = zip(rows['x'].values, rows['y'].values, rows['angle'].values)
    for tree_x, tree_y, tree_angle in placements:
        lo_x, lo_y, hi_x, hi_y = get_tree_poly(tree_x, tree_y, tree_angle).bounds
        left = min(left, lo_x)
        bottom = min(bottom, lo_y)
        right = max(right, hi_x)
        top = max(top, hi_y)

    # The enclosing square must cover the larger of the two extents.
    side = max(right - left, top - bottom)
    return side * side / n

# Score every puzzle size N = 1..200. calculate_score_for_n() returns None
# when the submission does not hold exactly N rows for that size; such sizes
# are left out of `scores`.
TARGET_SCORE = 68.919154

scores = {}
for n in range(1, 201):
    score = calculate_score_for_n(df, n)
    # Test against the None sentinel explicitly; a truthiness test would
    # also discard a legitimate 0.0.
    if score is not None:
        scores[n] = score

# A silently skipped N makes the total look better than it is, so say so.
missing = [n for n in range(1, 201) if n not in scores]
if missing:
    print(f"WARNING: no valid score for {len(missing)} N value(s): {missing}")

total_score = sum(scores.values())
score_gap = total_score - TARGET_SCORE
print(f"\nTotal score: {total_score:.6f}")
print(f"Target: {TARGET_SCORE:.6f}")
# The percentage is relative to the target, not to the current score.
print(f"Gap: {score_gap:.6f} ({score_gap / TARGET_SCORE * 100:.2f}%)")

Submission rows: 20100
        id          x          y       angle
0  001_000 -48.196086  58.770985   45.000000
1  002_000   0.154097  -0.038541  203.629378
2  002_001  -0.154097  -0.561459   23.629378
3  003_000   1.123656   0.781102  111.125132
4  003_001   1.234056   1.276000   66.370622



Total score: 70.659475
Target: 68.919154
Gap: 1.740321 (2.53%)


In [3]:
# Break the total score down into contiguous ranges of N (both ends inclusive)
ranges = [(1, 10), (11, 50), (51, 100), (101, 150), (151, 200)]

print("Score contribution by N range:", "="*60, sep="\n")
for start, end in ranges:
    # Only N values that actually received a score take part in the sum.
    range_score = sum(
        scores[n] for n in filter(scores.__contains__, range(start, end+1))
    )
    range_pct = range_score / total_score * 100
    print(f"N={start:3d}-{end:3d}: {range_score:.4f} ({range_pct:.2f}%)")

print("\nTop 10 N values by score contribution:")
# Rank all (N, score) pairs from largest score down, then keep the first ten.
sorted_scores = sorted(scores.items(), key=lambda item: item[1], reverse=True)
del sorted_scores[10:]
for n, score in sorted_scores:
    print(f"  N={n:3d}: {score:.6f} ({score/total_score*100:.2f}%)")

Score contribution by N range:
N=  1- 10: 4.3291 (6.13%)
N= 11- 50: 14.7126 (20.82%)
N= 51-100: 17.6318 (24.95%)
N=101-150: 17.1408 (24.26%)
N=151-200: 16.8451 (23.84%)

Top 10 N values by score contribution:
  N=  1: 0.661250 (0.94%)
  N=  2: 0.450779 (0.64%)
  N=  3: 0.434745 (0.62%)
  N=  5: 0.416850 (0.59%)
  N=  4: 0.416545 (0.59%)
  N=  7: 0.399897 (0.57%)
  N=  6: 0.399610 (0.57%)
  N=  9: 0.387415 (0.55%)
  N=  8: 0.385407 (0.55%)
  N= 15: 0.379203 (0.54%)


In [4]:
# Calculate theoretical minimum (idealised perfect packing, no wasted space)
# For N trees, minimum area = N * tree_area
# Minimum side = sqrt(N * tree_area)
# Score contribution = side^2 / N = tree_area
# "Efficiency" below is this bound divided by the achieved score.

print("Theoretical minimum analysis:")
print("="*60)
theoretical_min = tree_area * 200  # Sum of tree_area for N=1 to 200
print(f"Theoretical minimum total score: {theoretical_min:.6f}")
print(f"Current score: {total_score:.6f}")
print(f"Efficiency: {theoretical_min / total_score * 100:.2f}%")

# Per-N efficiency
print("\nEfficiency by N range:")
for start, end in ranges:
    # Use the same set of N values for the bound and for the achieved score:
    # counting an unscored N in the bound only would overstate efficiency.
    scored_ns = [n for n in range(start, end+1) if n in scores]
    if not scored_ns:
        # Nothing to compare against; also avoids dividing by a zero score.
        print(f"N={start:3d}-{end:3d}: no scores available")
        continue
    range_score = sum(scores[n] for n in scored_ns)
    range_theoretical = tree_area * len(scored_ns)
    efficiency = range_theoretical / range_score * 100
    print(f"N={start:3d}-{end:3d}: {efficiency:.2f}% efficient")

Theoretical minimum analysis:
Theoretical minimum total score: 49.125000
Current score: 70.659475
Efficiency: 69.52%

Efficiency by N range:
N=  1- 10: 56.74% efficient
N= 11- 50: 66.78% efficient
N= 51-100: 69.65% efficient
N=101-150: 71.65% efficient
N=151-200: 72.91% efficient


In [5]:
# How far is the current total from the target, overall and averaged per N?
target = 68.919154
gap = total_score - target
avg_improvement_needed = gap / 200

print(
    f"Gap to target: {gap:.6f}",
    f"Average improvement needed per N: {avg_improvement_needed:.6f}",
    sep="\n",
)

# Rank the N values by how far they sit above the perfect-packing bound.
print("\nN values with most room for improvement (vs theoretical):")
# Each entry: (N, score, bound, score - bound, excess as a % of the score).
# The bound is tree_area for every N (perfect packing).
room_for_improvement = sorted(
    (
        (n, score, tree_area, score - tree_area, (score - tree_area)/score*100)
        for n, score in scores.items()
    ),
    key=lambda entry: entry[3],
    reverse=True,
)
for n, score, theoretical, room, pct in room_for_improvement[:15]:
    print(f"  N={n:3d}: score={score:.6f}, theoretical={theoretical:.6f}, room={room:.6f} ({pct:.1f}%)")

Gap to target: 1.740321
Average improvement needed per N: 0.008702

N values with most room for improvement (vs theoretical):
  N=  1: score=0.661250, theoretical=0.245625, room=0.415625 (62.9%)
  N=  2: score=0.450779, theoretical=0.245625, room=0.205154 (45.5%)
  N=  3: score=0.434745, theoretical=0.245625, room=0.189120 (43.5%)
  N=  5: score=0.416850, theoretical=0.245625, room=0.171225 (41.1%)
  N=  4: score=0.416545, theoretical=0.245625, room=0.170920 (41.0%)
  N=  7: score=0.399897, theoretical=0.245625, room=0.154272 (38.6%)
  N=  6: score=0.399610, theoretical=0.245625, room=0.153985 (38.5%)
  N=  9: score=0.387415, theoretical=0.245625, room=0.141790 (36.6%)
  N=  8: score=0.385407, theoretical=0.245625, room=0.139782 (36.3%)
  N= 15: score=0.379203, theoretical=0.245625, room=0.133578 (35.2%)
  N= 10: score=0.376630, theoretical=0.245625, room=0.131005 (34.8%)
  N= 21: score=0.376451, theoretical=0.245625, room=0.130826 (34.8%)
  N= 20: score=0.376057, theoretical=0.245625,

In [6]:
# Put the target in context: how much of the gap could each group of N
# values close if it reached the perfect-packing bound?
print(
    "Target score analysis:",
    "="*60,
    f"Target: {target:.6f}",
    f"Current: {total_score:.6f}",
    f"Gap: {gap:.6f}",
    sep="\n",
)

# If we could achieve perfect packing for some N values
print("\nScenarios to reach target:")

# Scenario 1: spread the required gain evenly over every N
print(f"\n1. Uniform improvement: Need {avg_improvement_needed:.6f} per N")

# Scenario 2: only the ten smallest configurations (N = 1..10) improve
small_n_score = sum(map(scores.__getitem__, range(1, 11)))
small_n_theoretical = tree_area * 10
small_n_room = small_n_score - small_n_theoretical
print(
    f"\n2. Small N (1-10) has room: {small_n_room:.6f}",
    f"   If we achieve theoretical for N=1-10, we save: {small_n_room:.6f}",
    f"   Remaining gap: {gap - small_n_room:.6f}",
    sep="\n",
)

# Scenario 3: only the 101 largest configurations (N = 100..200) improve
large_n_score = sum(map(scores.__getitem__, range(100, 201)))
large_n_theoretical = tree_area * 101
large_n_room = large_n_score - large_n_theoretical
# A negative "remaining gap" means the scenario alone would overshoot the target.
print(
    f"\n3. Large N (100-200) has room: {large_n_room:.6f}",
    f"   If we achieve theoretical for N=100-200, we save: {large_n_room:.6f}",
    f"   Remaining gap: {gap - large_n_room:.6f}",
    sep="\n",
)

Target score analysis:
Target: 68.919154
Current: 70.659475
Gap: 1.740321

Scenarios to reach target:

1. Uniform improvement: Need 0.008702 per N

2. Small N (1-10) has room: 1.872878
   If we achieve theoretical for N=1-10, we save: 1.872878
   Remaining gap: -0.132557

3. Large N (100-200) has room: 9.523313
   If we achieve theoretical for N=100-200, we save: 9.523313
   Remaining gap: -7.782992


In [7]:
# Express the target as a packing efficiency (theoretical minimum / score)
# and compare it with the efficiency of the current submission.
target_efficiency = theoretical_min / target * 100
efficiency_gap = target_efficiency - (theoretical_min / total_score * 100)
print(
    f"Target requires efficiency: {target_efficiency:.2f}%",
    f"Current efficiency: {theoretical_min / total_score * 100:.2f}%",
    f"Efficiency gap: {efficiency_gap:.2f}%",
    sep="\n",
)

# efficiency_gap is a difference of two percentages, i.e. percentage points.
# On the recorded run (71.28% vs 69.52%) that is 1.76 points, which is
# roughly a 2.5% relative improvement in packing efficiency.
# This is a VERY tight margin for geometric packing

Target requires efficiency: 71.28%
Current efficiency: 69.52%
Efficiency gap: 1.76%


In [None]:
# Summary of the analysis above:
#   - reaching the target (68.919154) takes ~71.3% packing efficiency
#   - the current best (70.659475) sits at ~69.5%
#   - so the gap is about 1.8 points of efficiency
#
# Routes considered achievable:
#   1. Better small N configurations (N=1-10 have worst efficiency)
#   2. Asymmetric packing for medium/large N
#   3. Techniques not in public kernels

# One print call; every argument is one output line.
print(
    "\n" + "="*60,
    "STRATEGIC CONCLUSIONS",
    "="*60,
    "",
    "1. The target IS achievable - it requires 71.3% efficiency",
    "   vs current 69.5% efficiency (1.8% gap)",
    "",
    "2. Small N (1-10) has the WORST efficiency - biggest opportunity",
    "   N=1 alone contributes 0.66 to score (0.94% of total)",
    "",
    "3. Large N (100-200) contributes 48.6% of score",
    "   Even small improvements here add up",
    "",
    "4. All public kernels converge to ~70.66 score",
    "   Top competitors use techniques NOT in public kernels",
    "",
    "5. Web research indicates 'asymmetric packing' is key",
    "   Non-mirrored patterns that exploit tree shape",
    "",
    "6. The multi-source ensemble found 138 improvements",
    "   This proves different optimization runs find different optima",
    "   Running MORE diverse experiments could find more improvements",
    sep="\n",
)