# Loop 4 Analysis: Understanding the Gap

## Key Findings:
1. Eazy optimizer produces INVALID submissions (overlaps detected by Kaggle but not Shapely)
2. Strict ensemble scores 70.615745 (0.06 better than the 70.676102 baseline), with NO overlaps
3. Gap to target: 70.615745 - 68.891380 = 1.72 points (2.5%)

## Strategy Assessment:
- Local optimization (SA, eazy) is stuck at a local optimum
- Ensemble from multiple sources provides marginal improvement
- Need a fundamentally different approach to close the 1.72-point gap

In [1]:
import pandas as pd
import numpy as np
import json

# Read the persisted session state, then summarise experiments, submissions,
# and the remaining submission budget.
with open('/home/code/session_state.json') as state_file:
    state = json.load(state_file)

# One line per experiment: its name and cross-validation score.
print('Experiments:')
for experiment in state['experiments']:
    name, cv = experiment['name'], experiment['cv_score']
    print(f"  {name}: CV={cv:.6f}")

# One line per submission; a record without an 'error' key prints the text 'None'.
print('\nSubmissions:')
for submission in state['submissions']:
    cv = submission['cv_score']
    lb = submission['lb_score']
    error = submission.get('error', 'None')
    print(f"  CV={cv:.6f}, LB={lb}, Error={error}")

remaining = state['remaining_submissions']
print(f"\nRemaining submissions: {remaining}")
print("Target: 68.891380")

Experiments:
  001_baseline: CV=70.676102
  002_tessellation_attempts: CV=70.676102
  003_eazy_optimizer: CV=70.676059
  004_eazy_long_run: CV=70.675672

Submissions:
  CV=70.676102, LB=70.676102398091, Error=None
  CV=70.676059, LB=70.676059085435, Error=None
  CV=70.675672, LB=, Error=Overlapping trees in group 013

Remaining submissions: 96
Target: 68.891380


In [2]:
# Analyze the strict ensemble - where does it improve over baseline?
import pandas as pd
import numpy as np

# Local-frame vertex coordinates, paired by index: vertex i is (TX[i], TY[i]).
# get_score_per_n rotates these 15 points by each row's `deg` and translates them
# by the row's (x, y) before taking the bounding box.
# Presumably the outline polygon of one tree: its shoelace area is 0.245625, the
# same value as the `tree_area` used later in this notebook.
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]

def strip(s):
    """Convert a submission cell to ``float`` after dropping every 's' character.

    The value is stringified first, so numeric inputs pass through unchanged.
    Presumably the CSV stores numbers with an 's' prefix (e.g. ``'s0.25'``).
    """
    text = str(s)
    # Splitting on 's' and re-joining removes all occurrences of the letter.
    cleaned = ''.join(text.split('s'))
    return float(cleaned)

def get_score_per_n(df, max_n=200):
    """Compute the per-group score ``side**2 / n`` for every n in ``1..max_n``.

    For each n, the rows whose ``id`` starts with the zero-padded prefix
    ``f'{n:03d}_'`` form one group. Every row places a copy of the (TX, TY)
    outline rotated by ``deg`` degrees and translated by ``(x, y)``. ``side`` is
    the longer edge of the axis-aligned bounding box around all placed vertices
    in the group.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide string column ``id`` and columns ``x``, ``y``, ``deg``
        whose values are parseable by ``strip``.
    max_n : int, optional
        Largest group index to score (default 200, the previously hard-coded
        value).

    Returns
    -------
    dict
        Maps each n to its score. A group with no rows maps to ``float('inf')``.
    """
    outline_x = np.asarray(TX, dtype=float)
    outline_y = np.asarray(TY, dtype=float)

    scores = {}
    for n in range(1, max_n + 1):
        group = df[df['id'].str.startswith(f'{n:03d}_')]
        if group.empty:
            # Made explicit: the old min/max sentinels silently produced
            # (-inf)**2 / n == inf for a missing group.
            scores[n] = float('inf')
            continue

        xs = np.array([strip(x) for x in group['x']])
        ys = np.array([strip(y) for y in group['y']])
        angles = np.radians([strip(d) for d in group['deg']])
        cos_a, sin_a = np.cos(angles), np.sin(angles)

        # Row i holds the 15 transformed vertices of tree i:
        #   px = tx*cos - ty*sin + x,  py = tx*sin + ty*cos + y
        px = np.outer(cos_a, outline_x) - np.outer(sin_a, outline_y) + xs[:, None]
        py = np.outer(sin_a, outline_x) + np.outer(cos_a, outline_y) + ys[:, None]

        side = max(px.max() - px.min(), py.max() - py.min())
        scores[n] = side**2 / n
    return scores

# Read the two submissions being compared.
# NOTE(review): the baseline path points at '003_long_sa', while the session
# state printed above lists experiment 003 as '003_eazy_optimizer' -- confirm
# this is the intended baseline file.
baseline = pd.read_csv('/home/code/experiments/003_long_sa/submission_best.csv')
ensemble = pd.read_csv('/home/code/experiments/005_ensemble/submission_ensemble_strict.csv')

baseline_scores = get_score_per_n(baseline)
ensemble_scores = get_score_per_n(ensemble)

print('N values where ensemble is better:')
# Keep only the N whose score dropped by more than 1e-4, recording
# (n, gain, baseline score, ensemble score).
improvements = [
    (n, baseline_scores[n] - ensemble_scores[n], baseline_scores[n], ensemble_scores[n])
    for n in range(1, 201)
    if baseline_scores[n] - ensemble_scores[n] > 0.0001
]

# Largest gain first; show the top 20.
improvements.sort(key=lambda rec: rec[1], reverse=True)
for n, gain, before, after in improvements[:20]:
    print(f'  N={n}: {before:.6f} -> {after:.6f} (improvement: {gain:.6f})')

total_gain = sum(rec[1] for rec in improvements)
print(f'\nTotal improvements: {len(improvements)} N values')
print(f'Total improvement: {total_gain:.6f}')

N values where ensemble is better:
  N=54: 0.361321 -> 0.356260 (improvement: 0.005060)
  N=57: 0.358045 -> 0.353509 (improvement: 0.004536)
  N=87: 0.353823 -> 0.349960 (improvement: 0.003864)
  N=88: 0.350672 -> 0.347501 (improvement: 0.003171)
  N=128: 0.343762 -> 0.340751 (improvement: 0.003011)
  N=43: 0.370040 -> 0.367065 (improvement: 0.002975)
  N=94: 0.352274 -> 0.349956 (improvement: 0.002318)
  N=15: 0.379203 -> 0.376949 (improvement: 0.002254)
  N=65: 0.363795 -> 0.361611 (improvement: 0.002184)
  N=100: 0.345531 -> 0.343395 (improvement: 0.002136)
  N=76: 0.351603 -> 0.349595 (improvement: 0.002008)
  N=64: 0.350468 -> 0.348685 (improvement: 0.001783)
  N=101: 0.350389 -> 0.348656 (improvement: 0.001733)
  N=157: 0.341876 -> 0.340201 (improvement: 0.001674)
  N=74: 0.354139 -> 0.352709 (improvement: 0.001430)
  N=95: 0.349094 -> 0.347734 (improvement: 0.001360)
  N=162: 0.338332 -> 0.337022 (improvement: 0.001311)
  N=136: 0.345460 -> 0.344159 (improvement: 0.001301)
  N=1

In [3]:
# What's the theoretical minimum score?
# Tree area = 0.245625
# For N trees, minimum bounding box area = N * tree_area / packing_efficiency
# Best known packing efficiency for irregular shapes is ~80-85%
# NOTE(review): no source is given for the 80-85% figure -- treat as a rule of thumb.

tree_area = 0.245625
N_GROUPS = 200            # groups n = 1..200 each contribute side^2 / n
CURRENT_BEST = 70.615745  # strict-ensemble score quoted in the notebook summary
TARGET = 68.891380

print(f'Tree area: {tree_area}')

# With 100% packing efficiency the bounding square of group n has area
# n * tree_area, so its contribution side^2 / n equals tree_area for every n.
# The lower bound is therefore simply N_GROUPS * tree_area.
theoretical_min = N_GROUPS * tree_area

gap_from_theoretical = CURRENT_BEST - theoretical_min
gap_to_target = CURRENT_BEST - TARGET

print(f'Theoretical minimum (100% efficiency): {theoretical_min:.6f}')
print(f'Current best: {CURRENT_BEST:.6f}')
print(f'Target: {TARGET:.6f}')
print(f'Gap from theoretical: {gap_from_theoretical:.6f}')
print(f'Gap to target: {gap_to_target:.6f}')
print(f'\nTo reach target, we need to capture {gap_to_target / gap_from_theoretical * 100:.1f}% of remaining improvement potential')

Tree area: 0.245625
Theoretical minimum (100% efficiency): 49.125000
Current best: 70.615745
Target: 68.891380
Gap from theoretical: 21.490745
Gap to target: 1.724365

To reach target, we need to capture 8.0% of remaining improvement potential


In [4]:
# Rank every N by packing efficiency to see where the headroom is largest.
# score = side^2 / n and the bounding-square area is side^2 = score * n, so
#   efficiency = (n * tree_area) / (score * n) = tree_area / score   (as a percentage)

print('N values with worst efficiency (most room for improvement):')

# (n, efficiency %, score) for each N, ordered from least to most efficient.
efficiencies = sorted(
    (
        (n, tree_area / ensemble_scores[n] * 100, ensemble_scores[n])
        for n in range(1, 201)
    ),
    key=lambda rec: rec[1],
)

print('\nLowest efficiency (most room for improvement):')
for n, pct, score in efficiencies[:15]:
    print(f'  N={n}: {pct:.1f}% efficiency, score={score:.6f}')

print('\nHighest efficiency (least room for improvement):')
for n, pct, score in efficiencies[-10:]:
    print(f'  N={n}: {pct:.1f}% efficiency, score={score:.6f}')

N values with worst efficiency (most room for improvement):

Lowest efficiency (most room for improvement):
  N=1: 37.1% efficiency, score=0.661250
  N=2: 54.5% efficiency, score=0.450779
  N=3: 56.5% efficiency, score=0.434745
  N=5: 58.9% efficiency, score=0.416850
  N=4: 59.0% efficiency, score=0.416545
  N=7: 61.4% efficiency, score=0.399897
  N=6: 61.5% efficiency, score=0.399610
  N=9: 63.4% efficiency, score=0.387415
  N=8: 63.7% efficiency, score=0.385407
  N=15: 65.2% efficiency, score=0.376949
  N=10: 65.2% efficiency, score=0.376630
  N=21: 65.2% efficiency, score=0.376451
  N=20: 65.3% efficiency, score=0.376057
  N=22: 65.5% efficiency, score=0.375258
  N=11: 65.5% efficiency, score=0.374924

Highest efficiency (least room for improvement):
  N=194: 73.8% efficiency, score=0.332999
  N=167: 73.8% efficiency, score=0.332835
  N=179: 73.9% efficiency, score=0.332591
  N=195: 73.9% efficiency, score=0.332576
  N=168: 73.9% efficiency, score=0.332475
  N=155: 74.0% efficiency,