# Loop 13 Analysis: Strategic Assessment

## Current State
- Best CV: 70.659943 (eazy_output)
- Best LB: 70.659958 (saspav_latest)
- Target: 68.919154
- Gap: 1.74 points (2.46% of the current best CV score)

## Key Questions
1. What approaches have NOT been tried?
2. What does the chistyakov corner extraction approach do?
3. Can we combine multiple techniques?

In [1]:
import pandas as pd
import numpy as np
import os

# Load session state to see all experiments
import json

# Reference scores are defined once so the printed summary cannot drift apart.
TARGET_SCORE = 68.919154
BEST_CV_SCORE = 70.659943  # labelled "eazy_output" in the summary line below

with open('/home/code/session_state.json', 'r') as f:
    state = json.load(f)

print('=== EXPERIMENT HISTORY ===')
# .get() keeps this consistent with the other keys: a missing section prints
# nothing instead of raising KeyError.
for exp in state.get('experiments', []):
    print(f"{exp['id']}: {exp['name']} | CV={exp.get('cv_score', '?')} | LB={exp.get('lb_score', '?')}")

print("\n=== SUBMISSIONS ===")
for sub in state.get('submissions', []):
    print(f"{sub['experiment_id']}: CV={sub.get('cv_score', '?')} LB={sub.get('lb_score', '?')}")

print(f"\nRemaining submissions: {state.get('remaining_submissions', '?')}")
print(f"Target: {TARGET_SCORE}")
print(f"Best CV: {BEST_CV_SCORE} (eazy_output)")

# The percentage is the relative reduction needed from the *current* score,
# i.e. gap / current. Dividing by the target instead yields 2.53%, which
# disagrees with the 2.46% figure used elsewhere in this notebook.
score_gap = BEST_CV_SCORE - TARGET_SCORE
print(f"Gap: {score_gap:.6f} ({score_gap / BEST_CV_SCORE * 100:.2f}%)")

=== EXPERIMENT HISTORY ===
exp_000: 001_baseline | CV=70.676102 | LB=None
exp_001: 002_sa_v1_long_run | CV=70.676102 | LB=None
exp_002: 005_006_optimization_attempts | CV=70.676102 | LB=None
exp_003: 007_008_advanced_optimization | CV=70.676102 | LB=None
exp_004: 009_perturbation | CV=70.676102 | LB=None
exp_005: 010_saspav_latest_baseline | CV=70.659958 | LB=None
exp_006: 011_long_optimization | CV=70.659958 | LB=None
exp_007: 012_lattice_sa | CV=70.659958 | LB=None
exp_008: 013_jiweiliu_correct_seeds | CV=70.659958 | LB=None
exp_009: 014_jiweiliu_full_sa | CV=70.659958 | LB=None
exp_010: 015_crodoc_ensemble | CV=70.659944 | LB=None
exp_011: 016_asymmetric_random_restart | CV=70.659958 | LB=None
exp_012: 017_dimer_mosaic | CV=70.659958 | LB=None

=== SUBMISSIONS ===
exp_000: CV=70.676102 LB=70.676102398091
exp_001: CV=70.676102 LB=
exp_004: CV=70.676102 LB=70.676102398091
exp_005: CV=70.659958 LB=70.659958321926
exp_006: CV=70.659958 LB=70.659958321926

Remaining submissions: 100
Targ

In [2]:
# Analyze what approaches have been tried
# Each entry is (approach, experiment ids, outcome). The ids refer to the
# `exp_NNN` identifiers printed in the experiment history, not to the numeric
# prefix of the experiment *name* (e.g. "017_dimer_mosaic" is exp_012).
approaches_tried = [
    ('SA optimization', 'exp_001, exp_006, exp_007', 'No improvement'),
    ('Fractional translation', 'exp_002', 'No improvement'),
    ('Backward propagation', 'exp_003', 'No improvement'),
    ('bbox3 optimizer', 'exp_003, exp_008', 'No improvement'),
    ('Perturbation + SA', 'exp_004', 'Collisions immediately'),
    # 013_jiweiliu_correct_seeds -> exp_008, 014_jiweiliu_full_sa -> exp_009
    ('Lattice SA (jiweiliu)', 'exp_007, exp_008, exp_009', 'Requires pre-optimized seeds'),
    ('Crodoc ensemble', 'exp_010', 'Tiny improvement (0.000014)'),
    # 016_asymmetric_random_restart -> exp_011
    ('Asymmetric random restart', 'exp_011', 'No improvement'),
    # 017_dimer_mosaic -> exp_012
    ('Dimer mosaic', 'exp_012', '3.3x WORSE'),
]

print('=== APPROACHES TRIED ===')
# The experiment-id column is kept for reference only; it is not printed.
for approach, _exp_ids, result in approaches_tried:
    print(f'{approach}: {result}')

print('\n=== APPROACHES NOT TRIED ===')
approaches_not_tried = [
    'Chistyakov corner extraction (remove trees touching bbox)',
    'Very long optimization runs (hours)',
    'Small N exhaustive optimization (N=1-5)',
    'Eazy optimizer full run',
    'Combining multiple techniques in sequence',
]
for approach in approaches_not_tried:
    print(f'- {approach}')

=== APPROACHES TRIED ===
SA optimization: No improvement
Fractional translation: No improvement
Backward propagation: No improvement
bbox3 optimizer: No improvement
Perturbation + SA: Collisions immediately
Lattice SA (jiweiliu): Requires pre-optimized seeds
Crodoc ensemble: Tiny improvement (0.000014)
Asymmetric random restart: No improvement
Dimer mosaic: 3.3x WORSE

=== APPROACHES NOT TRIED ===
- Chistyakov corner extraction (remove trees touching bbox)
- Very long optimization runs (hours)
- Small N exhaustive optimization (N=1-5)
- Eazy optimizer full run
- Combining multiple techniques in sequence


In [3]:
# Analyze score contributions by N
from shapely.geometry import Polygon
from shapely import affinity
from shapely.ops import unary_union

# Dimensions of the tree outline built by get_tree_poly. Widths are full
# widths (the outline uses +/- half of each); the *_Y values are the heights
# of the outline's horizontal levels in the tree's local frame.
TRUNK_W = 0.15  # trunk width
TRUNK_H = 0.2  # trunk height (extends below BASE_Y)
BASE_W = 0.7  # width of the outline at BASE_Y
MID_W = 0.4  # width of the outline at TIER_2_Y
TOP_W = 0.25  # width of the outline at TIER_1_Y
TIP_Y = 0.8  # y of the single apex vertex
TIER_1_Y = 0.5  # y of the upper notch
TIER_2_Y = 0.25  # y of the lower notch
BASE_Y = 0.0  # y where the trunk meets the lowest tier
TRUNK_BOTTOM_Y = -TRUNK_H  # y of the bottom edge of the trunk

def get_tree_poly(x, y, deg):
    """Return the tree polygon rotated by `deg` degrees about its local origin
    and then translated by (x, y).

    The outline has 15 vertices: the apex, seven vertices down the right-hand
    side, and their mirror images back up the left-hand side.
    """
    # Right-hand profile, walking from just below the apex to the trunk bottom.
    right_side = [
        (TOP_W / 2.0, TIER_1_Y),
        (TOP_W / 4.0, TIER_1_Y),
        (MID_W / 2.0, TIER_2_Y),
        (MID_W / 4.0, TIER_2_Y),
        (BASE_W / 2.0, BASE_Y),
        (TRUNK_W / 2.0, BASE_Y),
        (TRUNK_W / 2.0, TRUNK_BOTTOM_Y),
    ]
    # Left-hand profile is the mirror image, walked bottom-up to close the ring.
    left_side = [(-px, py) for px, py in reversed(right_side)]
    outline = [(0.0, TIP_Y)] + right_side + left_side

    rotated = affinity.rotate(Polygon(outline), deg, origin=(0, 0))
    return affinity.translate(rotated, x, y)

# Load best submission
df = pd.read_csv('/home/submission/submission.csv')

# Per-N score = (side of the axis-aligned bounding square)^2 / N.
scores_by_n = {}
skipped_n = []  # N values whose row count does not match N
for n in range(1, 201):
    prefix = f"{n:03d}_"
    group = df[df["id"].str.startswith(prefix)].sort_values("id")
    if len(group) != n:
        # Incomplete or missing group: leave it out of the totals, but record
        # it so the omission is visible below instead of silent.
        skipped_n.append(n)
        continue

    xs = group['x'].values
    ys = group['y'].values
    degs = group['angle'].values

    polys = [get_tree_poly(px, py, pdeg) for px, py, pdeg in zip(xs, ys, degs)]
    bounds = unary_union(polys).bounds
    width = bounds[2] - bounds[0]
    height = bounds[3] - bounds[1]
    side = max(width, height)
    score = side * side / n
    scores_by_n[n] = score

if skipped_n:
    print(f'WARNING: skipped {len(skipped_n)} incomplete group(s): {skipped_n}')

print('=== TOP 10 SCORE CONTRIBUTORS ===')
for n, score in sorted(scores_by_n.items(), key=lambda x: -x[1])[:10]:
    print(f'N={n}: {score:.6f}')

print('\n=== SCORE BY RANGE ===')
ranges = [(1, 10), (11, 50), (51, 100), (101, 150), (151, 200)]
# The grand total does not change inside the loop, so compute it once.
total_score = sum(scores_by_n.values())
for start, end in ranges:
    range_score = sum(scores_by_n[n] for n in range(start, end+1) if n in scores_by_n)
    # Guard the share computation so an empty load reports 0% instead of
    # raising ZeroDivisionError.
    range_pct = range_score / total_score * 100 if total_score else 0.0
    print(f'N={start}-{end}: {range_score:.4f} ({range_pct:.1f}%)')

=== TOP 10 SCORE CONTRIBUTORS ===
N=1: 0.661250
N=2: 0.450779
N=3: 0.434745
N=5: 0.416850
N=4: 0.416545
N=7: 0.399897
N=6: 0.399610
N=9: 0.387415
N=8: 0.385407
N=15: 0.379203

=== SCORE BY RANGE ===
N=1-10: 4.3291 (6.1%)
N=11-50: 14.7126 (20.8%)
N=51-100: 17.6323 (25.0%)
N=101-150: 17.1408 (24.3%)
N=151-200: 16.8451 (23.8%)


In [4]:
# How large a relative improvement is needed to close the gap to the target,
# under three scenarios: uniform across all N, large N only, small N only.
target = 68.919154
current = sum(scores_by_n.values())
gap = current - target


def _band_total(first, last):
    """Sum the per-N scores for N in [first, last], ignoring N that were not loaded."""
    return sum(scores_by_n[k] for k in range(first, last + 1) if k in scores_by_n)


for summary_line in (
    f'Current total: {current:.6f}',
    f'Target: {target}',
    f'Gap: {gap:.6f}',
    f'\nIf we improve each N by the same percentage:',
):
    print(summary_line)
required_pct = gap / current * 100
print(f'Required improvement: {required_pct:.2f}%')

# Scenario: the whole gap is absorbed by N = 101..200.
print(f'\nIf we only improve large N (>100):')
large_n_score = _band_total(101, 200)
required_pct_large = gap / large_n_score * 100
print(f'Large N contributes: {large_n_score:.4f}')
print(f'Required improvement on large N: {required_pct_large:.2f}%')

# Scenario: the whole gap is absorbed by N = 1..10.
print(f'\nIf we only improve small N (1-10):')
small_n_score = _band_total(1, 10)
required_pct_small = gap / small_n_score * 100
print(f'Small N contributes: {small_n_score:.4f}')
print(f'Required improvement on small N: {required_pct_small:.2f}%')

Current total: 70.659943
Target: 68.919154
Gap: 1.740789

If we improve each N by the same percentage:
Required improvement: 2.46%

If we only improve large N (>100):
Large N contributes: 33.9859
Required improvement on large N: 5.12%

If we only improve small N (1-10):
Small N contributes: 4.3291
Required improvement on small N: 40.21%


In [5]:
# Inspect the single-tree configuration: N=1 has the largest per-N score.
print('=== N=1 ANALYSIS ===')
group = df[df["id"].str.startswith("001_")].sort_values("id")
print(f'N=1 configuration:')
print(group)

# First (and only expected) row of the N=1 group.
x, y, angle = (group[column].values[0] for column in ('x', 'y', 'angle'))
print(f'\nPosition: ({x:.6f}, {y:.6f})')
print(f'Angle: {angle:.6f}')

# With a single tree only the rotation affects the bounding square; the
# working hypothesis is that 45 degrees gives the smallest one.
print(f'\nFor N=1, angle=45° should be optimal (minimizes bounding box of symmetric tree)')
print(f'Current angle: {angle:.1f}°')

# Spot-check the hypothesis on a handful of rotations of a tree at the origin.
print('\nScore for different angles:')
for test_angle in (0, 30, 45, 60, 90, 180):
    min_x, min_y, max_x, max_y = get_tree_poly(0, 0, test_angle).bounds
    side = max(max_x - min_x, max_y - min_y)
    score = side * side / 1
    print(f'  Angle={test_angle}°: side={side:.6f}, score={score:.6f}')

=== N=1 ANALYSIS ===
N=1 configuration:
        id          x         y  angle
0  001_000  40.027194 -32.00734   45.0

Position: (40.027194, -32.007340)
Angle: 45.000000

For N=1, angle=45° should be optimal (minimizes bounding box of symmetric tree)
Current angle: 45.0°

Score for different angles:
  Angle=0°: side=1.000000, score=1.000000
  Angle=30°: side=0.903525, score=0.816358
  Angle=45°: side=0.813173, score=0.661250
  Angle=60°: side=0.903525, score=0.816358
  Angle=90°: side=1.000000, score=1.000000
  Angle=180°: side=1.000000, score=1.000000


In [6]:
# Implement chistyakov corner extraction approach
# For each N from 200 down to 2, try removing trees that touch the bounding box
# and see if the resulting (N-1) configuration is better than the current (N-1)

from shapely.geometry import box
from shapely.strtree import STRtree
from decimal import Decimal, getcontext
import copy

# Sets the precision of the decimal module's current context to 25 digits.
# NOTE(review): no Decimal arithmetic appears in the visible cells — confirm
# whether this setting (and the Decimal import) is still needed.
getcontext().prec = 25

def get_bbox_touching_tree_indices(polys):
    """Find indices of trees that touch the overall bounding box.

    A tree touches the bounding box exactly when one of its own bounds
    coincides with the corresponding overall extreme. Every tree lies inside
    the box, so it can only reach an edge at its own min/max coordinate.

    Args:
        polys: sequence of geometries exposing a ``bounds`` attribute of the
            form ``(min_x, min_y, max_x, max_y)``.

    Returns:
        List of indices into ``polys``, in ascending order. Empty when
        ``polys`` is empty.
    """
    if not polys:
        return []

    all_bounds = [p.bounds for p in polys]
    extremes = (
        min(b[0] for b in all_bounds),
        min(b[1] for b in all_bounds),
        max(b[2] for b in all_bounds),
        max(b[3] for b in all_bounds),
    )

    # Exact float equality is safe here: each extreme is taken verbatim from
    # one of these same bounds tuples, so no rounding separates them. This
    # replaces the earlier `touches(bbox)` / `intersects(bbox.boundary)` test;
    # `touches` could never hold for a polygon lying inside its own bbox.
    return [
        i
        for i, b in enumerate(all_bounds)
        if any(value == extreme for value, extreme in zip(b, extremes))
    ]

def get_side_length(polys):
    """Return the side of the smallest axis-aligned square covering all polys.

    That is the larger of the overall width and overall height of the combined
    bounds. An empty input yields 0.0.
    """
    if not polys:
        return 0.0
    # Transpose the per-polygon (min_x, min_y, max_x, max_y) tuples into columns.
    lefts, bottoms, rights, tops = zip(*(p.bounds for p in polys))
    span_x = max(rights) - min(lefts)
    span_y = max(tops) - min(bottoms)
    return max(span_x, span_y)

# Load current best submission
df = pd.read_csv('/home/submission/submission.csv')

# Build one configuration record per N: raw coordinates, the tree polygons
# and the bounding-square side. Groups whose row count is not N are skipped.
configs = {}
for n in range(1, 201):
    rows = df[df["id"].str.startswith(f"{n:03d}_")].sort_values("id")
    if len(rows) != n:
        continue

    xs, ys, degs = (rows[column].values for column in ('x', 'y', 'angle'))
    trees = [get_tree_poly(px, py, pdeg) for px, py, pdeg in zip(xs, ys, degs)]

    record = {}
    record['x'] = xs.tolist()
    record['y'] = ys.tolist()
    record['angle'] = degs.tolist()
    record['polys'] = trees
    record['side'] = get_side_length(trees)
    configs[n] = record

print(f"Loaded {len(configs)} configurations")
print(f"Current total score: {sum(c['side']**2/n for n, c in configs.items()):.6f}")

Loaded 200 configurations
Current total score: 70.659943


In [7]:
# Run corner extraction
#
# For every source layout of N trees (largest N first), repeatedly remove one
# tree and check whether the remaining layout has a smaller bounding square
# than the stored configuration for that smaller tree count. Improved layouts
# overwrite `configs` in place, so they can serve as source layouts later in
# the sweep. NOTE: because `configs` is mutated, re-running this cell without
# rebuilding `configs` starts from the already-updated state.

# Stop peeling once the target count is more than this far below the source N.
MAX_EXTRACTION_DEPTH = 10
# Score gains at or below this threshold are not recorded.
MIN_IMPROVEMENT = 1e-10

print("Running corner extraction...")
improvements = []
total_improvements = 0

# The lower bound is 1 (exclusive) so that N=2 is also used as a source layout.
for n_main in range(200, 1, -1):
    if n_main not in configs:
        continue

    # Work on copies so the source configuration itself is never modified.
    source = configs[n_main]
    current_polys = list(source['polys'])
    current_x = list(source['x'])
    current_y = list(source['y'])
    current_angle = list(source['angle'])

    # Try to extract subsets for smaller N
    while len(current_polys) > 1:
        n_target = len(current_polys) - 1

        if n_target not in configs:
            break

        # A removal only counts if it beats the stored side for n_target.
        best_side = configs[n_target]['side']
        best_idx_to_delete = None

        # Only trees on the bounding box can shrink it when removed.
        touching_indices = get_bbox_touching_tree_indices(current_polys)

        if not touching_indices:
            # If no trees touch bbox, try all trees
            touching_indices = list(range(len(current_polys)))

        # Try removing each touching tree
        for idx in touching_indices:
            test_side = get_side_length(current_polys[:idx] + current_polys[idx + 1:])

            if test_side < best_side:
                best_side = test_side
                best_idx_to_delete = idx

        if best_idx_to_delete is not None:
            # Remove the tree whose removal gives the smallest side.
            del current_polys[best_idx_to_delete]
            del current_x[best_idx_to_delete]
            del current_y[best_idx_to_delete]
            del current_angle[best_idx_to_delete]

            # Score gain relative to the stored n_target configuration.
            old_score = configs[n_target]['side']**2 / n_target
            new_score = best_side**2 / n_target
            improvement = old_score - new_score

            if improvement > MIN_IMPROVEMENT:
                configs[n_target] = {
                    'x': list(current_x),
                    'y': list(current_y),
                    'angle': list(current_angle),
                    'polys': list(current_polys),
                    'side': best_side
                }
                improvements.append((n_target, improvement))
                total_improvements += improvement
                print(f"N={n_target}: improved by {improvement:.9f} (new side: {best_side:.6f})")
        else:
            # No removal beats the stored layout: drop the first tree and keep
            # descending.
            # NOTE(review): index 0 is an arbitrary choice; removing the
            # candidate with the smallest resulting side may search better —
            # confirm against the reference approach before changing.
            del current_polys[0]
            del current_x[0]
            del current_y[0]
            del current_angle[0]

        # Stop if we've gone too far from the main N
        if n_main - n_target > MAX_EXTRACTION_DEPTH:
            break

# "\n" (not "\\n") so a blank line is printed rather than a literal backslash-n.
print(f"\nTotal improvements found: {len(improvements)}")
print(f"Total score improvement: {total_improvements:.9f}")

Running corner extraction...


\nTotal improvements found: 0
Total score improvement: 0.000000000
