# Loop 2 Analysis: Understanding the Optimization Landscape

## Key Findings from Previous Experiments

1. **Baseline at local optimum**: bbox3 ran 80 rounds with ZERO improvement
2. **Ensemble from 40 snapshots**: Achieved 70.559 (~0.117 better than the 70.676 baseline)
3. **Target**: 68.919154 (gap: 1.64 points from best ensemble)

## Critical Insights from Discussions
- Symmetric solutions (42 upvotes) vs Asymmetric solutions (34 upvotes) debate
- The solution is at a very tight local optimum
- Need fundamentally different approaches

In [1]:
import pandas as pd
import numpy as np
import os
import glob
from shapely import affinity
from shapely.geometry import Polygon
from shapely.strtree import STRtree

# Tree geometry
def get_tree_polygon(cx, cy, angle):
    """Build the tree outline as a shapely Polygon placed at (cx, cy).

    The outline is a three-tier tree (tip at y=0.8, tiers stepping in at
    y=0.5 and y=0.25, base 0.7 wide at y=0.0) sitting on a 0.15 x 0.2 trunk
    that extends below y=0. It is symmetric about the local y axis.

    Args:
        cx: x offset applied after rotation.
        cy: y offset applied after rotation.
        angle: rotation handed to ``shapely.affinity.rotate`` about the local
            origin (0, 0); shapely interprets it in degrees by default.

    Returns:
        The rotated, then translated, ``Polygon``.
    """
    trunk_w, trunk_h = 0.15, 0.2
    base_w, mid_w, top_w = 0.7, 0.4, 0.25
    tip_y, tier_1_y, tier_2_y, base_y = 0.8, 0.5, 0.25, 0.0
    trunk_bottom_y = -trunk_h

    # Right-hand side of the silhouette, walking from just below the tip
    # down to the bottom-right corner of the trunk.
    right_side = [
        (top_w/2, tier_1_y),
        (top_w/4, tier_1_y),
        (mid_w/2, tier_2_y),
        (mid_w/4, tier_2_y),
        (base_w/2, base_y),
        (trunk_w/2, base_y),
        (trunk_w/2, trunk_bottom_y),
    ]
    # The left-hand side is the mirror image, walked bottom-up so the ring
    # keeps a single consistent winding.
    left_side = [(-x, y) for x, y in reversed(right_side)]

    outline = Polygon([(0.0, tip_y)] + right_side + left_side)
    rotated = affinity.rotate(outline, angle, origin=(0, 0))
    return affinity.translate(rotated, xoff=cx, yoff=cy)

def parse_value(val):
    """Convert a CSV cell to float, dropping a leading 's' marker if present.

    Args:
        val: a number, or a string that may start with the character 's'.

    Returns:
        ``float`` of the value with the optional 's' prefix removed.
    """
    has_marker = isinstance(val, str) and val.startswith('s')
    return float(val[1:] if has_marker else val)

# Marker output confirming that the helper definitions in this cell executed.
print('Functions defined')

Functions defined


In [2]:
# Load the ensemble solution and analyze per-N scores
def analyze_per_n_scores(csv_path):
    """Compute the bounding-square score of every N-tree group in a CSV.

    For each n in 1..200 the rows whose ``id`` starts with the zero-padded
    prefix ``"{n:03d}_"`` are collected. Groups that do not contain exactly
    n rows are left out of the result. For the remaining groups the trees are
    built from the ``x``, ``y`` and ``deg`` columns, the axis-aligned bounding
    box of all polygon vertices is taken, and the longer side is used as the
    side of the enclosing square.

    Args:
        csv_path: path of a CSV with ``id``, ``x``, ``y`` and ``deg`` columns.

    Returns:
        DataFrame with one row per scored n and the columns ``n``, ``side``,
        ``score`` (side**2 / n) and ``efficiency`` (n / side**2).
    """
    submission = pd.read_csv(csv_path)
    records = []

    for n in range(1, 201):
        group = submission[submission['id'].str.startswith(f"{n:03d}_")]
        if len(group) != n:
            # Missing or incomplete configuration: no entry for this n.
            continue

        # One (k, 2) vertex array per tree, stacked into a single point cloud.
        outlines = []
        for _, row in group.iterrows():
            tree = get_tree_polygon(
                parse_value(row['x']),
                parse_value(row['y']),
                parse_value(row['deg']),
            )
            outlines.append(np.array(tree.exterior.coords))
        cloud = np.concatenate(outlines)

        lower = cloud.min(axis=0)
        upper = cloud.max(axis=0)
        side = max(upper[0] - lower[0], upper[1] - lower[1])
        area = side ** 2
        records.append({'n': n, 'side': side, 'score': area / n, 'efficiency': n / area})

    return pd.DataFrame(records)

# Score the ensemble submission, then list the N values with the lowest
# packing efficiency (equivalently, the largest per-N score).
ensemble_df = analyze_per_n_scores('/home/code/ensemble_70.559.csv')
ensemble_total = ensemble_df["score"].sum()
print(f'Total score: {ensemble_total:.6f}')
print(f'\nTop 10 worst efficiency (highest score contribution):')
report_columns = ['n', 'side', 'score', 'efficiency']
print(ensemble_df.nsmallest(10, 'efficiency')[report_columns])

Total score: 70.559048

Top 10 worst efficiency (highest score contribution):
     n      side     score  efficiency
0    1  0.813173  0.661250    1.512287
1    2  0.935230  0.437328    2.286615
2    3  1.142031  0.434745    2.300198
3    4  1.282273  0.411056    2.432759
6    7  1.673104  0.399897    2.500647
5    6  1.548438  0.399610    2.502438
4    5  1.403761  0.394109    2.537368
8    9  1.867280  0.387415    2.581211
7    8  1.755921  0.385407    2.594658
14  15  2.384962  0.379203    2.637111


In [3]:
# Score the baseline submission and compare its total with the ensemble's.
baseline_df = analyze_per_n_scores('/home/code/current.csv')
base_total = baseline_df["score"].sum()
ens_total = ensemble_df["score"].sum()
print(f'Baseline total score: {base_total:.6f}')
print(f'Ensemble total score: {ens_total:.6f}')
print(f'Improvement: {base_total - ens_total:.6f}')

# Per-N comparison: keep the N values scored in both runs and report those
# where the ensemble beats the baseline by more than 0.001.
merged = baseline_df.merge(ensemble_df, on='n', suffixes=('_base', '_ens'))
merged['improvement'] = merged['score_base'] - merged['score_ens']
print(f'\nN values where ensemble is better:')
is_better = merged['improvement'] > 0.001
print(merged.loc[is_better, ['n', 'score_base', 'score_ens', 'improvement']].head(20))

Baseline total score: 70.676102
Ensemble total score: 70.559048
Improvement: 0.117055

N values where ensemble is better:
       n  score_base  score_ens  improvement
1      2    0.450779   0.437328     0.013452
3      4    0.416545   0.411056     0.005489
4      5    0.416850   0.394109     0.022740
46    47    0.357493   0.356418     0.001075
52    53    0.361855   0.358787     0.003069
53    54    0.361321   0.352169     0.009151
54    55    0.355023   0.346789     0.008234
55    56    0.352281   0.340953     0.011327
56    57    0.358045   0.354108     0.003937
70    71    0.352234   0.348328     0.003906
73    74    0.354139   0.353127     0.001012
79    80    0.344881   0.343654     0.001227
87    88    0.350672   0.349550     0.001122
93    94    0.352274   0.351150     0.001124
100  101    0.350389   0.349036     0.001353
161  162    0.338332   0.337058     0.001274


In [4]:
# Enumerate every regular *.csv file under the snapshot tree via `find`.
import subprocess

find_cmd = ['find', '/home/nonroot/snapshots/santa-2025', '-name', '*.csv', '-type', 'f']
result = subprocess.run(find_cmd, capture_output=True, text=True)
# One path per output line; drop the empty entry produced when nothing matched.
csv_files = list(filter(None, result.stdout.strip().split('\n')))
print(f'Found {len(csv_files)} CSV files across all snapshots')

Found 809 CSV files across all snapshots


In [5]:
# Check for overlaps in the ensemble solution
def has_overlap(trees):
    """Return True if any two polygons in ``trees`` overlap with real area.

    A spatial index narrows each polygon down to its candidate neighbours;
    the values returned by ``STRtree.query`` are used as integer positions
    into ``trees``. Every unordered pair is examined exactly once (only
    candidates with a higher position than the current polygon are checked),
    instead of once from each side.

    Two polygons count as overlapping when they intersect, do not merely
    touch, and their intersection has an area above 1e-10; the threshold
    ignores numerical slivers.

    Args:
        trees: sequence of shapely geometries.

    Returns:
        ``True`` as soon as one overlapping pair is found, otherwise
        ``False``. Sequences with zero or one element never overlap.
    """
    if len(trees) <= 1:
        return False

    tree_index = STRtree(trees)
    for i, poly in enumerate(trees):
        for idx in tree_index.query(poly):
            # Pair (idx, i) with idx < i was already handled when `idx` was
            # the outer polygon; idx == i is the polygon itself.
            if idx <= i:
                continue
            other = trees[idx]
            if not poly.intersects(other) or poly.touches(other):
                continue
            if poly.intersection(other).area > 1e-10:
                return True
    return False

def validate_csv(csv_path):
    """List the N values whose tree configuration contains an overlap.

    For each n in 1..200 the rows whose ``id`` starts with ``"{n:03d}_"``
    are selected; groups that do not have exactly n rows are skipped. The
    trees of each remaining group are built from ``x``, ``y`` and ``deg``
    and checked with ``has_overlap``.

    Args:
        csv_path: path of a CSV with ``id``, ``x``, ``y`` and ``deg`` columns.

    Returns:
        List of n (ascending) for which ``has_overlap`` reported an overlap.
    """
    submission = pd.read_csv(csv_path)
    bad_ns = []

    for n in range(1, 201):
        group = submission[submission['id'].str.startswith(f"{n:03d}_")]
        if len(group) == n:
            placed = [
                get_tree_polygon(
                    parse_value(row['x']),
                    parse_value(row['y']),
                    parse_value(row['deg']),
                )
                for _, row in group.iterrows()
            ]
            if has_overlap(placed):
                bad_ns.append(n)

    return bad_ns

# Run the overlap check on the ensemble submission and report the outcome.
overlaps = validate_csv('/home/code/ensemble_70.559.csv')
print(
    f'WARNING: Overlaps found in N={overlaps}'
    if overlaps
    else 'No overlaps found - solution is valid!'
)

