# Experiment 004: Ensemble + Constructive Approach

The baseline is at a local optimum that SA cannot escape.
We need a FUNDAMENTALLY DIFFERENT approach:

1. Implement zaburo constructive heuristic (alternating rows)
2. Apply local search to constructive solution
3. Ensemble: take best per-N from baseline vs constructive
4. Track which N values improved

In [1]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely import affinity
from shapely.ops import unary_union
import json
import math
import time
import warnings
warnings.filterwarnings('ignore')

# Tree shape vertices
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]

print("Setup complete")

Setup complete


In [2]:
# Core functions
def create_tree_polygon(x, y, angle):
    """Create a tree polygon at position (x, y) with given rotation angle."""
    x, y, angle = float(x), float(y), float(angle)
    coords = list(zip(TX, TY))
    poly = Polygon(coords)
    poly = affinity.rotate(poly, angle, origin=(0, 0))
    poly = affinity.translate(poly, x, y)
    return poly

def create_scaled_tree_polygon(x, y, angle, scale_factor=1e15):
    """Create tree polygon with scaling for strict precision."""
    x, y, angle = float(x), float(y), float(angle)
    coords = [(tx * scale_factor, ty * scale_factor) for tx, ty in zip(TX, TY)]
    poly = Polygon(coords)
    poly = affinity.rotate(poly, angle, origin=(0, 0))
    poly = affinity.translate(poly, xoff=x * scale_factor, yoff=y * scale_factor)
    return poly

def get_bbox_side(trees):
    """Get bounding box side length for a list of trees."""
    if len(trees) == 0:
        return 0
    polygons = [create_tree_polygon(t['x'], t['y'], t['deg']) for t in trees]
    union = unary_union(polygons)
    bounds = union.bounds
    width = bounds[2] - bounds[0]
    height = bounds[3] - bounds[1]
    return max(width, height)

def get_score(trees, n):
    """Get score contribution for N trees."""
    side = get_bbox_side(trees)
    return (side ** 2) / n

def has_overlap(trees):
    """Check for overlaps using strict precision."""
    if len(trees) <= 1:
        return False
    polygons = [create_scaled_tree_polygon(t['x'], t['y'], t['deg']) for t in trees]
    n = len(polygons)
    for i in range(n):
        for j in range(i+1, n):
            if polygons[i].intersects(polygons[j]):
                if not polygons[i].touches(polygons[j]):
                    intersection = polygons[i].intersection(polygons[j])
                    if intersection.area > 0:
                        return True
    return False

def parse_value(val):
    """Parse value from submission format."""
    if isinstance(val, str) and val.startswith('s'):
        return val[1:]
    return str(val)

print("Core functions defined")

Core functions defined


In [3]:
# Load baseline submission
df = pd.read_csv('/home/code/experiments/001_fix_overlaps/submission.csv')
print(f"Loaded baseline with {len(df)} rows")

# Parse into structured format
baseline_trees_by_n = {}
for _, row in df.iterrows():
    id_parts = row['id'].split('_')
    n = int(id_parts[0])
    idx = int(id_parts[1])
    
    if n not in baseline_trees_by_n:
        baseline_trees_by_n[n] = []
    
    baseline_trees_by_n[n].append({
        'idx': idx,
        'x': parse_value(row['x']),
        'y': parse_value(row['y']),
        'deg': parse_value(row['deg'])
    })

print(f"Parsed trees for N=1 to {max(baseline_trees_by_n.keys())}")

# Calculate baseline scores
baseline_per_n = {}
for n in range(1, 201):
    baseline_per_n[n] = get_score(baseline_trees_by_n[n], n)

baseline_total = sum(baseline_per_n.values())
print(f"Baseline total score: {baseline_total:.6f}")

Loaded baseline with 20100 rows


Parsed trees for N=1 to 200


Baseline total score: 70.622435


In [4]:
# STEP 1: Implement Zaburo Constructive Heuristic
print("=" * 60)
print("STEP 1: ZABURO CONSTRUCTIVE HEURISTIC")
print("=" * 60)

def construct_alternating_rows(n):
    """Build solution using alternating rows of 0° and 180° trees.
    
    Based on zaburo kernel approach:
    - Even rows: trees at angle 0°
    - Odd rows: trees at angle 180°, offset by half tree width
    - Trees interlock for efficient packing
    """
    best_score = float('inf')
    best_trees = None
    
    # Try different row configurations
    for n_even in range(1, n + 1):
        for n_odd in [n_even, n_even - 1, n_even + 1]:
            if n_odd < 0:
                continue
                
            all_trees = []
            rest = n
            r = 0
            
            while rest > 0:
                # Number of trees in this row
                if r % 2 == 0:
                    m = min(rest, n_even)
                else:
                    m = min(rest, n_odd)
                
                if m <= 0:
                    break
                    
                rest -= m
                
                # Angle and position
                angle = 0 if r % 2 == 0 else 180
                x_offset = 0 if r % 2 == 0 else 0.35  # Half tree width for offset
                
                # Y position: alternating rows interlock
                # Row 0: y=0, Row 1: y=0.8, Row 2: y=1.0, Row 3: y=1.8, etc.
                if r % 2 == 0:
                    y = (r // 2) * 1.0
                else:
                    y = 0.8 + ((r - 1) // 2) * 1.0
                
                for i in range(m):
                    all_trees.append({
                        'idx': len(all_trees),
                        'x': str(0.7 * i + x_offset),
                        'y': str(y),
                        'deg': str(angle)
                    })
                
                r += 1
            
            if len(all_trees) != n:
                continue
            
            # Check for overlaps
            if has_overlap(all_trees):
                continue
            
            # Calculate score
            score = get_score(all_trees, n)
            if score < best_score:
                best_score = score
                best_trees = all_trees
    
    return best_trees, best_score

print("Constructive function defined")

STEP 1: ZABURO CONSTRUCTIVE HEURISTIC
Constructive function defined


In [5]:
# Test constructive approach on a few N values
print("\nTesting constructive approach on N=10, 20, 30...")

for test_n in [10, 20, 30]:
    trees, score = construct_alternating_rows(test_n)
    baseline_score = baseline_per_n[test_n]
    
    if trees:
        print(f"N={test_n}: constructive={score:.6f}, baseline={baseline_score:.6f}, diff={score - baseline_score:+.6f}")
    else:
        print(f"N={test_n}: constructive FAILED to find valid solution")


Testing constructive approach on N=10, 20, 30...
N=10: constructive=1.600000, baseline=0.376630, diff=+1.223370
N=20: constructive=2.450000, baseline=0.376057, diff=+2.073943


N=30: constructive FAILED to find valid solution


In [6]:
# Generate constructive solutions for ALL N values
print("\n" + "=" * 60)
print("GENERATING CONSTRUCTIVE SOLUTIONS FOR ALL N")
print("=" * 60)

constructive_trees_by_n = {}
constructive_per_n = {}

start_time = time.time()

for n in range(1, 201):
    if n == 1:
        # N=1 is special - just use optimal angle 45°
        constructive_trees_by_n[n] = [{'idx': 0, 'x': '0', 'y': '0', 'deg': '45'}]
        constructive_per_n[n] = get_score(constructive_trees_by_n[n], n)
    else:
        trees, score = construct_alternating_rows(n)
        if trees:
            constructive_trees_by_n[n] = trees
            constructive_per_n[n] = score
        else:
            # Fallback to baseline if constructive fails
            constructive_trees_by_n[n] = [dict(t) for t in baseline_trees_by_n[n]]
            constructive_per_n[n] = baseline_per_n[n]
    
    if n % 50 == 0:
        print(f"  Progress: N={n}/200")

constructive_total = sum(constructive_per_n.values())
print(f"\nConstructive total score: {constructive_total:.6f}")
print(f"Baseline total score: {baseline_total:.6f}")
print(f"Time: {time.time() - start_time:.1f}s")


GENERATING CONSTRUCTIVE SOLUTIONS FOR ALL N


  Progress: N=50/200


  Progress: N=100/200


  Progress: N=150/200


  Progress: N=200/200

Constructive total score: 110.184440
Baseline total score: 70.622435
Time: 597.1s


In [7]:
# STEP 2: Compare per-N scores
print("\n" + "=" * 60)
print("STEP 2: PER-N COMPARISON")
print("=" * 60)

constructive_better = []
baseline_better = []

for n in range(1, 201):
    c_score = constructive_per_n[n]
    b_score = baseline_per_n[n]
    diff = b_score - c_score  # positive = constructive is better
    
    if diff > 0.0001:
        constructive_better.append((n, diff))
    elif diff < -0.0001:
        baseline_better.append((n, -diff))

print(f"\nN values where CONSTRUCTIVE is better: {len(constructive_better)}")
if constructive_better:
    print("Top 10 improvements:")
    for n, diff in sorted(constructive_better, key=lambda x: -x[1])[:10]:
        print(f"  N={n}: +{diff:.6f}")

print(f"\nN values where BASELINE is better: {len(baseline_better)}")
if baseline_better:
    print("Top 10 where baseline wins:")
    for n, diff in sorted(baseline_better, key=lambda x: -x[1])[:10]:
        print(f"  N={n}: baseline better by {diff:.6f}")


STEP 2: PER-N COMPARISON

N values where CONSTRUCTIVE is better: 0

N values where BASELINE is better: 26
Top 10 where baseline wins:
  N=25: baseline better by 2.867856
  N=27: baseline better by 2.772096
  N=26: baseline better by 2.741388
  N=22: baseline better by 2.533833
  N=24: baseline better by 2.436161
  N=23: baseline better by 2.413858
  N=19: baseline better by 2.210332
  N=21: baseline better by 2.092121
  N=20: baseline better by 2.073943
  N=16: baseline better by 1.875872


In [8]:
# STEP 3: ENSEMBLE - Take best per-N
print("\n" + "=" * 60)
print("STEP 3: ENSEMBLE - TAKE BEST PER-N")
print("=" * 60)

ensemble_trees_by_n = {}
ensemble_per_n = {}
ensemble_sources = {}  # Track which source was used for each N

for n in range(1, 201):
    c_score = constructive_per_n[n]
    b_score = baseline_per_n[n]
    
    if c_score < b_score:
        ensemble_trees_by_n[n] = constructive_trees_by_n[n]
        ensemble_per_n[n] = c_score
        ensemble_sources[n] = 'constructive'
    else:
        ensemble_trees_by_n[n] = [dict(t) for t in baseline_trees_by_n[n]]
        ensemble_per_n[n] = b_score
        ensemble_sources[n] = 'baseline'

ensemble_total = sum(ensemble_per_n.values())

print(f"\nEnsemble total score: {ensemble_total:.6f}")
print(f"Baseline total score: {baseline_total:.6f}")
print(f"Constructive total score: {constructive_total:.6f}")
print(f"\nEnsemble improvement over baseline: {baseline_total - ensemble_total:.6f}")

# Count sources
from collections import Counter
source_counts = Counter(ensemble_sources.values())
print(f"\nSource distribution: {dict(source_counts)}")


STEP 3: ENSEMBLE - TAKE BEST PER-N

Ensemble total score: 70.622435
Baseline total score: 70.622435
Constructive total score: 110.184440

Ensemble improvement over baseline: 0.000000

Source distribution: {'baseline': 200}


In [9]:
# STEP 4: Validate no overlaps in ensemble
print("\n" + "=" * 60)
print("STEP 4: VALIDATION")
print("=" * 60)

overlap_errors = []
for n in range(1, 201):
    trees = ensemble_trees_by_n[n]
    if n > 1 and has_overlap(trees):
        overlap_errors.append(n)
        print(f"  ⚠️ N={n}: Has overlaps")

if overlap_errors:
    print(f"\n❌ {len(overlap_errors)} N values have overlaps!")
else:
    print("\n✅ All N values pass overlap validation!")


STEP 4: VALIDATION



✅ All N values pass overlap validation!


In [10]:
# Create submission
print("\n" + "=" * 60)
print("CREATING SUBMISSION")
print("=" * 60)

rows = []
for n in range(1, 201):
    trees = ensemble_trees_by_n[n]
    for i, t in enumerate(trees):
        rows.append({
            'id': f"{n:03d}_{i}",
            'x': f"s{t['x']}",
            'y': f"s{t['y']}",
            'deg': f"s{t['deg']}"
        })

submission_df = pd.DataFrame(rows)
print(f"Submission shape: {submission_df.shape}")

submission_df.to_csv('/home/code/experiments/004_ensemble_constructive/submission.csv', index=False)
submission_df.to_csv('/home/submission/submission.csv', index=False)
print("Submission saved!")


CREATING SUBMISSION
Submission shape: (20100, 4)
Submission saved!


In [11]:
# Save metrics
metrics = {
    'cv_score': ensemble_total,
    'baseline_score': baseline_total,
    'constructive_score': constructive_total,
    'improvement': baseline_total - ensemble_total,
    'n_constructive_better': len(constructive_better),
    'n_baseline_better': len(baseline_better),
    'overlap_errors': len(overlap_errors),
    'target': 68.888293,
    'gap': ensemble_total - 68.888293
}

with open('/home/code/experiments/004_ensemble_constructive/metrics.json', 'w') as f:
    json.dump(metrics, f, indent=2)

print("\nMetrics saved!")
print(f"\n" + "=" * 60)
print("FINAL RESULTS")
print("=" * 60)
print(f"Baseline score: {baseline_total:.6f}")
print(f"Constructive score: {constructive_total:.6f}")
print(f"Ensemble score: {ensemble_total:.6f}")
print(f"Improvement: {baseline_total - ensemble_total:.6f}")
print(f"Target: 68.888293")
print(f"Gap to target: {ensemble_total - 68.888293:.6f}")


Metrics saved!

FINAL RESULTS
Baseline score: 70.622435
Constructive score: 110.184440
Ensemble score: 70.622435
Improvement: 0.000000
Target: 68.888293
Gap to target: 1.734142
