# Experiment 006: Zaburo Grid-Based Initial Solutions

Generate grid-based solutions with alternating row orientations (0° and 180°).
This provides DIFFERENT initial configurations that might land in different basins of attraction.

In [1]:
import numpy as np
import pandas as pd
from decimal import Decimal, getcontext
from shapely.geometry import Polygon
from shapely.affinity import rotate, translate
from shapely.strtree import STRtree
import json
import os
import shutil

# Decimal arithmetic in this notebook is carried out with 25 significant digits.
getcontext().prec = 25
# Tree coordinates are multiplied by this factor before being handed to shapely.
scale_factor = Decimal('1e15')

# Outline of one unrotated tree as (x, y) vertex pairs: the tip first, then down
# the right-hand side, across the trunk bottom, and back up the left-hand side.
_TREE_OUTLINE = [
    (0, 0.8),
    (0.125, 0.5),
    (0.0625, 0.5),
    (0.2, 0.25),
    (0.1, 0.25),
    (0.35, 0),
    (0.075, 0),
    (0.075, -0.2),
    (-0.075, -0.2),
    (-0.075, 0),
    (-0.35, 0),
    (-0.1, 0.25),
    (-0.2, 0.25),
    (-0.0625, 0.5),
    (-0.125, 0.5),
]
# Parallel coordinate lists consumed by the float-based polygon helper.
TX = [vertex[0] for vertex in _TREE_OUTLINE]
TY = [vertex[1] for vertex in _TREE_OUTLINE]

class ChristmasTree:
    """A tree-shaped polygon placed at a given centre and rotation.

    The position and angle are kept as ``Decimal`` attributes. The shapely
    polygon stored in ``self.polygon`` has every coordinate multiplied by the
    module-level ``scale_factor``.
    """

    def __init__(self, center_x='0', center_y='0', angle='0'):
        """Build the outline, rotate it about the origin, then move it into place.

        Args:
            center_x: x offset of the tree's local origin; any value ``Decimal()`` accepts.
            center_y: y offset of the tree's local origin; any value ``Decimal()`` accepts.
            angle: rotation passed (as a float) to ``shapely.affinity.rotate``.
        """
        self.center_x = Decimal(center_x)
        self.center_y = Decimal(center_y)
        self.angle = Decimal(angle)

        two, four = Decimal('2'), Decimal('4')

        # Widths of the trunk and of the three foliage tiers.
        trunk_w, trunk_h = Decimal('0.15'), Decimal('0.2')
        base_w, mid_w, top_w = Decimal('0.7'), Decimal('0.4'), Decimal('0.25')

        # Heights at which the outline changes width.
        tip_y = Decimal('0.8')
        tier_1_y = Decimal('0.5')
        tier_2_y = Decimal('0.25')
        base_y = Decimal('0.0')
        trunk_bottom_y = -trunk_h

        # Right-hand half of the outline, from just below the tip down to the
        # bottom-right corner of the trunk.
        right_side = [
            (top_w / two, tier_1_y),
            (top_w / four, tier_1_y),
            (mid_w / two, tier_2_y),
            (mid_w / four, tier_2_y),
            (base_w / two, base_y),
            (trunk_w / two, base_y),
            (trunk_w / two, trunk_bottom_y),
        ]
        # The left-hand half is the mirror image, walked bottom-up.
        left_side = [(-px, py) for px, py in reversed(right_side)]
        outline = [(Decimal('0.0'), tip_y)] + right_side + left_side

        def to_scaled(value):
            # Scale in Decimal first, then convert to the float shapely needs.
            return float(value * scale_factor)

        initial_polygon = Polygon(
            [(to_scaled(px), to_scaled(py)) for px, py in outline]
        )

        turned = rotate(initial_polygon, float(self.angle), origin=(0, 0))
        self.polygon = translate(
            turned,
            xoff=to_scaled(self.center_x),
            yoff=to_scaled(self.center_y),
        )

# Progress marker: printed once the class definition above has executed.
print("ChristmasTree class defined")

ChristmasTree class defined


In [2]:
def find_best_trees(n: int):
    """Find the best grid-based arrangement for ``n`` trees.

    Candidate layouts stack rows from the bottom up. Even-indexed rows hold up
    to ``n_even`` trees at 0°, odd-indexed rows hold up to ``n_odd`` trees at
    180°, where ``n_odd`` is either ``n_even`` or ``n_even - 1``. Trees within a
    row are spaced 0.7 apart and odd rows are shifted right by 0.35. Even row
    ``r`` is placed at ``y = r // 2`` and odd row ``r`` at
    ``y = 0.8 + (r - 1) // 2``. The last row may be only partially filled.

    Each layout is scored as the square of the longer side of the axis-aligned
    bounding box of all polygon vertices (in unscaled units). The lowest score
    wins; on ties the first layout found is kept.

    Args:
        n: Number of trees to place.

    Returns:
        ``(best_score, best_trees)`` where ``best_trees`` is a list of
        ``ChristmasTree`` objects. For ``n < 1`` no layout is tried and the
        result is ``(float("inf"), None)``.
    """
    best_score, best_trees = float("inf"), None

    # Polygons are stored scaled; divide by the same factor the class applied
    # rather than repeating the literal.
    unscale = float(scale_factor)

    for n_even in range(1, n + 1):
        # n_even >= 1, so both candidates for n_odd are non-negative.
        for n_odd in (n_even, n_even - 1):
            all_trees = []
            rest = n
            r = 0

            while rest > 0:
                m = min(rest, n_even if r % 2 == 0 else n_odd)
                if m <= 0:
                    # Only happens when n_odd == 0 and trees remain: this
                    # configuration cannot place all n trees.
                    break
                rest -= m

                angle = 0 if r % 2 == 0 else 180
                x_offset = 0 if r % 2 == 0 else Decimal("0.7") / 2
                y = r // 2 * Decimal("1.0") if r % 2 == 0 else (Decimal("0.8") + (r - 1) // 2 * Decimal("1.0"))

                row_trees = [
                    ChristmasTree(
                        center_x=str(Decimal("0.7") * i + x_offset),
                        center_y=str(y),
                        angle=str(angle)
                    )
                    for i in range(m)
                ]
                all_trees.extend(row_trees)
                r += 1

            # Skip configurations that were abandoned before placing every tree.
            if len(all_trees) != n:
                continue

            # Calculate bounding box over every exterior vertex, in unscaled units
            xys = np.concatenate([np.asarray(t.polygon.exterior.xy).T / unscale for t in all_trees])
            min_x, min_y = xys.min(axis=0)
            max_x, max_y = xys.max(axis=0)
            score = max(max_x - min_x, max_y - min_y) ** 2

            if score < best_score:
                best_score = score
                best_trees = all_trees

    return best_score, best_trees

# Progress marker: printed once find_best_trees has been defined.
print("find_best_trees function defined")

# Test with N=10
# Smoke test: build the best grid for 10 trees and report its score and tree count.
test_score, test_trees = find_best_trees(10)
print(f"Test N=10: score={test_score:.6f}, trees={len(test_trees)}")

find_best_trees function defined
Test N=10: score=4.840000, trees=10


In [3]:
# Build the best grid layout for every tree count from 1 through 200
print("Generating grid-based solutions for N=1-200...")

zaburo_solutions, zaburo_scores = {}, {}

for n in range(1, 201):
    score, trees = find_best_trees(n)
    zaburo_scores[n] = score
    zaburo_solutions[n] = trees
    # Log only the first ten sizes and every twentieth one to keep the output short
    if n % 20 == 0 or n <= 10:
        print(f"N={n}: score={score:.6f}")

# Aggregate metric: each layout's score divided by its tree count, summed over all N
zaburo_total = sum(zaburo_scores[n] / n for n in zaburo_scores)
print(f"\nZaburo total score: {zaburo_total:.6f}")

Generating grid-based solutions for N=1-200...
N=1: score=1.000000
N=2: score=1.440000
N=3: score=1.960000
N=4: score=3.062500
N=5: score=4.000000
N=6: score=4.000000
N=7: score=4.410000
N=8: score=4.410000
N=9: score=4.840000
N=10: score=4.840000


N=20: score=9.922500


N=40: score=17.640000


N=60: score=27.040000


N=80: score=35.402500


N=100: score=39.690000


N=120: score=49.000000


N=140: score=54.022500


N=160: score=64.802500


N=180: score=70.560000


N=200: score=81.000000

Zaburo total score: 88.329998


In [4]:
# Load baseline for comparison
def parse_submission(filepath):
    """Read a submission CSV and group its trees by tree count.

    The file must have ``id``, ``x``, ``y`` and ``deg`` columns. Values in the
    three numeric columns may carry a leading ``'s'``, which is dropped before
    conversion to ``float``. The integer before the first ``'_'`` in ``id`` is
    used as the group key.

    Args:
        filepath: Path or file-like object accepted by ``pd.read_csv``.

    Returns:
        Dict mapping each group key to a list of ``(x, y, deg)`` float tuples,
        in the order the rows appear in the file.
    """
    def _to_float(raw):
        # Drop the leading 's' marker when present; otherwise convert as-is.
        if isinstance(raw, str) and raw.startswith('s'):
            raw = raw[1:]
        return float(raw)

    frame = pd.read_csv(filepath)
    for column in ('x', 'y', 'deg'):
        frame[f'{column}_val'] = frame[column].apply(_to_float)
    frame['N'] = frame['id'].apply(lambda ident: int(ident.split('_')[0]))

    return {
        n: list(zip(group['x_val'], group['y_val'], group['deg_val']))
        for n, group in frame.groupby('N')
    }

def create_tree_polygon(x, y, deg):
    """Return the tree outline rotated by ``deg`` about the origin, then shifted by ``(x, y)``.

    The outline vertices come from the module-level ``TX`` / ``TY`` lists and
    are used unscaled.
    """
    outline = Polygon(list(zip(TX, TY)))
    turned = rotate(outline, deg, origin=(0, 0))
    return translate(turned, x, y)

def calculate_bounding_box_side(trees):
    """Return the longer side of the axis-aligned box enclosing all trees.

    Args:
        trees: Iterable of ``(x, y, deg)`` tuples, one per tree.

    Returns:
        ``max(width, height)`` of the combined bounding box.
    """
    # shapely's ``bounds`` is (minx, miny, maxx, maxy) for each polygon.
    boxes = [create_tree_polygon(x, y, deg).bounds for x, y, deg in trees]
    xs = [value for box in boxes for value in (box[0], box[2])]
    ys = [value for box in boxes for value in (box[1], box[3])]
    width = max(xs) - min(xs)
    height = max(ys) - min(ys)
    return max(width, height)

baseline_path = '/home/code/exploration/datasets/submission.csv'
baseline_trees = parse_submission(baseline_path)

# Per-N score is the squared side of the enclosing square.
baseline_scores = {
    n: calculate_bounding_box_side(trees) ** 2
    for n, trees in baseline_trees.items()
}
# Same aggregate as for the grid layouts: score divided by tree count, summed.
baseline_total = sum(baseline_scores[n] / n for n in baseline_scores)

print(f"Baseline total score: {baseline_total:.6f}")
print(f"Zaburo total score: {zaburo_total:.6f}")
print(f"Difference: {zaburo_total - baseline_total:.6f}")

Baseline total score: 70.647327
Zaburo total score: 88.329998
Difference: 17.682671


In [5]:
# Compare the normalised (score / N) values of both sources for every N
print("\nPer-N comparison (Zaburo vs Baseline):")
print("N\tBaseline\tZaburo\t\tDiff\t\tBetter")

zaburo_wins = 0
baseline_wins = 0

for n in range(1, 201):
    base_score = baseline_scores[n] / n
    zab_score = zaburo_scores[n] / n
    diff = zab_score - base_score

    # A tie is credited to the baseline.
    if zab_score < base_score:
        better = "ZABURO"
        zaburo_wins += 1
    else:
        better = "BASELINE"
        baseline_wins += 1

    # Show the first twenty rows and then every twentieth one.
    if n % 20 == 0 or n <= 20:
        print(f"{n}\t{base_score:.6f}\t{zab_score:.6f}\t{diff:+.6f}\t{better}")

print(f"\nZaburo wins: {zaburo_wins}")
print(f"Baseline wins: {baseline_wins}")


Per-N comparison (Zaburo vs Baseline):
N	Baseline	Zaburo		Diff		Better
1	0.661250	1.000000	+0.338750	BASELINE
2	0.450779	0.720000	+0.269221	BASELINE
3	0.434745	0.653333	+0.218588	BASELINE
4	0.416545	0.765625	+0.349080	BASELINE
5	0.416850	0.800000	+0.383150	BASELINE
6	0.399610	0.666667	+0.267056	BASELINE
7	0.399897	0.630000	+0.230103	BASELINE
8	0.385407	0.551250	+0.165843	BASELINE
9	0.387415	0.537778	+0.150363	BASELINE
10	0.376630	0.484000	+0.107370	BASELINE
11	0.374924	0.545682	+0.170758	BASELINE
12	0.372724	0.500208	+0.127484	BASELINE
13	0.372294	0.603077	+0.230783	BASELINE
14	0.369543	0.560000	+0.190457	BASELINE
15	0.379203	0.600000	+0.220797	BASELINE
16	0.374128	0.562500	+0.188372	BASELINE
17	0.370040	0.529412	+0.159371	BASELINE
18	0.368771	0.500000	+0.131229	BASELINE
19	0.368615	0.522237	+0.153622	BASELINE
20	0.376057	0.496125	+0.120068	BASELINE
40	0.362148	0.441000	+0.078852	BASELINE
60	0.357258	0.450667	+0.093408	BASELINE
80	0.344880	0.442531	+0.097651	BASELINE
100	0.345531	0.39

In [6]:
# Check for overlaps in zaburo solutions
def has_overlap_zaburo(trees):
    """Return True if any two trees overlap beyond mere boundary contact.

    Two polygons count as overlapping when they intersect but do not merely
    touch. A spatial index narrows each polygon's candidates before the exact
    test is run.

    Args:
        trees: Sequence of objects exposing a shapely ``polygon`` attribute.

    Returns:
        ``True`` as soon as one overlapping pair is found, otherwise ``False``.
        Zero or one tree never overlaps.
    """
    if len(trees) <= 1:
        return False
    polygons = [t.polygon for t in trees]
    tree_index = STRtree(polygons)
    for i, poly in enumerate(polygons):
        # NOTE(review): relies on query() returning integer positions into
        # `polygons` (shapely 2.x behaviour) - confirm the installed version.
        for idx in tree_index.query(poly):
            # intersects/touches are symmetric, so each unordered pair only
            # needs one check; idx <= i also skips the polygon itself.
            if idx <= i:
                continue
            other = polygons[idx]
            if poly.intersects(other) and not poly.touches(other):
                return True
    return False

print("Checking zaburo solutions for overlaps...")
# Collect every N whose grid layout contains at least one overlapping pair
overlap_n = []
for n in range(1, 201):
    if not has_overlap_zaburo(zaburo_solutions[n]):
        continue
    overlap_n.append(n)
    print(f"  N={n}: OVERLAP DETECTED")

if overlap_n:
    print(f"\nOverlapping N values: {overlap_n}")
else:
    print("All zaburo solutions are valid (no overlaps)!")

Checking zaburo solutions for overlaps...


All zaburo solutions are valid (no overlaps)!


In [7]:
# Build the ensemble by taking, for each N, whichever source scores lower
print("\nCreating ensemble...")

ensemble_source = {}  # N -> 'baseline' or 'zaburo'
ensemble_scores = {}

for n in range(1, 201):
    base_score = baseline_scores[n] / n
    zab_score = zaburo_scores[n] / n

    # The grid layout is used only if it is strictly better and overlap-free.
    use_zaburo = zab_score < base_score and n not in overlap_n
    ensemble_source[n] = 'zaburo' if use_zaburo else 'baseline'
    ensemble_scores[n] = zab_score if use_zaburo else base_score

ensemble_total = sum(ensemble_scores.values())
print(f"Ensemble total score: {ensemble_total:.6f}")
print(f"Baseline total score: {baseline_total:.6f}")
print(f"Improvement: {baseline_total - ensemble_total:.6f}")

# Tally how many N values were taken from each source
chosen_sources = list(ensemble_source.values())
zab_count = chosen_sources.count('zaburo')
base_count = chosen_sources.count('baseline')
print(f"\nEnsemble composition: {zab_count} from zaburo, {base_count} from baseline")


Creating ensemble...
Ensemble total score: 70.647327
Baseline total score: 70.647327
Improvement: 0.000000

Ensemble composition: 0 from zaburo, 200 from baseline


In [8]:
# Create submission CSV
print("\nCreating submission CSV...")


def _as_submission_value(text):
    """Return ``text`` as a string carrying the leading 's' used in the CSV."""
    text = str(text)
    return text if text.startswith('s') else f"s{text}"


# Re-read the baseline as raw strings. The parsed baseline holds floats, and
# writing those back would drop any digits beyond float64 precision, so the
# baseline rows are copied from the original text instead.
baseline_raw = pd.read_csv(baseline_path, dtype=str)
baseline_raw_n = baseline_raw['id'].str.split('_').str[0].astype(int)
baseline_raw_by_n = {n: group for n, group in baseline_raw.groupby(baseline_raw_n)}

rows = []
for n in range(1, 201):
    if ensemble_source[n] == 'zaburo':
        trees = zaburo_solutions[n]
        for i, tree in enumerate(trees):
            # Decimal attributes format without loss.
            rows.append({
                'id': f"{n:03d}_{i}",
                'x': f"s{tree.center_x}",
                'y': f"s{tree.center_y}",
                'deg': f"s{tree.angle}"
            })
    else:
        raw_group = baseline_raw_by_n[n]
        # groupby keeps file order within a group, so the index i matches the
        # order used when the baseline was parsed and scored.
        for i, (x, y, deg) in enumerate(zip(raw_group['x'], raw_group['y'], raw_group['deg'])):
            rows.append({
                'id': f"{n:03d}_{i}",
                'x': _as_submission_value(x),
                'y': _as_submission_value(y),
                'deg': _as_submission_value(deg)
            })

df = pd.DataFrame(rows)
print(f"Submission has {len(df)} rows")

# Save
os.makedirs('/home/submission', exist_ok=True)
df.to_csv('/home/submission/submission.csv', index=False)
print("Saved to /home/submission/submission.csv")


Creating submission CSV...
Submission has 20100 rows
Saved to /home/submission/submission.csv


In [9]:
# Save metrics
metrics = {
    'cv_score': ensemble_total,
    'baseline_score': baseline_total,
    'zaburo_score': zaburo_total,
    'improvement': baseline_total - ensemble_total,
    # Number of N values the ensemble took from each source.
    'zaburo_wins': zab_count,
    'baseline_wins': base_count,
    'overlap_n': overlap_n
}

metrics_path = '/home/code/experiments/006_zaburo_grid/metrics.json'
# Create the experiment directory first so a fresh run does not fail on open().
os.makedirs(os.path.dirname(metrics_path), exist_ok=True)
with open(metrics_path, 'w') as f:
    json.dump(metrics, f, indent=2)

# Score this experiment is trying to reach; kept in one place so the printed
# target and the computed gap cannot drift apart.
target_score = 68.919

print(f"\nFinal Score: {ensemble_total:.6f}")
print(f"Target: {target_score}")
print(f"Gap to target: {ensemble_total - target_score:.6f}")


Final Score: 70.647327
Target: 68.919
Gap to target: 1.728327
