# Experiment 002: Grid-Based Initialization + Backward Propagation

## Goals:
1. Implement grid-based initialization from zaburo kernel (expected score ~88)
2. Apply backward propagation from crodoc kernel (expected score ~74-75)
3. Generate a much better baseline than sample_submission

In [1]:
import math
import numpy as np
import pandas as pd
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union
from shapely.strtree import STRtree
from concurrent.futures import ProcessPoolExecutor
from tqdm import tqdm
import time

# Set precision for Decimal
getcontext().prec = 25
scale_factor = Decimal('1e15')

print("Imports successful!")

Imports successful!


In [2]:
class ChristmasTree:
    """Represents a single, rotatable Christmas tree of a fixed size."""

    def __init__(self, center_x='0', center_y='0', angle='0'):
        """Initializes the Christmas tree with a specific position and rotation."""
        self.center_x = Decimal(str(center_x))
        self.center_y = Decimal(str(center_y))
        self.angle = Decimal(str(angle))

        trunk_w = Decimal('0.15')
        trunk_h = Decimal('0.2')
        base_w = Decimal('0.7')
        mid_w = Decimal('0.4')
        top_w = Decimal('0.25')
        tip_y = Decimal('0.8')
        tier_1_y = Decimal('0.5')
        tier_2_y = Decimal('0.25')
        base_y = Decimal('0.0')
        trunk_bottom_y = -trunk_h

        initial_polygon = Polygon([
            (Decimal('0.0') * scale_factor, tip_y * scale_factor),
            (top_w / Decimal('2') * scale_factor, tier_1_y * scale_factor),
            (top_w / Decimal('4') * scale_factor, tier_1_y * scale_factor),
            (mid_w / Decimal('2') * scale_factor, tier_2_y * scale_factor),
            (mid_w / Decimal('4') * scale_factor, tier_2_y * scale_factor),
            (base_w / Decimal('2') * scale_factor, base_y * scale_factor),
            (trunk_w / Decimal('2') * scale_factor, base_y * scale_factor),
            (trunk_w / Decimal('2') * scale_factor, trunk_bottom_y * scale_factor),
            (-(trunk_w / Decimal('2')) * scale_factor, trunk_bottom_y * scale_factor),
            (-(trunk_w / Decimal('2')) * scale_factor, base_y * scale_factor),
            (-(base_w / Decimal('2')) * scale_factor, base_y * scale_factor),
            (-(mid_w / Decimal('4')) * scale_factor, tier_2_y * scale_factor),
            (-(mid_w / Decimal('2')) * scale_factor, tier_2_y * scale_factor),
            (-(top_w / Decimal('4')) * scale_factor, tier_1_y * scale_factor),
            (-(top_w / Decimal('2')) * scale_factor, tier_1_y * scale_factor),
        ])
        rotated = affinity.rotate(initial_polygon, float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(rotated,
                                          xoff=float(self.center_x * scale_factor),
                                          yoff=float(self.center_y * scale_factor))

print("ChristmasTree class defined!")

ChristmasTree class defined!


In [3]:
def find_best_trees(n: int) -> tuple:
    """Find the best grid-based arrangement for n trees.
    
    Arranges trees in alternating rows with 0° and 180° rotations.
    Searches for optimal (n_even, n_odd) combinations.
    """
    best_score, best_trees = float("inf"), None
    
    for n_even in range(1, n + 1):
        for n_odd in [n_even, n_even - 1]:
            if n_odd < 0:
                continue
                
            all_trees = []
            rest = n
            r = 0
            
            while rest > 0:
                m = min(rest, n_even if r % 2 == 0 else n_odd)
                if m <= 0:
                    break
                rest -= m
    
                angle = 0 if r % 2 == 0 else 180
                x_offset = 0 if r % 2 == 0 else Decimal("0.7") / 2
                y = r // 2 * Decimal("1.0") if r % 2 == 0 else (Decimal("0.8") + (r - 1) // 2 * Decimal("1.0"))
                
                row_trees = [
                    ChristmasTree(
                        center_x=Decimal("0.7") * i + x_offset, 
                        center_y=y, 
                        angle=angle
                    ) 
                    for i in range(m)
                ]
                all_trees.extend(row_trees)
                r += 1
            
            if len(all_trees) != n:
                continue
                
            # Calculate bounding box
            xys = np.concatenate([np.asarray(t.polygon.exterior.xy).T / float(scale_factor) for t in all_trees])
            min_x, min_y = xys.min(axis=0)
            max_x, max_y = xys.max(axis=0)
            
            side = max(max_x - min_x, max_y - min_y)
            score = side ** 2
            
            if score < best_score:
                best_score = score
                best_trees = all_trees
    
    return best_score, best_trees

print("find_best_trees function defined!")

# Test on small n
start = time.time()
score, trees = find_best_trees(10)
print(f"N=10: score={score:.6f}, time={time.time()-start:.2f}s")

find_best_trees function defined!
N=10: score=4.840000, time=0.04s


In [4]:
# Generate grid-based solutions for all n=1 to 200
print("Generating grid-based solutions for n=1 to 200...")
print("This may take a few minutes...")

solutions = []
start_time = time.time()

for n in range(1, 201):
    score, trees = find_best_trees(n)
    solutions.append((score, trees))
    
    if n % 20 == 0:
        elapsed = time.time() - start_time
        print(f"N={n}: score={score:.6f}, elapsed={elapsed:.1f}s")

total_time = time.time() - start_time
print(f"\nTotal time: {total_time:.1f}s")

Generating grid-based solutions for n=1 to 200...
This may take a few minutes...


N=20: score=9.922500, elapsed=0.7s


N=40: score=17.640000, elapsed=5.3s


N=60: score=27.040000, elapsed=17.5s


N=80: score=35.402500, elapsed=41.2s


N=100: score=39.690000, elapsed=80.7s


N=120: score=49.000000, elapsed=139.0s


N=140: score=54.022500, elapsed=219.7s


N=160: score=64.802500, elapsed=327.9s


N=180: score=70.560000, elapsed=466.0s


N=200: score=81.000000, elapsed=640.2s

Total time: 640.2s


In [5]:
# Calculate overall score
overall_score = sum(score / n for n, (score, _) in enumerate(solutions, 1))
print(f"Grid-based initialization score: {overall_score:.6f}")
print(f"Target score: 68.922808")
print(f"Gap: {overall_score - 68.922808:.6f}")

Grid-based initialization score: 88.329998
Target score: 68.922808
Gap: 19.407190


In [6]:
# Save grid-based solution to CSV
def to_str(x):
    return f"s{round(float(x), 12)}"

rows = []
for n, (_, all_trees) in enumerate(solutions, 1):
    assert len(all_trees) == n, f"Expected {n} trees, got {len(all_trees)}"
    for i_t, tree in enumerate(all_trees):
        rows.append({
            "id": f"{n:03d}_{i_t}",
            "x": to_str(tree.center_x),
            "y": to_str(tree.center_y),
            "deg": to_str(tree.angle),
        })

df_grid = pd.DataFrame(rows)
df_grid.to_csv('/home/code/experiments/002_grid_initialization/grid_solution.csv', index=False)
print(f"Saved grid solution with {len(df_grid)} rows")
print(df_grid.head(10))

Saved grid solution with 20100 rows
      id      x     y     deg
0  001_0   s0.0  s0.0    s0.0
1  002_0   s0.0  s0.0    s0.0
2  002_1  s0.35  s0.8  s180.0
3  003_0   s0.0  s0.0    s0.0
4  003_1   s0.7  s0.0    s0.0
5  003_2  s0.35  s0.8  s180.0
6  004_0   s0.0  s0.0    s0.0
7  004_1   s0.7  s0.0    s0.0
8  004_2  s0.35  s0.8  s180.0
9  004_3  s1.05  s0.8  s180.0


In [None]:
# Implement backward propagation\n# Start from n=200 and work backward to n=1\n# For each n, try removing each tree and keep the best result\n\ndef calculate_side_length(trees):\n    \"\"\"Calculate the bounding square side length for a list of trees.\"\"\"\n    if not trees:\n        return Decimal('0')\n    \n    all_polygons = [t.polygon for t in trees]\n    bounds = unary_union(all_polygons).bounds\n    \n    minx = Decimal(str(bounds[0])) / scale_factor\n    miny = Decimal(str(bounds[1])) / scale_factor\n    maxx = Decimal(str(bounds[2])) / scale_factor\n    maxy = Decimal(str(bounds[3])) / scale_factor\n    \n    width = maxx - minx\n    height = maxy - miny\n    side_length = max(width, height)\n    \n    return float(side_length)\n\ndef calculate_score(trees, n_trees):\n    \"\"\"Calculate normalized score: side_length^2 / n_trees.\"\"\"\n    side = calculate_side_length(trees)\n    return side * side / n_trees\n\nprint(\"Helper functions defined!\")\n\n# Test\ntest_trees = solutions[9][1]  # n=10\ntest_side = calculate_side_length(test_trees)\nprint(f\"N=10: side={test_side:.6f}, score={test_side**2/10:.6f}\")

In [None]:
# Backward propagation: improve smaller configs using larger ones\nprint(\"Running backward propagation...\")\nprint(\"This propagates good configurations from larger to smaller n\")\n\n# Store best solutions\nbest_solutions = {}\nfor n, (score, trees) in enumerate(solutions, 1):\n    best_solutions[n] = {\n        'trees': trees,\n        'score': score / n,  # normalized score\n        'side': np.sqrt(score)\n    }\n\n# Track improvements\nimprovements = []\n\n# Backward propagation: start from n=200, go down to n=2\nfor n in range(200, 1, -1):\n    current_trees = best_solutions[n]['trees']\n    current_side = best_solutions[n]['side']\n    \n    # Try removing each tree to get a (n-1) configuration\n    best_removal_side = float('inf')\n    best_removal_trees = None\n    \n    for i in range(len(current_trees)):\n        # Create configuration without tree i\n        remaining_trees = current_trees[:i] + current_trees[i+1:]\n        \n        if len(remaining_trees) != n - 1:\n            continue\n            \n        # Calculate side length\n        side = calculate_side_length(remaining_trees)\n        \n        if side < best_removal_side:\n            best_removal_side = side\n            best_removal_trees = remaining_trees\n    \n    # Compare with existing (n-1) solution\n    existing_side = best_solutions[n-1]['side']\n    \n    if best_removal_side < existing_side:\n        # Found improvement!\n        improvement = existing_side - best_removal_side\n        improvements.append({\n            'n': n-1,\n            'old_side': existing_side,\n            'new_side': best_removal_side,\n            'improvement': improvement,\n            'improvement_pct': (improvement / existing_side) * 100\n        })\n        \n        best_solutions[n-1] = {\n            'trees': best_removal_trees,\n            'score': best_removal_side ** 2 / (n-1),\n            'side': best_removal_side\n        }\n    \n    if n % 20 == 0:\n        print(f\"Processed n={n}\")\n\nprint(f\"\\nBackward propagation complete!\")\nprint(f\"Found {len(improvements)} improvements\")

In [None]:
# Calculate new overall score after backward propagation\nnew_overall_score = sum(best_solutions[n]['score'] for n in range(1, 201))\nprint(f\"Score after backward propagation: {new_overall_score:.6f}\")\nprint(f\"Grid-based score: {overall_score:.6f}\")\nprint(f\"Improvement: {overall_score - new_overall_score:.6f}\")\nprint(f\"Target score: 68.922808\")\nprint(f\"Gap to target: {new_overall_score - 68.922808:.6f}\")"}, {"cell_type": "code", "content": "# Show top improvements\nif improvements:\n    improvements_df = pd.DataFrame(improvements)\n    improvements_df = improvements_df.sort_values('improvement_pct', ascending=False)\n    print(f\"\\nTop 10 improvements from backward propagation:\")\n    print(improvements_df.head(10).to_string(index=False))\n    print(f\"\\nMean improvement: {improvements_df['improvement_pct'].mean():.2f}%\")\nelse:\n    print(\"No improvements found\")