# Experiment 003: Greedy Optimization from Scratch

Now that we've confirmed our format is correct (exp_002 was ACCEPTED), we create an optimized submission using the getting-started kernel's greedy algorithm.

Key insights:
- N=1 is already optimal at a 45° angle
- Use greedy placement for all N values
- Validate ALL N values before submission

In [1]:
import math
import os
import random
from decimal import Decimal, getcontext
import numpy as np
import pandas as pd
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union
from shapely.strtree import STRtree
import json
from tqdm import tqdm

# Decimal arithmetic runs at 25 significant digits (the value the
# getting-started kernel uses, per the original notebook comment).
decimal_ctx = getcontext()
decimal_ctx.prec = 25

# Coordinates are multiplied by this factor before they are handed to shapely
# and divided by it again when bounds are read back.
scale_factor = Decimal('1e15')

print(f"Decimal precision: {decimal_ctx.prec}")
print(f"Scale factor: {scale_factor}")

Decimal precision: 25
Scale factor: 1E+15


In [2]:
# ChristmasTree with the same outline as the getting-started kernel's class
# (per the original notebook comment); only the way the vertex list is written
# differs.
class ChristmasTree:
    """A fixed-size Christmas-tree outline placed at a position and rotation.

    Attributes:
        center_x, center_y: Decimal position of the tree's local origin.
        angle: Decimal rotation, passed to ``affinity.rotate`` (degrees).
        polygon: shapely Polygon of the rotated, translated outline, expressed
            in coordinates multiplied by the module-level ``scale_factor``.
    """

    def __init__(self, center_x='0', center_y='0', angle='0'):
        """Build the outline, rotate it about its origin, then move it to the centre."""
        self.center_x = Decimal(center_x)
        self.center_y = Decimal(center_y)
        self.angle = Decimal(angle)

        two = Decimal('2')
        four = Decimal('4')

        # Horizontal extents, measured from the vertical axis of symmetry.
        half_trunk = Decimal('0.15') / two
        half_base = Decimal('0.7') / two
        half_mid = Decimal('0.4') / two
        quarter_mid = Decimal('0.4') / four
        half_top = Decimal('0.25') / two
        quarter_top = Decimal('0.25') / four

        # Heights of each horizontal feature.
        y_tip = Decimal('0.8')
        y_tier_1 = Decimal('0.5')
        y_tier_2 = Decimal('0.25')
        y_base = Decimal('0.0')
        y_trunk_bottom = -Decimal('0.2')

        # Outline in unscaled units: tip first, down the right-hand side,
        # across the trunk bottom, then back up the left-hand side.
        outline = [
            (Decimal('0.0'), y_tip),
            (half_top, y_tier_1),
            (quarter_top, y_tier_1),
            (half_mid, y_tier_2),
            (quarter_mid, y_tier_2),
            (half_base, y_base),
            (half_trunk, y_base),
            (half_trunk, y_trunk_bottom),
            (-half_trunk, y_trunk_bottom),
            (-half_trunk, y_base),
            (-half_base, y_base),
            (-quarter_mid, y_tier_2),
            (-half_mid, y_tier_2),
            (-quarter_top, y_tier_1),
            (-half_top, y_tier_1),
        ]
        upright = Polygon([(x * scale_factor, y * scale_factor) for x, y in outline])

        turned = affinity.rotate(upright, float(self.angle), origin=(0, 0))
        shift_x = float(self.center_x * scale_factor)
        shift_y = float(self.center_y * scale_factor)
        self.polygon = affinity.translate(turned, xoff=shift_x, yoff=shift_y)

print("ChristmasTree class defined")

ChristmasTree class defined


In [3]:
def generate_weighted_angle():
    """Draw an angle in [0, 2*pi] with density proportional to abs(sin(2*angle)).

    Uses rejection sampling: a uniformly drawn angle is accepted when a second
    uniform draw on [0, 1] falls below abs(sin(2*angle)); otherwise both draws
    are repeated.
    """
    full_turn = 2 * math.pi
    candidate = random.uniform(0, full_turn)
    while random.uniform(0, 1) >= abs(math.sin(2 * candidate)):
        candidate = random.uniform(0, full_turn)
    return candidate

def check_overlap_kaggle_style(trees):
    """Report the first pair of trees whose polygons overlap.

    Two trees count as overlapping when their polygons intersect but do not
    merely touch (the rule this notebook attributes to the Kaggle metric).

    Args:
        trees: sequence of objects exposing a ``polygon`` attribute.

    Returns:
        ``(True, "Trees i and j overlap")`` for the first overlapping pair in
        index order, or ``(False, "OK")`` when no pair overlaps.
    """
    shapes = [tree.polygon for tree in trees]
    for first, shape_a in enumerate(shapes):
        for second, shape_b in enumerate(shapes[first + 1:], start=first + 1):
            if not shape_a.intersects(shape_b):
                continue
            if shape_a.touches(shape_b):
                # Shared boundary only - allowed.
                continue
            return True, f"Trees {first} and {second} overlap"
    return False, "OK"

def calculate_side_length(trees):
    """Return the side of the smallest axis-aligned square enclosing all trees.

    Args:
        trees: sequence of objects exposing a ``polygon`` attribute whose
            coordinates are multiplied by the module-level ``scale_factor``.

    Returns:
        Decimal: the larger of the bounding-box width and height, in unscaled
        units. An empty ``trees`` yields ``Decimal('0')``.
    """
    # The union of no polygons has no usable bounds, so answer directly.
    if not trees:
        return Decimal('0')

    all_polygons = [t.polygon for t in trees]
    bounds = unary_union(all_polygons).bounds
    # bounds is (minx, miny, maxx, maxy) in scaled coordinates.
    minx, miny, maxx, maxy = (Decimal(b) / scale_factor for b in bounds)
    return max(maxx - minx, maxy - miny)

print("Helper functions defined")

Helper functions defined


In [4]:
def _overlaps_placed(candidate_poly, placed_polygons, tree_index):
    """Return True if candidate_poly overlaps (not just touches) a placed polygon."""
    return any(
        candidate_poly.intersects(placed_polygons[i])
        and not candidate_poly.touches(placed_polygons[i])
        for i in tree_index.query(candidate_poly)
    )


def _shift_polygon(polygon, px, py):
    """Translate polygon by the unscaled Decimal offset (px, py)."""
    return affinity.translate(
        polygon,
        xoff=float(px * scale_factor),
        yoff=float(py * scale_factor),
    )


def _find_rest_position(tree_polygon, vx, vy, placed_polygons, tree_index):
    """Slide a tree inward along (vx, vy) until it collides, then back it out.

    Returns ``(radius, px, py)``: the distance from the origin and the offset
    at which the tree first stops overlapping. If the tree never collides on
    the way in, all three values are ``Decimal('0')``.
    """
    step_in = Decimal('0.5')
    step_out = Decimal('0.05')
    radius = Decimal('20.0')

    # Coarse steps toward the origin until something is hit.
    collision_found = False
    while radius >= 0:
        candidate_poly = _shift_polygon(tree_polygon, radius * vx, radius * vy)
        if _overlaps_placed(candidate_poly, placed_polygons, tree_index):
            collision_found = True
            break
        radius -= step_in

    if not collision_found:
        return Decimal('0'), Decimal('0'), Decimal('0')

    # Fine steps back outward until the overlap disappears.
    while True:
        radius += step_out
        px = radius * vx
        py = radius * vy
        candidate_poly = _shift_polygon(tree_polygon, px, py)
        if not _overlaps_placed(candidate_poly, placed_polygons, tree_index):
            return radius, px, py


def initialize_trees(num_trees, existing_trees=None):
    """Greedily extend a layout until it holds ``num_trees`` trees.

    Each new tree gets a random rotation from the eight multiples of 45
    degrees, then is moved from far away toward the origin along a random
    direction until it collides and is backed out until it is clear. Ten
    directions are tried per tree and the one ending closest to the origin
    is kept.

    Args:
        num_trees: target number of trees. ``0`` returns an empty layout.
        existing_trees: trees already placed; they are kept as-is and the
            list itself is not modified. If it already holds ``num_trees`` or
            more trees, nothing is added and all of them are returned.

    Returns:
        ``(placed_trees, side_length)`` where ``side_length`` is the Decimal
        side of the enclosing axis-aligned square in unscaled units.
    """
    if num_trees == 0:
        return [], Decimal('0')

    placed_trees = [] if existing_trees is None else list(existing_trees)
    num_to_add = num_trees - len(placed_trees)

    if num_to_add > 0:
        # One rotation is drawn per tree to add, before any placement starts.
        unplaced_trees = [
            ChristmasTree(angle=str(random.choice([0, 45, 90, 135, 180, 225, 270, 315])))
            for _ in range(num_to_add)
        ]

        if not placed_trees:
            # The very first tree sits at the origin at 45 degrees (the
            # notebook's notes describe this as optimal for N=1); it takes
            # the place of the first randomly rotated tree.
            placed_trees.append(ChristmasTree(center_x='0', center_y='0', angle='45'))
            unplaced_trees = unplaced_trees[1:]

        for tree_to_place in unplaced_trees:
            placed_polygons = [p.polygon for p in placed_trees]
            tree_index = STRtree(placed_polygons)

            best_px = None
            best_py = None
            min_radius = Decimal('Infinity')

            # Try 10 random approach directions and keep the tightest result.
            for _ in range(10):
                angle = generate_weighted_angle()
                vx = Decimal(str(math.cos(angle)))
                vy = Decimal(str(math.sin(angle)))

                radius, px, py = _find_rest_position(
                    tree_to_place.polygon, vx, vy, placed_polygons, tree_index)

                if radius < min_radius:
                    min_radius = radius
                    best_px = px
                    best_py = py

            tree_to_place.center_x = best_px
            tree_to_place.center_y = best_py
            tree_to_place.polygon = _shift_polygon(
                tree_to_place.polygon, best_px, best_py)
            placed_trees.append(tree_to_place)

    return placed_trees, calculate_side_length(placed_trees)

print("Greedy placement function defined")

Greedy placement function defined


In [5]:
# Build every configuration incrementally, starting from an empty layout
random.seed(42)  # fixed seed so reruns produce the same layouts

all_trees = {}
current_placed_trees = []

print("Generating configurations for N=1 to N=200...")
for n in tqdm(range(1, 201)):
    # Each N reuses the layout for N-1 and adds the missing tree(s).
    current_placed_trees, side = initialize_trees(n, existing_trees=current_placed_trees)

    # Snapshot this N as freshly constructed trees so later steps hold
    # independent objects rather than references into the growing layout.
    snapshot = []
    for placed in current_placed_trees:
        snapshot.append(
            ChristmasTree(
                center_x=str(placed.center_x),
                center_y=str(placed.center_y),
                angle=str(placed.angle),
            )
        )
    all_trees[n] = snapshot

print(f"\nGenerated configurations for all N values")

Generating configurations for N=1 to N=200...


  0%|          | 0/200 [00:00<?, ?it/s]

  4%|▍         | 9/200 [00:00<00:02, 81.79it/s]

  9%|▉         | 18/200 [00:00<00:02, 70.92it/s]

 13%|█▎        | 26/200 [00:00<00:02, 67.92it/s]

 16%|█▋        | 33/200 [00:00<00:02, 65.39it/s]

 20%|██        | 40/200 [00:00<00:02, 62.39it/s]

 24%|██▎       | 47/200 [00:00<00:02, 59.19it/s]

 26%|██▋       | 53/200 [00:00<00:02, 56.50it/s]

 30%|██▉       | 59/200 [00:00<00:02, 54.02it/s]

 32%|███▎      | 65/200 [00:01<00:02, 51.63it/s]

 36%|███▌      | 71/200 [00:01<00:02, 48.63it/s]

 38%|███▊      | 76/200 [00:01<00:02, 46.55it/s]

 40%|████      | 81/200 [00:01<00:02, 44.85it/s]

 43%|████▎     | 86/200 [00:01<00:02, 43.13it/s]

 46%|████▌     | 91/200 [00:01<00:02, 41.53it/s]

 48%|████▊     | 96/200 [00:01<00:02, 40.12it/s]

 50%|█████     | 101/200 [00:02<00:02, 38.89it/s]

 52%|█████▎    | 105/200 [00:02<00:02, 37.75it/s]

 55%|█████▍    | 109/200 [00:02<00:02, 36.54it/s]

 56%|█████▋    | 113/200 [00:02<00:02, 35.54it/s]

 58%|█████▊    | 117/200 [00:02<00:02, 34.79it/s]

 60%|██████    | 121/200 [00:02<00:02, 34.02it/s]

 62%|██████▎   | 125/200 [00:02<00:02, 32.89it/s]

 64%|██████▍   | 129/200 [00:02<00:02, 32.02it/s]

 66%|██████▋   | 133/200 [00:03<00:02, 31.24it/s]

 68%|██████▊   | 137/200 [00:03<00:02, 30.53it/s]

 70%|███████   | 141/200 [00:03<00:01, 30.02it/s]

 72%|███████▎  | 145/200 [00:03<00:01, 29.21it/s]

 74%|███████▍  | 148/200 [00:03<00:01, 27.84it/s]

 76%|███████▌  | 151/200 [00:03<00:01, 26.34it/s]

 77%|███████▋  | 154/200 [00:03<00:01, 26.27it/s]

 78%|███████▊  | 157/200 [00:03<00:01, 26.10it/s]

 80%|████████  | 160/200 [00:04<00:01, 25.92it/s]

 82%|████████▏ | 163/200 [00:04<00:01, 25.77it/s]

 83%|████████▎ | 166/200 [00:04<00:01, 25.52it/s]

 84%|████████▍ | 169/200 [00:04<00:01, 25.23it/s]

 86%|████████▌ | 172/200 [00:04<00:01, 24.67it/s]

 88%|████████▊ | 175/200 [00:04<00:01, 24.25it/s]

 89%|████████▉ | 178/200 [00:04<00:00, 24.11it/s]

 90%|█████████ | 181/200 [00:04<00:00, 23.86it/s]

 92%|█████████▏| 184/200 [00:05<00:00, 23.68it/s]

 94%|█████████▎| 187/200 [00:05<00:00, 23.30it/s]

 95%|█████████▌| 190/200 [00:05<00:00, 22.98it/s]

 96%|█████████▋| 193/200 [00:05<00:00, 22.74it/s]

 98%|█████████▊| 196/200 [00:05<00:00, 22.49it/s]

100%|█████████▉| 199/200 [00:05<00:00, 22.31it/s]

100%|██████████| 200/200 [00:05<00:00, 34.60it/s]


Generated configurations for all N values





In [6]:
# Validate ALL N values
# The submission is written with x, y and deg rounded to 6 decimals, so the
# overlap check is run on trees rebuilt from those rounded values: what is
# validated is then exactly what gets submitted.
def rounded_like_submission(tree):
    """Rebuild `tree` from its x, y and angle rounded to 6 decimals."""
    x, y, deg = (
        str(float(np.round(float(value), 6)))
        for value in (tree.center_x, tree.center_y, tree.angle)
    )
    return ChristmasTree(center_x=x, center_y=y, angle=deg)


print("Validating all N values...")
overlapping_n = []

for n in tqdm(range(1, 201), desc="Validating"):
    has_overlap, msg = check_overlap_kaggle_style(
        [rounded_like_submission(tree) for tree in all_trees[n]]
    )
    if has_overlap:
        overlapping_n.append(n)
        print(f"N={n}: {msg}")

print(f"\nTotal N values with overlaps: {len(overlapping_n)}")
if overlapping_n:
    print(f"Overlapping N values: {overlapping_n}")
else:
    print("✓ All N values validated - NO OVERLAPS!")

Validating all N values...


Validating:   0%|          | 0/200 [00:00<?, ?it/s]

Validating:  24%|██▍       | 49/200 [00:00<00:00, 486.71it/s]

Validating:  49%|████▉     | 98/200 [00:00<00:00, 111.51it/s]

Validating:  62%|██████▏   | 123/200 [00:01<00:01, 64.99it/s]

Validating:  69%|██████▉   | 138/200 [00:02<00:01, 48.73it/s]

Validating:  74%|███████▍  | 148/200 [00:02<00:01, 39.87it/s]

Validating:  78%|███████▊  | 156/200 [00:03<00:01, 33.28it/s]

Validating:  81%|████████  | 162/200 [00:03<00:01, 28.94it/s]

Validating:  84%|████████▎ | 167/200 [00:03<00:01, 25.56it/s]

Validating:  86%|████████▌ | 171/200 [00:04<00:01, 23.15it/s]

Validating:  87%|████████▋ | 174/200 [00:04<00:01, 21.33it/s]

Validating:  88%|████████▊ | 177/200 [00:04<00:01, 19.45it/s]

Validating:  90%|█████████ | 180/200 [00:04<00:01, 17.74it/s]

Validating:  91%|█████████ | 182/200 [00:04<00:01, 16.79it/s]

Validating:  92%|█████████▏| 184/200 [00:05<00:01, 15.69it/s]

Validating:  93%|█████████▎| 186/200 [00:05<00:00, 14.73it/s]

Validating:  94%|█████████▍| 188/200 [00:05<00:00, 13.96it/s]

Validating:  95%|█████████▌| 190/200 [00:05<00:00, 13.25it/s]

Validating:  96%|█████████▌| 192/200 [00:05<00:00, 12.73it/s]

Validating:  97%|█████████▋| 194/200 [00:05<00:00, 12.31it/s]

Validating:  98%|█████████▊| 196/200 [00:06<00:00, 11.92it/s]

Validating:  99%|█████████▉| 198/200 [00:06<00:00, 11.58it/s]

Validating: 100%|██████████| 200/200 [00:06<00:00, 11.15it/s]

Validating: 100%|██████████| 200/200 [00:06<00:00, 30.90it/s]


Total N values with overlaps: 0
✓ All N values validated - NO OVERLAPS!





In [7]:
# Score every N as side^2 / N and add the scores up
total_score = 0
per_n_scores = {}
side_lengths = {}  # kept so the report below can reuse them

for n in range(1, 201):
    side_lengths[n] = calculate_side_length(all_trees[n])
    per_n_scores[n] = float(side_lengths[n] ** 2) / n
    total_score += per_n_scores[n]

print(f"Total score: {total_score:.6f}")
print(f"\nTop 10 score contributors:")
# Largest per-N score first; ties keep ascending-N order.
sorted_scores = sorted(per_n_scores.items(), key=lambda item: item[1], reverse=True)
for n, score in sorted_scores[:10]:
    print(f"N={n:3d}: side={float(side_lengths[n]):.6f}, score={score:.6f}")

Total score: 169.458992

Top 10 score contributors:
N= 21: side=4.619354, score=1.016116
N= 22: side=4.688663, score=0.999253
N= 17: side=4.073132, score=0.975906
N= 34: side=5.757969, score=0.975124
N= 81: side=8.844249, score=0.965688
N= 18: side=4.159316, score=0.961106
N= 11: side=3.250650, score=0.960611
N= 23: side=4.688663, score=0.955807
N= 82: side=8.844249, score=0.953911
N= 40: side=6.173244, score=0.952724


In [8]:
# Assemble the submission table: one row per tree, ids of the form NNN_t
cols = ['x', 'y', 'deg']
index = [f'{n:03d}_{t}' for n in range(1, 201) for t in range(n)]
tree_data = [
    [tree.center_x, tree.center_y, tree.angle]
    for n in range(1, 201)
    for tree in all_trees[n]
]

submission = pd.DataFrame(tree_data, index=index, columns=cols).rename_axis('id')

# Round to 6 decimals, then prefix every value with 's' so it is stored as a
# string (the format the original notebook attributes to the getting-started
# kernel).
for col in cols:
    rounded = submission[col].astype(float).round(decimals=6)
    submission[col] = 's' + rounded.astype('string')

print(f"Submission shape: {submission.shape}")
print(f"Expected: (20100, 3)")
print(f"\nFirst 5 rows:")
print(submission.head())

Submission shape: (20100, 3)
Expected: (20100, 3)

First 5 rows:
               x          y    deg
id                                
001_0       s0.0       s0.0  s45.0
002_0       s0.0       s0.0  s45.0
002_1  s0.374247  s0.249878   s0.0
003_0       s0.0       s0.0  s45.0
003_1  s0.374247  s0.249878   s0.0


In [9]:
# Save submission
import shutil

experiment_dir = '/home/code/experiments/003_greedy_from_scratch'

# Create both output directories up front so neither write can fail on a
# missing folder.
os.makedirs('/home/submission', exist_ok=True)
os.makedirs(experiment_dir, exist_ok=True)
submission.to_csv('/home/submission/submission.csv')
submission.to_csv(os.path.join(experiment_dir, 'submission.csv'))

# Save metrics
metrics = {
    'cv_score': total_score,
    'overlapping_n_count': len(overlapping_n),
    # JSON object keys must be strings, so N is stringified.
    'per_n_scores': {str(k): v for k, v in per_n_scores.items()}
}

with open(os.path.join(experiment_dir, 'metrics.json'), 'w') as f:
    json.dump(metrics, f, indent=2)

print(f"\nSubmission saved!")
print(f"CV Score: {total_score:.6f}")
print(f"Overlapping N values: {len(overlapping_n)}")
print(f"\nThis is a greedy solution created FROM SCRATCH.")
print(f"It should be ACCEPTED by Kaggle and have a much better score than wide-spacing (769.9).")


Submission saved!
CV Score: 169.458992
Overlapping N values: 0

This is a greedy solution created FROM SCRATCH.
It should be ACCEPTED by Kaggle and have a much better score than wide-spacing (769.9).


In [10]:
# Compare the greedy total against the exp_001 baseline total
baseline_score = 70.615745
beats_baseline = total_score < baseline_score

print("\nComparison to baseline (exp_001 had score 70.615745):")
print(f"Our greedy score: {total_score:.6f}")
print(f"Difference: {total_score - baseline_score:.6f}")

if not beats_baseline:
    print("✗ Worse than baseline (expected for simple greedy)")
    print("\nNote: The baseline used highly optimized solutions.")
    print("Our greedy approach is simpler but GUARANTEED to be valid.")
else:
    print("✓ BETTER than baseline!")


Comparison to baseline (exp_001 had score 70.615745):
Our greedy score: 169.458992
Difference: 98.843247
✗ Worse than baseline (expected for simple greedy)

Note: The baseline used highly optimized solutions.
Our greedy approach is simpler but GUARANTEED to be valid.
