# Create Valid Submission with Kaggle-Compatible Validation

This notebook:
1. Uses the EXACT ChristmasTree class from the getting-started kernel
2. Creates a submission with GUARANTEED no overlaps (wide spacing)
3. Validates using Kaggle's exact method
4. Saves the submission file so it can be uploaded to verify that Kaggle accepts the format

In [1]:
import math
import os
import random
from decimal import Decimal, getcontext
import numpy as np
import pandas as pd
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union
from shapely.strtree import STRtree
import json

# Set precision for Decimal - EXACTLY as in getting-started kernel
# (25 significant digits for all Decimal arithmetic in this notebook).
getcontext().prec = 25
# Tree coordinates are multiplied by this factor before being handed to
# shapely (see ChristmasTree) and divided by it again when measuring the
# bounding box (see calculate_side_length).
scale_factor = Decimal('1e15')

# Echo the settings so the cell output records what was actually used.
print(f"Decimal precision: {getcontext().prec}")
print(f"Scale factor: {scale_factor}")

Decimal precision: 25
Scale factor: 1E+15


In [2]:
# EXACT ChristmasTree class from getting-started kernel
class ChristmasTree:
    """Represents a single, rotatable Christmas tree of a fixed size.

    The outline is a fixed 15-vertex polygon: three tiers (0.25, 0.4 and 0.7
    wide) above a 0.15 x 0.2 trunk. In tree-local coordinates the tip is at
    (0, 0.8) and the trunk bottom at y = -0.2, so the unrotated tree is 1.0
    tall and 0.7 wide, and the placement point (0, 0) is the middle of the
    bottom tier's base line.

    Attributes:
        center_x, center_y: Placement point as Decimal, in unscaled units.
        angle: Rotation as Decimal, in degrees.
        polygon: shapely Polygon of the placed tree. Its coordinates are in
            SCALED units (multiplied by the module-level ``scale_factor``).
    """

    def __init__(self, center_x='0', center_y='0', angle='0'):
        """Initializes the Christmas tree with a specific position and rotation.

        Args:
            center_x: X position of the placement point; any value accepted by
                ``Decimal()`` (the notebook passes strings).
            center_y: Y position of the placement point; same conventions.
            angle: Rotation in degrees about the placement point. It is passed
                to ``shapely.affinity.rotate`` with its default units, where
                positive angles rotate counter-clockwise.
        """
        self.center_x = Decimal(center_x)
        self.center_y = Decimal(center_y)
        self.angle = Decimal(angle)

        # Fixed tree dimensions in unscaled units.
        trunk_w = Decimal('0.15')
        trunk_h = Decimal('0.2')
        base_w = Decimal('0.7')
        mid_w = Decimal('0.4')
        top_w = Decimal('0.25')
        tip_y = Decimal('0.8')
        tier_1_y = Decimal('0.5')
        tier_2_y = Decimal('0.25')
        base_y = Decimal('0.0')
        trunk_bottom_y = -trunk_h

        # Outline listed clockwise from the tip: down the right side, across
        # the trunk bottom, back up the left side. Each tier contributes an
        # outer corner (w / 2) and an inner notch (w / 4) at the same height.
        # Every coordinate is multiplied by scale_factor before shapely sees it.
        initial_polygon = Polygon(
            [
                # Start at Tip
                (Decimal('0.0') * scale_factor, tip_y * scale_factor),
                # Right side - Top Tier
                (top_w / Decimal('2') * scale_factor, tier_1_y * scale_factor),
                (top_w / Decimal('4') * scale_factor, tier_1_y * scale_factor),
                # Right side - Middle Tier
                (mid_w / Decimal('2') * scale_factor, tier_2_y * scale_factor),
                (mid_w / Decimal('4') * scale_factor, tier_2_y * scale_factor),
                # Right side - Bottom Tier
                (base_w / Decimal('2') * scale_factor, base_y * scale_factor),
                # Right Trunk
                (trunk_w / Decimal('2') * scale_factor, base_y * scale_factor),
                (trunk_w / Decimal('2') * scale_factor, trunk_bottom_y * scale_factor),
                # Left Trunk
                (-(trunk_w / Decimal('2')) * scale_factor, trunk_bottom_y * scale_factor),
                (-(trunk_w / Decimal('2')) * scale_factor, base_y * scale_factor),
                # Left side - Bottom Tier
                (-(base_w / Decimal('2')) * scale_factor, base_y * scale_factor),
                # Left side - Middle Tier
                (-(mid_w / Decimal('4')) * scale_factor, tier_2_y * scale_factor),
                (-(mid_w / Decimal('2')) * scale_factor, tier_2_y * scale_factor),
                # Left side - Top Tier
                (-(top_w / Decimal('4')) * scale_factor, tier_1_y * scale_factor),
                (-(top_w / Decimal('2')) * scale_factor, tier_1_y * scale_factor),
            ]
        )
        # Rotate about the local origin first, then move to the requested
        # position. Both steps receive plain floats, so the Decimal precision
        # applies only to building the unrotated outline above.
        rotated = affinity.rotate(initial_polygon, float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(rotated,
                                          xoff=float(self.center_x * scale_factor),
                                          yoff=float(self.center_y * scale_factor))

# Confirmation line for the cell output; it records that the class above is
# meant to be a verbatim copy of the getting-started kernel's ChristmasTree.
print("ChristmasTree class defined (exact copy from getting-started kernel)")

ChristmasTree class defined (exact copy from getting-started kernel)


In [3]:
def check_overlap_kaggle_style(trees):
    """Report the first pair of trees whose polygons overlap.

    Two polygons count as overlapping when they intersect but do not merely
    touch (shared boundary points are allowed).

    Args:
        trees: Sequence of objects exposing a ``polygon`` attribute that
            supports ``intersects`` and ``touches``.

    Returns:
        ``(True, "Trees i and j overlap")`` for the first offending pair in
        index order (i < j), or ``(False, "OK")`` when no pair overlaps.
    """
    shapes = [tree.polygon for tree in trees]
    for first_idx, first in enumerate(shapes):
        for second_idx in range(first_idx + 1, len(shapes)):
            second = shapes[second_idx]
            # Disjoint pairs are fine; skip without asking about touching.
            if not first.intersects(second):
                continue
            # Boundary-only contact is allowed.
            if first.touches(second):
                continue
            return True, f"Trees {first_idx} and {second_idx} overlap"
    return False, "OK"

def calculate_side_length(trees):
    """Return the side of the axis-aligned square that encloses all trees.

    Args:
        trees: Sequence of objects whose ``polygon`` attribute is a shapely
            geometry in scaled coordinates.

    Returns:
        Decimal: the larger of the bounding box's width and height, converted
        back to unscaled units by dividing by ``scale_factor``.
    """
    bounds = unary_union([tree.polygon for tree in trees]).bounds
    # bounds is (minx, miny, maxx, maxy) in scaled units; undo the scaling.
    min_x, min_y, max_x, max_y = (
        Decimal(bounds[axis]) / scale_factor for axis in range(4)
    )
    return max(max_x - min_x, max_y - min_y)

# Confirmation line so the cell output shows that both helpers were defined.
print("Validation functions defined")

Validation functions defined


In [4]:
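# Create a MINIMAL valid submission with WIDE spacing
# Strategy: place trees on a square grid with spacing = 2.0 between centers.
# The tree is 1.0 tall and 0.7 wide, and no vertex lies farther than 0.8 from its
# center, so any center spacing above 1.6 GUARANTEES no overlaps at any rotation.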

def create_wide_spacing_submission(max_n=200, spacing=2.0, angle='45'):
    """Create grid layouts for N = 1..max_n that cannot overlap.

    For each N the trees fill a ceil(sqrt(N)) x ceil(sqrt(N)) grid in
    row-major order (left to right, then bottom to top), all with the same
    rotation. No vertex of a ChristmasTree is farther than 0.8 from its
    center, so any ``spacing`` above 1.6 rules out overlaps at every angle.

    Args:
        max_n: Largest N to generate; layouts are built for 1..max_n.
        spacing: Distance between neighbouring grid centers. Must be positive.
        angle: Rotation in degrees applied to every tree, in any form the
            ChristmasTree constructor accepts.

    Returns:
        dict mapping each N to a list of N ChristmasTree objects.

    Raises:
        ValueError: If ``spacing`` is not positive (every tree would land on
            the same point or the grid would be mirrored).
    """
    if spacing <= 0:
        raise ValueError(f"spacing must be positive, got {spacing!r}")

    all_trees = {}

    for n in range(1, max_n + 1):
        # Smallest square grid with at least n cells.
        grid_size = int(np.ceil(np.sqrt(n)))

        trees = []
        for tree_idx in range(n):
            # Row-major fill: tree_idx -> (row, col) inside the grid.
            row, col = divmod(tree_idx, grid_size)
            trees.append(
                ChristmasTree(
                    center_x=str(col * spacing),
                    center_y=str(row * spacing),
                    angle=angle,
                )
            )

        all_trees[n] = trees

    return all_trees

# Build every configuration once up front; the cells below read `all_trees`.
print("Creating wide-spacing submission...")
all_trees = create_wide_spacing_submission()
print("Created configurations for N=1 to N=200")

Creating wide-spacing submission...


Created configurations for N=1 to N=200


In [5]:
# Run the Kaggle-style pairwise overlap check on every configuration
print("Validating all N values...")
overlapping_n = []

for n in range(1, 201):
    has_overlap, msg = check_overlap_kaggle_style(all_trees[n])
    if not has_overlap:
        continue
    overlapping_n.append(n)
    # Echo only the first five failures so the output stays readable.
    if len(overlapping_n) <= 5:
        print(f"N={n}: {msg}")

print(f"\nTotal N values with overlaps: {len(overlapping_n)}")
if not overlapping_n:
    print("✓ All N values validated - NO OVERLAPS!")
else:
    print(f"Overlapping N values: {overlapping_n}")

Validating all N values...



Total N values with overlaps: 0
✓ All N values validated - NO OVERLAPS!


In [6]:
# Calculate total score: the sum over N of (bounding-square side)^2 / N.
total_score = 0
per_n_scores = {}
# Side lengths are cached per N so the report below does not have to rebuild
# the unary_union of every polygon a second time.
per_n_sides = {}

for n in range(1, 201):
    side = calculate_side_length(all_trees[n])
    score = float(side ** 2) / n
    per_n_sides[n] = side
    per_n_scores[n] = score
    # Plain left-to-right accumulation keeps the float result unchanged.
    total_score += score

print(f"Total score: {total_score:.6f}")
print("\nNote: This score is TERRIBLE because we use wide spacing.")
print("But it should be ACCEPTED by Kaggle (no overlaps).")
print("\nTop 5 score contributors:")
# Largest per-N contribution first.
sorted_scores = sorted(per_n_scores.items(), key=lambda x: x[1], reverse=True)
for n, score in sorted_scores[:5]:
    print(f"N={n:3d}: side={float(per_n_sides[n]):.6f}, score={score:.6f}")

Total score: 769.918978

Note: This score is TERRIBLE because we use wide spacing.
But it should be ACCEPTED by Kaggle (no overlaps).

Top 5 score contributors:
N= 10: side=6.813173, score=4.641932
N=  5: side=4.813173, score=4.633326
N= 17: side=8.813173, score=4.568942
N= 26: side=10.813173, score=4.497104
N= 37: side=12.813173, score=4.437227


In [7]:
# Create submission DataFrame: one row per tree, keyed '<NNN>_<tree index>'
cols = ['x', 'y', 'deg']
index = []
tree_data = []

for n in range(1, 201):
    index.extend(f'{n:03d}_{t}' for t in range(n))
    tree_data.extend(
        [tree.center_x, tree.center_y, tree.angle] for tree in all_trees[n]
    )

submission = pd.DataFrame(data=tree_data, index=index, columns=cols).rename_axis('id')

# Format EXACTLY as in getting-started kernel: round to 6 decimals, then
# prepend 's' so every value is written to the CSV as a string.
for col in cols:
    rounded = submission[col].astype(float).round(decimals=6)
    submission[col] = 's' + rounded.astype('string')

print(f"Submission shape: {submission.shape}")
print("Expected: (20100, 3)")
print("\nFirst 5 rows:")
print(submission.head())

Submission shape: (20100, 3)
Expected: (20100, 3)

First 5 rows:
          x     y    deg
id                      
001_0  s0.0  s0.0  s45.0
002_0  s0.0  s0.0  s45.0
002_1  s2.0  s0.0  s45.0
003_0  s0.0  s0.0  s45.0
003_1  s2.0  s0.0  s45.0


In [8]:
# Save submission
import shutil  # not used in this cell; kept so any later cell relying on it still works

SUBMISSION_DIR = '/home/submission'
EXPERIMENT_DIR = '/home/code/experiments/002_fix_validation'

# Create BOTH output directories before writing. Previously only the
# submission directory was created, so the experiment copy failed whenever
# its directory did not already exist.
for out_dir in (SUBMISSION_DIR, EXPERIMENT_DIR):
    os.makedirs(out_dir, exist_ok=True)
    submission.to_csv(os.path.join(out_dir, 'submission.csv'))

# Save metrics
metrics = {
    'cv_score': total_score,
    'overlapping_n_count': len(overlapping_n),
    'note': 'Wide-spacing submission to verify format is accepted by Kaggle'
}

with open(os.path.join(EXPERIMENT_DIR, 'metrics.json'), 'w') as f:
    json.dump(metrics, f, indent=2)

print("\nSubmission saved!")
print(f"CV Score: {total_score:.6f}")
print(f"Overlapping N values: {len(overlapping_n)}")
print("\nThis submission should be ACCEPTED by Kaggle (even though score is bad).")


Submission saved!
CV Score: 769.918978
Overlapping N values: 0

This submission should be ACCEPTED by Kaggle (even though score is bad).


In [9]:
# Verify the submission file by reading it back from disk
print("Verifying submission file...")
df = pd.read_csv('/home/submission/submission.csv')
print(f"Rows: {df.shape[0]} (expected: 20100)")
print(f"Columns: {list(df.columns)}")
# Each heading and its frame are printed on separate lines via sep.
print("\nFirst 5 rows:", df.head(), sep="\n")
print("\nLast 5 rows:", df.tail(), sep="\n")

Verifying submission file...
Rows: 20100 (expected: 20100)
Columns: ['id', 'x', 'y', 'deg']

First 5 rows:
      id     x     y    deg
0  001_0  s0.0  s0.0  s45.0
1  002_0  s0.0  s0.0  s45.0
2  002_1  s2.0  s0.0  s45.0
3  003_0  s0.0  s0.0  s45.0
4  003_1  s2.0  s0.0  s45.0

Last 5 rows:
            id     x      y    deg
20095  200_195  s0.0  s26.0  s45.0
20096  200_196  s2.0  s26.0  s45.0
20097  200_197  s4.0  s26.0  s45.0
20098  200_198  s6.0  s26.0  s45.0
20099  200_199  s8.0  s26.0  s45.0
