# Tessellation with Gap Constraints

Implements a tessellation/translation approach for large N values under gap constraints.
Based on the egortrushin kernel, but with the added requirement that the distance between every pair of trees is strictly greater than 0.

In [1]:
import pandas as pd
import numpy as np
from shapely import affinity
from shapely.geometry import Polygon
from itertools import combinations
import json
import copy
import random
import time

class ChristmasTree:
    """A tree-shaped polygon placed at a given position and rotation.

    Attributes:
        center_x: x offset applied to the outline, stored as a float.
        center_y: y offset applied to the outline, stored as a float.
        angle: rotation passed to ``shapely.affinity.rotate`` about the
            outline's local origin (shapely interprets it as degrees by
            default).
        polygon: the outline after rotation and translation.
    """

    # Vertices of the unrotated outline, expressed around the local origin.
    _OUTLINE = (
        (0.0, 0.8), (0.125, 0.5), (0.0625, 0.5),
        (0.2, 0.25), (0.1, 0.25), (0.35, 0.0),
        (0.075, 0.0), (0.075, -0.2), (-0.075, -0.2),
        (-0.075, 0.0), (-0.35, 0.0), (-0.1, 0.25),
        (-0.2, 0.25), (-0.0625, 0.5), (-0.125, 0.5),
    )

    # Shared template polygon. It is built on first use rather than at class
    # creation so that merely defining the class does not touch shapely.
    _template = None

    def __init__(self, center_x, center_y, angle):
        """Build the placed polygon.

        Args:
            center_x: x position; anything accepted by ``float()``.
            center_y: y position; anything accepted by ``float()``.
            angle: rotation angle; anything accepted by ``float()``.
        """
        self.center_x = float(center_x)
        self.center_y = float(center_y)
        self.angle = float(angle)

        # The outline never changes, so construct it once and reuse it.
        # The affinity helpers return new geometries, leaving it untouched.
        if ChristmasTree._template is None:
            ChristmasTree._template = Polygon(ChristmasTree._OUTLINE)

        rotated = affinity.rotate(
            ChristmasTree._template, self.angle, origin=(0, 0)
        )
        self.polygon = affinity.translate(
            rotated, xoff=self.center_x, yoff=self.center_y
        )

def parse_value(val):
    """Return ``val`` as a string, dropping a leading ``'s'`` marker.

    Strings that start with ``'s'`` lose that first character; every other
    input is converted with ``str()``.
    """
    has_marker = isinstance(val, str) and val.startswith('s')
    return val[1:] if has_marker else str(val)

def load_trees_for_n(df, n):
    """Build the trees stored in ``df`` for the configuration of size ``n``.

    Rows are selected by an ``id`` that starts with ``n`` zero-padded to three
    digits followed by an underscore. Each row's ``x``, ``y`` and ``deg``
    values go through ``parse_value`` before a ``ChristmasTree`` is built.

    Returns:
        A list of ``ChristmasTree`` objects in row order.
    """
    selected = df[df['id'].str.startswith(f"{n:03d}_")]
    return [
        ChristmasTree(parse_value(x), parse_value(y), parse_value(deg))
        for x, y, deg in zip(selected['x'], selected['y'], selected['deg'])
    ]

def get_min_distance(trees):
    """Return the smallest pairwise polygon distance among ``trees``.

    Returns ``float('inf')`` when there are fewer than two trees.
    """
    closest = float('inf')
    if len(trees) < 2:
        return closest
    for first, second in combinations(trees, 2):
        separation = first.polygon.distance(second.polygon)
        if separation < closest:
            closest = separation
    return closest

def get_bounding_box_side(trees):
    """Return the longer side of the axis-aligned box around all trees.

    The box is computed from the exterior vertices of every tree polygon.
    """
    vertices = np.vstack(
        [np.array(tree.polygon.exterior.coords) for tree in trees]
    )
    low_x, low_y = vertices.min(axis=0)
    high_x, high_y = vertices.max(axis=0)
    width = high_x - low_x
    height = high_y - low_y
    return height if height > width else width

def has_collision_with_gap(trees, min_gap=1e-9):
    """Report whether any two trees are closer than ``min_gap``.

    Returns ``False`` when there are fewer than two trees. Stops at the first
    offending pair.
    """
    return any(
        first.polygon.distance(second.polygon) < min_gap
        for first, second in combinations(trees, 2)
    )

# Progress marker printed once the definitions in this cell have run.
print("Functions defined")

Functions defined


In [4]:
def create_tessellation(n, base_trees, dx, dy):
    """
    Lay out n trees by repeating the base trees over a square grid.

    Grid cells are visited with the x index in the outer loop and the y index
    in the inner loop; each cell receives one copy of every base tree, shifted
    by (i * dx, j * dy), until n trees have been placed.

    base_trees: trees whose position and angle form the repeated motif
    dx, dy: translation distances between neighbouring cells
    """
    motif_size = len(base_trees)

    # One extra row/column beyond the square root so the grid always has room.
    cells_per_axis = int(np.ceil(np.sqrt(n / motif_size))) + 1

    placed = []
    for col in range(cells_per_axis):
        x_shift = col * dx
        for row in range(cells_per_axis):
            y_shift = row * dy
            for base in base_trees:
                if len(placed) >= n:
                    return placed[:n]
                placed.append(
                    ChristmasTree(
                        base.center_x + x_shift,
                        base.center_y + y_shift,
                        base.angle,
                    )
                )

    return placed[:n]

def tessellation_sa(n, iterations=5000, min_gap=1e-9):
    """
    Simulated annealing over the parameters of a two-tree tessellation.

    Optimizes the two base tree positions, their angles, and the translation
    distances dx and dy, minimizing the bounding-box side of the n-tree layout
    built by create_tessellation. Candidates in which any pair of trees is
    closer than min_gap are always rejected.

    n: number of trees to place
    iterations: number of annealing steps; values <= 0 skip the annealing and
        return the initial layout
    min_gap: minimum allowed distance between any two trees

    Returns (trees, best_side) for the best layout found, or None when no
    collision-free starting layout could be built.
    """
    # Private generator: yields the same stream as random.seed(42 + n) would,
    # but leaves the process-wide RNG state untouched for other code.
    rng = random.Random(42 + n)

    # Two base trees at different angles, the second offset and rotated.
    base_trees = [
        ChristmasTree(0, 0, 0),
        ChristmasTree(0.5, 0.5, 180),
    ]

    # Start with generous spacing (tree width is ~0.7, height is ~1.0).
    dx = 1.5
    dy = 1.5
    trees = create_tessellation(n, base_trees, dx, dy)

    if has_collision_with_gap(trees, min_gap):
        # Widen the grid until the layout is collision-free.
        for scale in (2.0, 2.5, 3.0, 4.0):
            dx = dy = scale
            trees = create_tessellation(n, base_trees, dx, dy)
            if not has_collision_with_gap(trees, min_gap):
                break
        else:
            print(f"N={n}: Could not create valid initial tessellation")
            return None

    best_side = get_bounding_box_side(trees)
    best_base = list(base_trees)
    best_dx, best_dy = dx, dy

    current_side = best_side
    T = 1.0
    T_min = 0.0001
    # Geometric cooling from T to T_min; guard the exponent for iterations=0.
    alpha = (T_min / T) ** (1.0 / iterations) if iterations > 0 else 1.0

    for _ in range(iterations):
        move_type = rng.randint(0, 4)
        sc = T  # Perturbation sizes shrink together with the temperature.

        # Work on a candidate copy so a rejected move needs no explicit
        # revert. Tree objects are never mutated, so sharing them is safe.
        cand_base = list(base_trees)
        cand_dx, cand_dy = dx, dy

        if move_type <= 1:  # Shift base tree 0 or base tree 1
            moved = base_trees[move_type]
            cand_base[move_type] = ChristmasTree(
                moved.center_x + rng.uniform(-0.1, 0.1) * sc,
                moved.center_y + rng.uniform(-0.1, 0.1) * sc,
                moved.angle,
            )
        elif move_type == 2:  # Rotate one of the base trees
            idx = rng.randint(0, 1)
            turned = base_trees[idx]
            cand_base[idx] = ChristmasTree(
                turned.center_x,
                turned.center_y,
                (turned.angle + rng.uniform(-10, 10) * sc) % 360,
            )
        elif move_type == 3:  # Perturb dx
            cand_dx = max(0.5, dx + rng.uniform(-0.1, 0.1) * sc)
        else:  # Perturb dy
            cand_dy = max(0.5, dy + rng.uniform(-0.1, 0.1) * sc)

        trees = create_tessellation(n, cand_base, cand_dx, cand_dy)

        if not has_collision_with_gap(trees, min_gap):
            new_side = get_bounding_box_side(trees)
            delta = new_side - current_side

            # Metropolis rule: always take improvements, sometimes take
            # worse layouts depending on the temperature.
            if delta < 0 or rng.random() < np.exp(-delta / T):
                base_trees, dx, dy = cand_base, cand_dx, cand_dy
                current_side = new_side
                if new_side < best_side:
                    best_side = new_side
                    best_base = cand_base
                    best_dx, best_dy = cand_dx, cand_dy

        T *= alpha

    # Rebuild the layout from the best parameters seen.
    trees = create_tessellation(n, best_base, best_dx, best_dy)

    return trees, best_side

# Progress marker printed once the definitions in this cell have run.
print("Tessellation SA defined")

Tessellation SA defined


In [6]:
# The tessellation approach is 2.7-3.5x worse than the valid ensemble.
# Analyze where the valid ensemble is weakest compared to touching solutions.

# Load both ensembles
df_valid = pd.read_csv('/home/code/submission_candidates/candidate_001.csv')
df_touching = pd.read_csv('/home/code/submission_candidates/candidate_002.csv')

print("Comparing valid vs touching ensemble by N:")
print(f"{'N':>4} {'Valid':>10} {'Touching':>10} {'Gap':>10} {'Gap%':>8}")
print("-" * 50)

# One (n, valid_contrib, touch_contrib, gap, gap_pct) tuple per N.
gaps = []
for n in range(1, 201):
    trees_valid = load_trees_for_n(df_valid, n)
    trees_touch = load_trees_for_n(df_touching, n)

    valid_side = get_bounding_box_side(trees_valid)
    touch_side = get_bounding_box_side(trees_touch)

    # Per-N score contribution: squared bounding-box side divided by N.
    valid_contrib = (valid_side ** 2) / n
    touch_contrib = (touch_side ** 2) / n

    gap = valid_contrib - touch_contrib
    gap_pct = (gap / touch_contrib) * 100
    gaps.append((n, valid_contrib, touch_contrib, gap, gap_pct))

    # Print every small N, then only every 20th, to keep the table short.
    if n <= 20 or n % 20 == 0:
        print(f"{n:4d} {valid_contrib:10.4f} {touch_contrib:10.4f} {gap:10.4f} {gap_pct:8.2f}%")

# Find N values with largest gaps.
# The heading used to start with "\n\\n", which printed a literal backslash-n.
print("\nTop 10 N values with largest gaps (potential for improvement):")
gaps_sorted = sorted(gaps, key=lambda x: x[3], reverse=True)
for n, valid_c, touch_c, gap, gap_pct in gaps_sorted[:10]:
    print(f"N={n:3d}: gap={gap:.4f} ({gap_pct:.2f}%)")

Comparing tessellation with valid ensemble:
   N      Valid Tessellation    Ratio")



  50     0.3709       1.0159     2.74x


  72     0.3506       1.1824     3.37x


 100     0.3461       1.1008     3.18x


 144     0.3652       1.1841     3.24x


 200     0.3380       1.1914     3.53x


In [None]:
# Reload the valid ensemble so this cell can run on its own.
df_valid = pd.read_csv('/home/code/submission_candidates/candidate_001.csv')

print("\nComparing tessellation with valid ensemble for large N:")
print(f"{'N':>4} {'Valid':>10} {'Tessellation':>12} {'Diff':>10}")

for n in (50, 72, 100, 144, 200):
    # Reference contribution from the valid ensemble.
    trees_valid = load_trees_for_n(df_valid, n)
    valid_side = get_bounding_box_side(trees_valid)
    valid_contrib = valid_side ** 2 / n

    # Annealed tessellation for the same N; nothing is printed if it fails.
    result = tessellation_sa(n, iterations=3000)
    if not result:
        continue

    trees_tess, tess_side = result
    tess_contrib = tess_side ** 2 / n
    diff = tess_contrib - valid_contrib
    print(f"{n:4d} {valid_contrib:10.4f} {tess_contrib:12.4f} {diff:+10.4f}")

In [None]:
# Tessellation scores worse than the valid ensemble here, so report what the
# valid ensemble achieves for large N: bounding-box side, score contribution
# and the smallest pairwise distance.

print("Valid ensemble scores for large N:")
for n in range(50, 201, 10):
    trees = load_trees_for_n(df_valid, n)
    min_d = get_min_distance(trees)
    side = get_bounding_box_side(trees)
    contrib = side ** 2 / n
    print(f"N={n:3d}: side={side:.4f}, contrib={contrib:.4f}, min_dist={min_d:.2e}")

In [None]:
# Tessellation does not beat the valid ensemble, so keep the valid ensemble
# and recompute its total score as a sanity check.

# Total score: sum over N of (bounding-box side squared) / N.
total_score = 0
for n in range(1, 201):
    side = get_bounding_box_side(load_trees_for_n(df_valid, n))
    total_score = total_score + (side ** 2) / n

print(f"Valid ensemble total score: {total_score:.6f}")
print(f"Target: 68.919154")
print(f"Gap: {total_score - 68.919154:.6f}")

In [None]:
# The valid ensemble is still the best candidate, so submit it unchanged.
import shutil

candidate_path = '/home/code/submission_candidates/candidate_001.csv'
submission_path = '/home/submission/submission.csv'
shutil.copy(candidate_path, submission_path)
print("Using valid ensemble as submission")

# Record the score next to the experiment.
metrics_path = '/home/code/experiments/008_tessellation_gap/metrics.json'
metrics = {'cv_score': total_score}
with open(metrics_path, 'w') as f:
    json.dump(metrics, f)
print(f"Metrics saved: {metrics}")
print(f"Metrics saved: {metrics}")