# Micro-Separation: Convert Touching Trees to Valid Submission

Take the best touching configurations (70.65) and apply minimal separation to create gaps.
Goal: Recover most of the 1.17 point gap between touching (70.65) and non-touching (71.81).

In [1]:
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon, Point
from shapely.ops import nearest_points
from itertools import combinations
from scipy.optimize import minimize_scalar
from scipy.spatial import ConvexHull
import json
import warnings
warnings.filterwarnings('ignore')

getcontext().prec = 30

# Christmas Tree class
class ChristmasTree:
    def __init__(self, center_x='0', center_y='0', angle='0'):
        self.center_x = float(center_x)
        self.center_y = float(center_y)
        self.angle = float(angle)
        
        initial_polygon = Polygon([
            (0.0, 0.8), (0.125, 0.5), (0.0625, 0.5),
            (0.2, 0.25), (0.1, 0.25), (0.35, 0.0),
            (0.075, 0.0), (0.075, -0.2), (-0.075, -0.2),
            (-0.075, 0.0), (-0.35, 0.0), (-0.1, 0.25),
            (-0.2, 0.25), (-0.0625, 0.5), (-0.125, 0.5),
        ])
        rotated = affinity.rotate(initial_polygon, self.angle, origin=(0, 0))
        self.polygon = affinity.translate(rotated, xoff=self.center_x, yoff=self.center_y)

def parse_value(val):
    if isinstance(val, str) and val.startswith('s'):
        return val[1:]
    return str(val)

def load_trees_for_n(df, n):
    prefix = f"{n:03d}_"
    rows = df[df['id'].str.startswith(prefix)]
    trees = []
    for _, row in rows.iterrows():
        x = parse_value(row['x'])
        y = parse_value(row['y'])
        deg = parse_value(row['deg'])
        trees.append(ChristmasTree(x, y, deg))
    return trees

print("Classes defined")

Classes defined


In [2]:
def get_min_distance(trees):
    """Get minimum distance between any pair of trees"""
    if len(trees) <= 1:
        return float('inf')
    min_dist = float('inf')
    for i, j in combinations(range(len(trees)), 2):
        dist = trees[i].polygon.distance(trees[j].polygon)
        min_dist = min(min_dist, dist)
    return min_dist

def get_touching_pairs(trees, threshold=1e-9):
    """Get pairs of trees that are touching"""
    touching = []
    for i, j in combinations(range(len(trees)), 2):
        dist = trees[i].polygon.distance(trees[j].polygon)
        if dist < threshold:
            touching.append((i, j, dist))
    return touching

def get_bounding_box_side(trees):
    """Get the side length of the bounding square"""
    all_points = []
    for tree in trees:
        coords = np.array(tree.polygon.exterior.coords)
        all_points.append(coords)
    all_points = np.vstack(all_points)
    min_x, min_y = all_points.min(axis=0)
    max_x, max_y = all_points.max(axis=0)
    return max(max_x - min_x, max_y - min_y)

def is_valid_configuration(trees, min_gap=1e-9):
    """Check if configuration has gaps between all trees"""
    if len(trees) <= 1:
        return True
    for i, j in combinations(range(len(trees)), 2):
        dist = trees[i].polygon.distance(trees[j].polygon)
        if dist < min_gap:
            return False
    return True

print("Helper functions defined")

Helper functions defined


In [3]:
# Load the best touching ensemble
df_touching = pd.read_csv('/home/code/experiments/002_valid_ensemble/submission.csv')
print(f"Loaded touching ensemble with {len(df_touching)} rows")

# Calculate current score
touching_score = 0
for n in range(1, 201):
    trees = load_trees_for_n(df_touching, n)
    side = get_bounding_box_side(trees)
    touching_score += (side ** 2) / n

print(f"Touching ensemble score: {touching_score:.6f}")

Loaded touching ensemble with 20100 rows


Touching ensemble score: 70.646824


In [4]:
# Analyze touching pairs for each N
print("Analyzing touching pairs...")
touching_analysis = []
for n in range(1, 201):
    trees = load_trees_for_n(df_touching, n)
    touching = get_touching_pairs(trees)
    if touching:
        touching_analysis.append({
            'n': n,
            'num_touching': len(touching),
            'min_dist': min(t[2] for t in touching)
        })

print(f"N values with touching trees: {len(touching_analysis)}/200")
print(f"\nFirst 10:")
for item in touching_analysis[:10]:
    print(f"  N={item['n']}: {item['num_touching']} touching pairs, min_dist={item['min_dist']:.2e}")

Analyzing touching pairs...


N values with touching trees: 199/200

First 10:
  N=2: 1 touching pairs, min_dist=5.35e-16
  N=3: 1 touching pairs, min_dist=5.78e-17
  N=4: 3 touching pairs, min_dist=2.34e-16
  N=5: 3 touching pairs, min_dist=4.23e-15
  N=6: 6 touching pairs, min_dist=5.40e-16
  N=7: 9 touching pairs, min_dist=1.24e-15
  N=8: 10 touching pairs, min_dist=1.22e-16
  N=9: 10 touching pairs, min_dist=3.96e-15
  N=10: 15 touching pairs, min_dist=2.21e-15
  N=11: 14 touching pairs, min_dist=9.27e-14


In [5]:
def separate_touching_trees(trees, min_gap=0.0001, max_iterations=500):
    """
    Separate touching trees by applying minimal translations.
    Uses iterative approach with smaller steps.
    """
    if len(trees) <= 1:
        return [(t.center_x, t.center_y, t.angle) for t in trees], True
    
    # Get current positions
    positions = [[t.center_x, t.center_y, t.angle] for t in trees]
    
    for iteration in range(max_iterations):
        # Rebuild trees with current positions
        current_trees = [ChristmasTree(p[0], p[1], p[2]) for p in positions]
        
        # Find touching pairs
        touching = get_touching_pairs(current_trees, threshold=min_gap)
        if not touching:
            return positions, True
        
        # For each touching pair, calculate separation vector
        for i, j, dist in touching:
            poly_i = current_trees[i].polygon
            poly_j = current_trees[j].polygon
            
            # Get nearest points between polygons
            p1, p2 = nearest_points(poly_i, poly_j)
            
            # Calculate separation vector (from i to j)
            dx = p2.x - p1.x
            dy = p2.y - p1.y
            
            # Normalize and scale
            length = np.sqrt(dx**2 + dy**2)
            if length < 1e-12:
                # Points are essentially the same, use random direction
                angle = np.random.random() * 2 * np.pi
                dx, dy = np.cos(angle), np.sin(angle)
                length = 1.0
            
            # Calculate how much to move (smaller steps for stability)
            move_dist = max((min_gap - dist + 1e-8) / 2, 1e-6)
            move_x = (dx / length) * move_dist
            move_y = (dy / length) * move_dist
            
            # Move trees apart
            positions[i][0] -= move_x
            positions[i][1] -= move_y
            positions[j][0] += move_x
            positions[j][1] += move_y
    
    # Check if we succeeded
    final_trees = [ChristmasTree(p[0], p[1], p[2]) for p in positions]
    if is_valid_configuration(final_trees, min_gap):
        return positions, True
    return positions, False

print("Separation function defined (v2 with smaller gap)")

Separation function defined


In [6]:
# Test on a single N value
test_n = 4  # Small N for testing
trees = load_trees_for_n(df_touching, test_n)
print(f"N={test_n}: {len(trees)} trees")
print(f"Before: min_dist = {get_min_distance(trees):.2e}")
print(f"Before: bbox_side = {get_bounding_box_side(trees):.6f}")

new_positions, success = separate_touching_trees(trees, min_gap=0.001)
print(f"\nSeparation successful: {success}")

if success:
    new_trees = [ChristmasTree(p[0], p[1], p[2]) for p in new_positions]
    print(f"After: min_dist = {get_min_distance(new_trees):.2e}")
    print(f"After: bbox_side = {get_bounding_box_side(new_trees):.6f}")
    print(f"Score impact: {(get_bounding_box_side(new_trees)**2 - get_bounding_box_side(trees)**2)/test_n:.6f}")

N=4: 4 trees
Before: min_dist = 2.34e-16
Before: bbox_side = 1.290806

Separation successful: True
After: min_dist = 1.00e-03
After: bbox_side = 1.292518
Score impact: 0.001106


In [7]:
def optimize_rotation(trees):
    """
    Rotate entire configuration to minimize bounding box (fix_direction).
    Returns optimal angle and new bounding box side.
    """
    all_points = []
    for tree in trees:
        coords = np.array(tree.polygon.exterior.coords)
        all_points.append(coords)
    all_points = np.vstack(all_points)
    
    # Get convex hull for efficiency
    try:
        hull = ConvexHull(all_points)
        hull_points = all_points[hull.vertices]
    except:
        hull_points = all_points
    
    def bbox_at_angle(angle):
        """Calculate bounding box side at given rotation angle"""
        rad = np.radians(angle)
        cos_a, sin_a = np.cos(rad), np.sin(rad)
        rotated = hull_points @ np.array([[cos_a, -sin_a], [sin_a, cos_a]])
        return max(rotated.max(axis=0) - rotated.min(axis=0))
    
    # Find optimal angle
    result = minimize_scalar(bbox_at_angle, bounds=(0, 90), method='bounded')
    return result.x, result.fun

# Test rotation optimization
print("Testing rotation optimization...")
opt_angle, opt_side = optimize_rotation(new_trees)
print(f"Optimal rotation: {opt_angle:.4f} degrees")
print(f"Optimized bbox side: {opt_side:.6f}")

Testing rotation optimization...
Optimal rotation: 0.0398 degrees
Optimized bbox side: 1.292504


In [8]:
def apply_rotation_to_trees(trees, angle):
    """
    Rotate all trees around the centroid by given angle.
    Returns new tree positions.
    """
    # Calculate centroid
    all_points = []
    for tree in trees:
        coords = np.array(tree.polygon.exterior.coords)
        all_points.append(coords)
    all_points = np.vstack(all_points)
    centroid = all_points.mean(axis=0)
    
    # Rotate each tree position around centroid
    new_positions = []
    rad = np.radians(angle)
    cos_a, sin_a = np.cos(rad), np.sin(rad)
    
    for tree in trees:
        # Translate to origin
        dx = tree.center_x - centroid[0]
        dy = tree.center_y - centroid[1]
        
        # Rotate
        new_x = dx * cos_a - dy * sin_a + centroid[0]
        new_y = dx * sin_a + dy * cos_a + centroid[1]
        
        # Rotate the tree's own angle
        new_angle = tree.angle + angle
        
        new_positions.append([new_x, new_y, new_angle])
    
    return new_positions

print("Rotation function defined")

Rotation function defined


In [9]:
# Process all N values with micro-separation
print("Processing all N values with micro-separation...")

results = []
failed_n = []

for n in range(1, 201):
    trees = load_trees_for_n(df_touching, n)
    original_side = get_bounding_box_side(trees)
    original_contribution = (original_side ** 2) / n
    
    # Check if already valid
    if is_valid_configuration(trees, min_gap=1e-9):
        results.append({
            'n': n,
            'positions': [(t.center_x, t.center_y, t.angle) for t in trees],
            'side': original_side,
            'contribution': original_contribution,
            'separated': False
        })
        continue
    
    # Apply micro-separation
    new_positions, success = separate_touching_trees(trees, min_gap=0.001, max_iterations=200)
    
    if not success:
        failed_n.append(n)
        # Use original (will be invalid)
        results.append({
            'n': n,
            'positions': [(t.center_x, t.center_y, t.angle) for t in trees],
            'side': original_side,
            'contribution': original_contribution,
            'separated': False
        })
        continue
    
    # Create new trees and optimize rotation
    new_trees = [ChristmasTree(p[0], p[1], p[2]) for p in new_positions]
    opt_angle, opt_side = optimize_rotation(new_trees)
    
    # Apply rotation
    rotated_positions = apply_rotation_to_trees(new_trees, opt_angle)
    final_trees = [ChristmasTree(p[0], p[1], p[2]) for p in rotated_positions]
    final_side = get_bounding_box_side(final_trees)
    
    # Verify still valid
    if not is_valid_configuration(final_trees, min_gap=1e-9):
        # Rotation may have caused issues, use pre-rotation
        final_side = get_bounding_box_side(new_trees)
        rotated_positions = new_positions
    
    results.append({
        'n': n,
        'positions': rotated_positions,
        'side': final_side,
        'contribution': (final_side ** 2) / n,
        'separated': True
    })
    
    if n % 20 == 0:
        print(f"Processed N={n}")

print(f"\nFailed to separate: {len(failed_n)} N values")
if failed_n:
    print(f"Failed N values: {failed_n[:20]}")

Processing all N values with micro-separation...



Failed to separate: 191 N values
Failed N values: [4, 7, 10, 11, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]


In [None]:
# Calculate total score
separated_score = sum(r['contribution'] for r in results)
print(f"\nMicro-separated score: {separated_score:.6f}")
print(f"Original touching score: {touching_score:.6f}")
print(f"Valid ensemble score: 71.812779")
print(f"Target: 68.919154")
print(f"\nImprovement over valid ensemble: {71.812779 - separated_score:.6f}")
print(f"Gap to target: {separated_score - 68.919154:.6f}")

In [None]:
# Verify all configurations are valid
print("\nVerifying all configurations...")
invalid_count = 0
for r in results:
    trees = [ChristmasTree(p[0], p[1], p[2]) for p in r['positions']]
    if not is_valid_configuration(trees, min_gap=1e-9):
        invalid_count += 1
        print(f"N={r['n']}: INVALID (min_dist={get_min_distance(trees):.2e})")

if invalid_count == 0:
    print("All configurations are valid!")
else:
    print(f"\n{invalid_count} invalid configurations")

In [None]:
# Build submission dataframe
all_rows = []
for r in results:
    n = r['n']
    for idx, (x, y, angle) in enumerate(r['positions']):
        all_rows.append({
            'id': f"{n:03d}_{idx}",
            'x': f"s{x}",
            'y': f"s{y}",
            'deg': f"s{angle}"
        })

submission_df = pd.DataFrame(all_rows)
print(f"Submission has {len(submission_df)} rows")
print(submission_df.head(10))

In [None]:
# Save submission
submission_df.to_csv('/home/code/experiments/003_micro_separation/submission.csv', index=False)
print("Saved to experiments/003_micro_separation/submission.csv")

# Copy to /home/submission/
import shutil
shutil.copy('/home/code/experiments/003_micro_separation/submission.csv', '/home/submission/submission.csv')
print("Copied to /home/submission/submission.csv")

# Save metrics
metrics = {'cv_score': separated_score}
with open('/home/code/experiments/003_micro_separation/metrics.json', 'w') as f:
    json.dump(metrics, f)
print(f"Metrics saved: {metrics}")

In [None]:
# Score breakdown by N range
print("\nScore breakdown by N range:")
for start, end in [(1, 10), (11, 50), (51, 100), (101, 150), (151, 200)]:
    range_results = [r for r in results if start <= r['n'] <= end]
    range_score = sum(r['contribution'] for r in range_results)
    print(f"  N={start}-{end}: {range_score:.4f}")

print(f"\nTotal: {separated_score:.6f}")