# Micro-Separation: Convert Touching Trees to Valid Submission

Take the best touching configurations (70.65) and apply minimal separation to create gaps.
Goal: Recover most of the 1.17 point gap between touching (70.65) and non-touching (71.81).

In [1]:
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon, Point
from shapely.ops import nearest_points
from itertools import combinations
from scipy.optimize import minimize_scalar
from scipy.spatial import ConvexHull
import json
import warnings
warnings.filterwarnings('ignore')

getcontext().prec = 30

# Christmas Tree class
class ChristmasTree:
    def __init__(self, center_x='0', center_y='0', angle='0'):
        self.center_x = float(center_x)
        self.center_y = float(center_y)
        self.angle = float(angle)
        
        initial_polygon = Polygon([
            (0.0, 0.8), (0.125, 0.5), (0.0625, 0.5),
            (0.2, 0.25), (0.1, 0.25), (0.35, 0.0),
            (0.075, 0.0), (0.075, -0.2), (-0.075, -0.2),
            (-0.075, 0.0), (-0.35, 0.0), (-0.1, 0.25),
            (-0.2, 0.25), (-0.0625, 0.5), (-0.125, 0.5),
        ])
        rotated = affinity.rotate(initial_polygon, self.angle, origin=(0, 0))
        self.polygon = affinity.translate(rotated, xoff=self.center_x, yoff=self.center_y)

def parse_value(val):
    if isinstance(val, str) and val.startswith('s'):
        return val[1:]
    return str(val)

def load_trees_for_n(df, n):
    prefix = f"{n:03d}_"
    rows = df[df['id'].str.startswith(prefix)]
    trees = []
    for _, row in rows.iterrows():
        x = parse_value(row['x'])
        y = parse_value(row['y'])
        deg = parse_value(row['deg'])
        trees.append(ChristmasTree(x, y, deg))
    return trees

print("Classes defined")

Classes defined


In [2]:
def get_min_distance(trees):
    """Get minimum distance between any pair of trees"""
    if len(trees) <= 1:
        return float('inf')
    min_dist = float('inf')
    for i, j in combinations(range(len(trees)), 2):
        dist = trees[i].polygon.distance(trees[j].polygon)
        min_dist = min(min_dist, dist)
    return min_dist

def get_touching_pairs(trees, threshold=1e-9):
    """Get pairs of trees that are touching"""
    touching = []
    for i, j in combinations(range(len(trees)), 2):
        dist = trees[i].polygon.distance(trees[j].polygon)
        if dist < threshold:
            touching.append((i, j, dist))
    return touching

def get_bounding_box_side(trees):
    """Get the side length of the bounding square"""
    all_points = []
    for tree in trees:
        coords = np.array(tree.polygon.exterior.coords)
        all_points.append(coords)
    all_points = np.vstack(all_points)
    min_x, min_y = all_points.min(axis=0)
    max_x, max_y = all_points.max(axis=0)
    return max(max_x - min_x, max_y - min_y)

def is_valid_configuration(trees, min_gap=1e-9):
    """Check if configuration has gaps between all trees"""
    if len(trees) <= 1:
        return True
    for i, j in combinations(range(len(trees)), 2):
        dist = trees[i].polygon.distance(trees[j].polygon)
        if dist < min_gap:
            return False
    return True

print("Helper functions defined")

Helper functions defined


In [3]:
# Load the best touching ensemble
df_touching = pd.read_csv('/home/code/experiments/002_valid_ensemble/submission.csv')
print(f"Loaded touching ensemble with {len(df_touching)} rows")

# Calculate current score
touching_score = 0
for n in range(1, 201):
    trees = load_trees_for_n(df_touching, n)
    side = get_bounding_box_side(trees)
    touching_score += (side ** 2) / n

print(f"Touching ensemble score: {touching_score:.6f}")

Loaded touching ensemble with 20100 rows


Touching ensemble score: 70.646824


In [4]:
# Analyze touching pairs for each N
print("Analyzing touching pairs...")
touching_analysis = []
for n in range(1, 201):
    trees = load_trees_for_n(df_touching, n)
    touching = get_touching_pairs(trees)
    if touching:
        touching_analysis.append({
            'n': n,
            'num_touching': len(touching),
            'min_dist': min(t[2] for t in touching)
        })

print(f"N values with touching trees: {len(touching_analysis)}/200")
print(f"\nFirst 10:")
for item in touching_analysis[:10]:
    print(f"  N={item['n']}: {item['num_touching']} touching pairs, min_dist={item['min_dist']:.2e}")

Analyzing touching pairs...


N values with touching trees: 199/200

First 10:
  N=2: 1 touching pairs, min_dist=5.35e-16
  N=3: 1 touching pairs, min_dist=5.78e-17
  N=4: 3 touching pairs, min_dist=2.34e-16
  N=5: 3 touching pairs, min_dist=4.23e-15
  N=6: 6 touching pairs, min_dist=5.40e-16
  N=7: 9 touching pairs, min_dist=1.24e-15
  N=8: 10 touching pairs, min_dist=1.22e-16
  N=9: 10 touching pairs, min_dist=3.96e-15
  N=10: 15 touching pairs, min_dist=2.21e-15
  N=11: 14 touching pairs, min_dist=9.27e-14


In [5]:
def separate_touching_trees(trees, min_gap=0.0001, max_iterations=500):
    """
    Separate touching trees by applying minimal translations.
    Uses iterative approach with smaller steps.
    """
    if len(trees) <= 1:
        return [(t.center_x, t.center_y, t.angle) for t in trees], True
    
    # Get current positions
    positions = [[t.center_x, t.center_y, t.angle] for t in trees]
    
    for iteration in range(max_iterations):
        # Rebuild trees with current positions
        current_trees = [ChristmasTree(p[0], p[1], p[2]) for p in positions]
        
        # Find touching pairs
        touching = get_touching_pairs(current_trees, threshold=min_gap)
        if not touching:
            return positions, True
        
        # For each touching pair, calculate separation vector
        for i, j, dist in touching:
            poly_i = current_trees[i].polygon
            poly_j = current_trees[j].polygon
            
            # Get nearest points between polygons
            p1, p2 = nearest_points(poly_i, poly_j)
            
            # Calculate separation vector (from i to j)
            dx = p2.x - p1.x
            dy = p2.y - p1.y
            
            # Normalize and scale
            length = np.sqrt(dx**2 + dy**2)
            if length < 1e-12:
                # Points are essentially the same, use random direction
                angle = np.random.random() * 2 * np.pi
                dx, dy = np.cos(angle), np.sin(angle)
                length = 1.0
            
            # Calculate how much to move (smaller steps for stability)
            move_dist = max((min_gap - dist + 1e-8) / 2, 1e-6)
            move_x = (dx / length) * move_dist
            move_y = (dy / length) * move_dist
            
            # Move trees apart
            positions[i][0] -= move_x
            positions[i][1] -= move_y
            positions[j][0] += move_x
            positions[j][1] += move_y
    
    # Check if we succeeded
    final_trees = [ChristmasTree(p[0], p[1], p[2]) for p in positions]
    if is_valid_configuration(final_trees, min_gap):
        return positions, True
    return positions, False

print("Separation function defined (v2 with smaller gap)")

Separation function defined (v2 with smaller gap)


In [6]:
# Test on a single N value
test_n = 4  # Small N for testing
trees = load_trees_for_n(df_touching, test_n)
print(f"N={test_n}: {len(trees)} trees")
print(f"Before: min_dist = {get_min_distance(trees):.2e}")
print(f"Before: bbox_side = {get_bounding_box_side(trees):.6f}")

new_positions, success = separate_touching_trees(trees, min_gap=0.001)
print(f"\nSeparation successful: {success}")

if success:
    new_trees = [ChristmasTree(p[0], p[1], p[2]) for p in new_positions]
    print(f"After: min_dist = {get_min_distance(new_trees):.2e}")
    print(f"After: bbox_side = {get_bounding_box_side(new_trees):.6f}")
    print(f"Score impact: {(get_bounding_box_side(new_trees)**2 - get_bounding_box_side(trees)**2)/test_n:.6f}")

N=4: 4 trees
Before: min_dist = 2.34e-16
Before: bbox_side = 1.290806

Separation successful: True
After: min_dist = 1.00e-03
After: bbox_side = 1.292518
Score impact: 0.001106


In [7]:
def optimize_rotation(trees):
    """
    Rotate entire configuration to minimize bounding box (fix_direction).
    Returns optimal angle and new bounding box side.
    """
    all_points = []
    for tree in trees:
        coords = np.array(tree.polygon.exterior.coords)
        all_points.append(coords)
    all_points = np.vstack(all_points)
    
    # Get convex hull for efficiency
    try:
        hull = ConvexHull(all_points)
        hull_points = all_points[hull.vertices]
    except:
        hull_points = all_points
    
    def bbox_at_angle(angle):
        """Calculate bounding box side at given rotation angle"""
        rad = np.radians(angle)
        cos_a, sin_a = np.cos(rad), np.sin(rad)
        rotated = hull_points @ np.array([[cos_a, -sin_a], [sin_a, cos_a]])
        return max(rotated.max(axis=0) - rotated.min(axis=0))
    
    # Find optimal angle
    result = minimize_scalar(bbox_at_angle, bounds=(0, 90), method='bounded')
    return result.x, result.fun

# Test rotation optimization
print("Testing rotation optimization...")
opt_angle, opt_side = optimize_rotation(new_trees)
print(f"Optimal rotation: {opt_angle:.4f} degrees")
print(f"Optimized bbox side: {opt_side:.6f}")

Testing rotation optimization...
Optimal rotation: 0.0398 degrees
Optimized bbox side: 1.292504


In [8]:
def apply_rotation_to_trees(trees, angle):
    """
    Rotate all trees around the centroid by given angle.
    Returns new tree positions.
    """
    # Calculate centroid
    all_points = []
    for tree in trees:
        coords = np.array(tree.polygon.exterior.coords)
        all_points.append(coords)
    all_points = np.vstack(all_points)
    centroid = all_points.mean(axis=0)
    
    # Rotate each tree position around centroid
    new_positions = []
    rad = np.radians(angle)
    cos_a, sin_a = np.cos(rad), np.sin(rad)
    
    for tree in trees:
        # Translate to origin
        dx = tree.center_x - centroid[0]
        dy = tree.center_y - centroid[1]
        
        # Rotate
        new_x = dx * cos_a - dy * sin_a + centroid[0]
        new_y = dx * sin_a + dy * cos_a + centroid[1]
        
        # Rotate the tree's own angle
        new_angle = tree.angle + angle
        
        new_positions.append([new_x, new_y, new_angle])
    
    return new_positions

print("Rotation function defined")

Rotation function defined


In [7]:
# Simpler approach: For each N, try to separate with very small gap
# If fails, fall back to the valid ensemble configuration
print("Processing all N values...")

# Load valid ensemble for fallback
df_valid = pd.read_csv('/home/code/experiments/002_valid_submission/submission.csv')

results = []
used_valid_fallback = 0
MIN_GAP = 0.0001

for n in range(1, 201):
    trees_touching = load_trees_for_n(df_touching, n)
    trees_valid = load_trees_for_n(df_valid, n)
    
    touching_side = get_bounding_box_side(trees_touching)
    valid_side = get_bounding_box_side(trees_valid)
    
    # Check if touching is already valid
    if is_valid_configuration(trees_touching, min_gap=1e-9):
        results.append({
            'n': n,
            'positions': [(t.center_x, t.center_y, t.angle) for t in trees_touching],
            'side': touching_side,
            'contribution': (touching_side ** 2) / n,
            'source': 'touching_valid'
        })
        continue
    
    # Try to separate touching trees
    new_positions, success = separate_touching_trees(trees_touching, min_gap=MIN_GAP, max_iterations=500)
    
    if success:
        new_trees = [ChristmasTree(p[0], p[1], p[2]) for p in new_positions]
        new_side = get_bounding_box_side(new_trees)
        
        # Use separated if better than valid, otherwise use valid
        if new_side < valid_side:
            results.append({
                'n': n,
                'positions': new_positions,
                'side': new_side,
                'contribution': (new_side ** 2) / n,
                'source': 'separated'
            })
        else:
            results.append({
                'n': n,
                'positions': [(t.center_x, t.center_y, t.angle) for t in trees_valid],
                'side': valid_side,
                'contribution': (valid_side ** 2) / n,
                'source': 'valid_better'
            })
            used_valid_fallback += 1
    else:
        # Separation failed, use valid ensemble
        results.append({
            'n': n,
            'positions': [(t.center_x, t.center_y, t.angle) for t in trees_valid],
            'side': valid_side,
            'contribution': (valid_side ** 2) / n,
            'source': 'valid_fallback'
        })
        used_valid_fallback += 1
    
    if n % 50 == 0:
        print(f"Processed N={n}")

print(f"\\nUsed valid fallback for {used_valid_fallback} N values")

Processing all N values...


Processed N=50


Processed N=100


Processed N=150


Processed N=200
\nUsed valid fallback for 194 N values


In [8]:
# Calculate total score
separated_score = sum(r['contribution'] for r in results)
print(f"\nMicro-separated score: {separated_score:.6f}")
print(f"Original touching score: {touching_score:.6f}")
print(f"Valid ensemble score: 71.812779")
print(f"Target: 68.919154")
print(f"\nImprovement over valid ensemble: {71.812779 - separated_score:.6f}")
print(f"Gap to target: {separated_score - 68.919154:.6f}")


Micro-separated score: 71.811060
Original touching score: 70.646824
Valid ensemble score: 71.812779
Target: 68.919154

Improvement over valid ensemble: 0.001719
Gap to target: 2.891906


In [9]:
# Let's analyze which N values were successfully separated
source_counts = {}
for r in results:
    source = r['source']
    source_counts[source] = source_counts.get(source, 0) + 1

print("Source distribution:")
for source, count in sorted(source_counts.items()):
    print(f"  {source}: {count}")

# Show the N values that were successfully separated
separated_n = [r['n'] for r in results if r['source'] == 'separated']
print(f"\nSuccessfully separated N values: {separated_n}")

Source distribution:
  separated: 5
  touching_valid: 1
  valid_better: 4
  valid_fallback: 190

Successfully separated N values: [5, 6, 7, 8, 13]


In [14]:
# Try a force-directed approach for separation
def force_directed_separation(trees, min_gap=0.0001, max_iterations=1000, step_size=0.0001):
    """
    Use force-directed layout to separate touching trees.
    Each tree repels others based on overlap/proximity.
    """
    if len(trees) <= 1:
        return [(t.center_x, t.center_y, t.angle) for t in trees], True
    
    n = len(trees)
    positions = np.array([[t.center_x, t.center_y] for t in trees])
    angles = [t.angle for t in trees]
    
    for iteration in range(max_iterations):
        # Calculate forces
        forces = np.zeros((n, 2))
        
        # Rebuild trees
        current_trees = [ChristmasTree(positions[i, 0], positions[i, 1], angles[i]) for i in range(n)]
        
        # Check if valid
        all_valid = True
        for i in range(n):
            for j in range(i+1, n):
                dist = current_trees[i].polygon.distance(current_trees[j].polygon)
                if dist < min_gap:
                    all_valid = False
                    # Calculate repulsion force
                    p1, p2 = nearest_points(current_trees[i].polygon, current_trees[j].polygon)
                    dx = p2.x - p1.x
                    dy = p2.y - p1.y
                    length = np.sqrt(dx**2 + dy**2)
                    if length < 1e-12:
                        angle = np.random.random() * 2 * np.pi
                        dx, dy = np.cos(angle), np.sin(angle)
                        length = 1.0
                    
                    # Repulsion strength inversely proportional to distance
                    strength = (min_gap - dist + 1e-8) * 10
                    force_x = -(dx / length) * strength
                    force_y = -(dy / length) * strength
                    
                    forces[i, 0] += force_x
                    forces[i, 1] += force_y
                    forces[j, 0] -= force_x
                    forces[j, 1] -= force_y
        
        if all_valid:
            return [[positions[i, 0], positions[i, 1], angles[i]] for i in range(n)], True
        
        # Apply forces with step size
        positions += forces * step_size
    
    # Final check
    final_trees = [ChristmasTree(positions[i, 0], positions[i, 1], angles[i]) for i in range(n)]
    if is_valid_configuration(final_trees, min_gap):
        return [[positions[i, 0], positions[i, 1], angles[i]] for i in range(n)], True
    return [[positions[i, 0], positions[i, 1], angles[i]] for i in range(n)], False

print("Force-directed separation defined")

Force-directed separation defined


In [15]:
# Test force-directed on a few N values
test_ns = [10, 20, 50]
for test_n in test_ns:
    trees = load_trees_for_n(df_touching, test_n)
    print(f"\\nN={test_n}:")
    print(f"  Before: min_dist = {get_min_distance(trees):.2e}, side = {get_bounding_box_side(trees):.6f}")
    
    new_positions, success = force_directed_separation(trees, min_gap=0.0001, max_iterations=2000)
    new_trees = [ChristmasTree(p[0], p[1], p[2]) for p in new_positions]
    print(f"  Success: {success}")
    print(f"  After: min_dist = {get_min_distance(new_trees):.2e}, side = {get_bounding_box_side(new_trees):.6f}")

\nN=10:
  Before: min_dist = 2.21e-15, side = 1.940696


  Success: False
  After: min_dist = 0.00e+00, side = 1.940825
\nN=20:
  Before: min_dist = 6.94e-17, side = 2.742469


  Success: False
  After: min_dist = 0.00e+00, side = 2.742681
\nN=50:
  Before: min_dist = 9.81e-17, side = 4.247076


  Success: False
  After: min_dist = 0.00e+00, side = 4.247262


In [13]:
# Analyze the gap between touching and valid for each N
gaps = []
for n in range(1, 201):
    trees_touching = load_trees_for_n(df_touching, n)
    trees_valid = load_trees_for_n(df_valid, n)
    
    touching_side = get_bounding_box_side(trees_touching)
    valid_side = get_bounding_box_side(trees_valid)
    
    touching_contrib = (touching_side ** 2) / n
    valid_contrib = (valid_side ** 2) / n
    gap = valid_contrib - touching_contrib
    
    gaps.append({
        'n': n,
        'touching_side': touching_side,
        'valid_side': valid_side,
        'touching_contrib': touching_contrib,
        'valid_contrib': valid_contrib,
        'gap': gap
    })

# Sort by gap (largest first)
gaps_sorted = sorted(gaps, key=lambda x: x['gap'], reverse=True)
print("Top 20 N values with largest gap (valid - touching):")
for g in gaps_sorted[:20]:
    print(f"  N={g['n']:3d}: gap={g['gap']:.4f} (valid={g['valid_contrib']:.4f}, touching={g['touching_contrib']:.4f})")

print(f"\nTotal gap: {sum(g['gap'] for g in gaps):.4f}")

Top 20 N values with largest gap (valid - touching):
  N=181: gap=0.0395 (valid=0.3695, touching=0.3299)
  N=168: gap=0.0365 (valid=0.3690, touching=0.3325)
  N=194: gap=0.0348 (valid=0.3678, touching=0.3330)
  N=165: gap=0.0250 (valid=0.3606, touching=0.3356)
  N=166: gap=0.0238 (valid=0.3587, touching=0.3348)
  N=167: gap=0.0237 (valid=0.3565, touching=0.3328)
  N=144: gap=0.0230 (valid=0.3652, touching=0.3423)
  N=138: gap=0.0220 (valid=0.3630, touching=0.3410)
  N=164: gap=0.0212 (valid=0.3585, touching=0.3373)
  N= 96: gap=0.0206 (valid=0.3670, touching=0.3464)
  N= 91: gap=0.0201 (valid=0.3680, touching=0.3479)
  N=139: gap=0.0200 (valid=0.3610, touching=0.3410)
  N=137: gap=0.0198 (valid=0.3626, touching=0.3428)
  N=186: gap=0.0192 (valid=0.3602, touching=0.3410)
  N= 93: gap=0.0188 (valid=0.3654, touching=0.3466)
  N=189: gap=0.0180 (valid=0.3568, touching=0.3388)
  N= 92: gap=0.0171 (valid=0.3654, touching=0.3483)
  N=162: gap=0.0166 (valid=0.3536, touching=0.3370)
  N=134: ga

In [16]:
# Save the best result we have
# Build submission from results
all_rows = []
for r in results:
    n = r['n']
    for idx, (x, y, angle) in enumerate(r['positions']):
        all_rows.append({
            'id': f"{n:03d}_{idx}",
            'x': f"s{x}",
            'y': f"s{y}",
            'deg': f"s{angle}"
        })

submission_df = pd.DataFrame(all_rows)
print(f"Submission has {len(submission_df)} rows")

# Verify all configurations are valid
print("\\nVerifying validity...")
invalid_count = 0
for n in range(1, 201):
    trees = load_trees_for_n(submission_df, n)
    if not is_valid_configuration(trees, min_gap=1e-9):
        invalid_count += 1
        
if invalid_count > 0:
    print(f"WARNING: {invalid_count} invalid configurations!")
else:
    print("All configurations are valid!")

# Calculate final score
final_score = 0
for n in range(1, 201):
    trees = load_trees_for_n(submission_df, n)
    side = get_bounding_box_side(trees)
    final_score += (side ** 2) / n
    
print(f"\\nFinal score: {final_score:.6f}")

Submission has 20100 rows
\nVerifying validity...


All configurations are valid!


\nFinal score: 71.811060


In [None]:
# Save submission
submission_df.to_csv('/home/code/experiments/003_micro_separation/submission.csv', index=False)
print("Saved to experiments/003_micro_separation/submission.csv")

# Copy to /home/submission/
import shutil
shutil.copy('/home/code/experiments/003_micro_separation/submission.csv', '/home/submission/submission.csv')
print("Copied to /home/submission/submission.csv")

# Save metrics
metrics = {'cv_score': final_score}
with open('/home/code/experiments/003_micro_separation/metrics.json', 'w') as f:
    json.dump(metrics, f)
print(f"Metrics saved: {metrics}")

In [None]:
# Verify all configurations are valid
print("\nVerifying all configurations...")
invalid_count = 0
for r in results:
    trees = [ChristmasTree(p[0], p[1], p[2]) for p in r['positions']]
    if not is_valid_configuration(trees, min_gap=1e-9):
        invalid_count += 1
        print(f"N={r['n']}: INVALID (min_dist={get_min_distance(trees):.2e})")

if invalid_count == 0:
    print("All configurations are valid!")
else:
    print(f"\n{invalid_count} invalid configurations")

In [None]:
# Build submission dataframe
all_rows = []
for r in results:
    n = r['n']
    for idx, (x, y, angle) in enumerate(r['positions']):
        all_rows.append({
            'id': f"{n:03d}_{idx}",
            'x': f"s{x}",
            'y': f"s{y}",
            'deg': f"s{angle}"
        })

submission_df = pd.DataFrame(all_rows)
print(f"Submission has {len(submission_df)} rows")
print(submission_df.head(10))

In [None]:
# Save submission
submission_df.to_csv('/home/code/experiments/003_micro_separation/submission.csv', index=False)
print("Saved to experiments/003_micro_separation/submission.csv")

# Copy to /home/submission/
import shutil
shutil.copy('/home/code/experiments/003_micro_separation/submission.csv', '/home/submission/submission.csv')
print("Copied to /home/submission/submission.csv")

# Save metrics
metrics = {'cv_score': separated_score}
with open('/home/code/experiments/003_micro_separation/metrics.json', 'w') as f:
    json.dump(metrics, f)
print(f"Metrics saved: {metrics}")

In [None]:
# Score breakdown by N range
print("\nScore breakdown by N range:")
for start, end in [(1, 10), (11, 50), (51, 100), (101, 150), (151, 200)]:
    range_results = [r for r in results if start <= r['n'] <= end]
    range_score = sum(r['contribution'] for r in range_results)
    print(f"  N={start}-{end}: {range_score:.4f}")

print(f"\nTotal: {separated_score:.6f}")