# Scale Approach: Create Gaps by Scaling Positions

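Take the touching ensemble (score ≈ 70.65) and scale every tree's center position outward from the centroid of the tree centers.
This is intended to open small gaps between trees while preserving their relative arrangement; only the centers move, so each tree keeps its own size and rotation.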

In [1]:
import pandas as pd
import numpy as np
from shapely import affinity
from shapely.geometry import Polygon
from itertools import combinations
import json
import warnings
warnings.filterwarnings('ignore')

class ChristmasTree:
    """A fixed tree-shaped outline placed at a center point with a rotation.

    Attributes:
        center_x: x offset applied to the outline, stored as ``float``.
        center_y: y offset applied to the outline, stored as ``float``.
        angle: rotation handed to ``affinity.rotate``, stored as ``float``.
        polygon: the outline after rotating about (0, 0), then translating
            by ``(center_x, center_y)``.
    """

    def __init__(self, center_x, center_y, angle):
        # float() accepts numeric strings as well as numbers.
        self.center_x = float(center_x)
        self.center_y = float(center_y)
        self.angle = float(angle)

        # The outline is symmetric about the y axis, so only the right-hand
        # side is listed (walking down from the tip); the left-hand side is
        # its mirror image walked back up.
        tip = (0.0, 0.8)
        right_side = [
            (0.125, 0.5), (0.0625, 0.5),    # y = 0.5 step
            (0.2, 0.25), (0.1, 0.25),       # y = 0.25 step
            (0.35, 0.0), (0.075, 0.0),      # y = 0 step
            (0.075, -0.2),                  # bottom of the narrow base
        ]
        left_side = [(-x, y) for x, y in reversed(right_side)]
        outline = Polygon([tip, *right_side, *left_side])

        # Rotate about the local origin first so the rotation pivot ends up
        # at (center_x, center_y) after the translation.
        self.polygon = affinity.translate(
            affinity.rotate(outline, self.angle, origin=(0, 0)),
            xoff=self.center_x,
            yoff=self.center_y,
        )

def parse_value(val):
    """Return ``val`` as a string, dropping one leading ``'s'`` if present.

    String inputs that start with ``'s'`` lose that first character; every
    other input (including non-strings) is simply converted with ``str``.
    """
    has_marker = isinstance(val, str) and val.startswith('s')
    return val[1:] if has_marker else str(val)

def load_trees_for_n(df, n):
    """Build the trees stored in ``df`` for configuration ``n``.

    Rows are selected by matching the ``id`` column against the zero-padded
    prefix ``f"{n:03d}_"``. For each selected row, the ``x``, ``y`` and
    ``deg`` values go through ``parse_value`` and are passed to
    ``ChristmasTree``.

    Returns:
        A list of ``ChristmasTree`` objects in the row order of ``df``.
    """
    id_prefix = f"{n:03d}_"
    selected = df[df['id'].str.startswith(id_prefix)]
    return [
        ChristmasTree(
            parse_value(record['x']),
            parse_value(record['y']),
            parse_value(record['deg']),
        )
        for _, record in selected.iterrows()
    ]

def get_min_distance(trees):
    """Return the smallest pairwise ``polygon.distance`` among ``trees``.

    Every unordered pair is measured once. With fewer than two trees there
    are no pairs, and the result is ``float('inf')``.
    """
    pair_gaps = (
        first.polygon.distance(second.polygon)
        for first, second in combinations(trees, 2)
    )
    # Seeding with infinity covers the "no pairs" case and keeps the same
    # running-minimum comparison order as an explicit loop.
    return min([float('inf'), *pair_gaps])

def get_bounding_box_side(trees):
    """Return the longer side of the axis-aligned box around all trees.

    The box is taken over every exterior vertex of every tree polygon; the
    result is ``max(width, height)`` of that box.
    """
    vertices = np.vstack(
        [np.array(tree.polygon.exterior.coords) for tree in trees]
    )
    # Column 0 is x and column 1 is y, so this unpacks to (width, height).
    width, height = vertices.max(axis=0) - vertices.min(axis=0)
    return max(width, height)

def is_valid_configuration(trees, min_gap=1e-9):
    """Return True when no pair of trees is closer than ``min_gap``.

    A pair fails the check when its ``polygon.distance`` is strictly less
    than ``min_gap``. Zero or one tree is always valid.
    """
    too_close = (
        first.polygon.distance(second.polygon) < min_gap
        for first, second in combinations(trees, 2)
    )
    return not any(too_close)

# Progress marker: shows in the cell output that the helpers above were defined.
print("Functions defined")

Functions defined


In [2]:
# Load touching ensemble
# This CSV is the starting point that the later cells rescale.
df_touching = pd.read_csv('/home/code/experiments/002_valid_ensemble/submission.csv')
print(f"Loaded {len(df_touching)} rows")

# Calculate original score
# Score = sum over n = 1..200 of (bounding-box side)^2 / n, where the side is
# the longer edge of the axis-aligned box around all trees of configuration n.
original_score = 0
for n in range(1, 201):
    trees = load_trees_for_n(df_touching, n)
    side = get_bounding_box_side(trees)
    original_score += (side ** 2) / n
print(f"Original touching score: {original_score:.6f}")

Loaded 20100 rows


Original touching score: 70.646824


In [3]:
def scale_configuration(trees, scale_factor):
    """
    Move every tree center along the ray from the centroid of all centers.

    Each center's offset from the centroid is multiplied by ``scale_factor``;
    angles are passed through untouched. With zero or one tree there is
    nothing to spread, so the positions are returned as they are.

    Returns a list of ``(x, y, angle)`` tuples in the same order as ``trees``.
    """
    count = len(trees)
    if count <= 1:
        return [(tree.center_x, tree.center_y, tree.angle) for tree in trees]

    # Centroid of the tree centers (not of the polygon areas).
    centroid_x = sum(tree.center_x for tree in trees) / count
    centroid_y = sum(tree.center_y for tree in trees) / count

    return [
        (
            centroid_x + (tree.center_x - centroid_x) * scale_factor,
            centroid_y + (tree.center_y - centroid_y) * scale_factor,
            tree.angle,
        )
        for tree in trees
    ]

# Progress marker: shows in the cell output that scale_configuration was defined.
print("Scale function defined")

Scale function defined


In [4]:
# Test scaling on a few N values
# For each sampled N, report how the bounding-box side, the minimum pairwise
# distance and the validity flag change under a handful of scale factors.
test_ns = [10, 50, 100, 200]
for test_n in test_ns:
    trees = load_trees_for_n(df_touching, test_n)
    original_side = get_bounding_box_side(trees)
    # NOTE(review): original_min_dist is computed but never printed or used below.
    original_min_dist = get_min_distance(trees)
    
    # Try different scale factors
    for scale in [1.001, 1.002, 1.005, 1.01]:
        # Rebuild the trees at the scaled centers; angles are unchanged.
        scaled_positions = scale_configuration(trees, scale)
        scaled_trees = [ChristmasTree(p[0], p[1], p[2]) for p in scaled_positions]
        
        new_side = get_bounding_box_side(scaled_trees)
        new_min_dist = get_min_distance(scaled_trees)
        is_valid = is_valid_configuration(scaled_trees, min_gap=1e-9)
        
        # The percentage is the relative growth of the bounding-box side.
        print(f"N={test_n}, scale={scale}: side {original_side:.4f} -> {new_side:.4f} (+{(new_side-original_side)/original_side*100:.2f}%), min_dist={new_min_dist:.2e}, valid={is_valid}")
    print()

N=10, scale=1.001: side 1.9407 -> 1.9421 (+0.07%), min_dist=0.00e+00, valid=False
N=10, scale=1.002: side 1.9407 -> 1.9435 (+0.14%), min_dist=0.00e+00, valid=False
N=10, scale=1.005: side 1.9407 -> 1.9477 (+0.36%), min_dist=0.00e+00, valid=False
N=10, scale=1.01: side 1.9407 -> 1.9548 (+0.72%), min_dist=0.00e+00, valid=False

N=50, scale=1.001: side 4.2471 -> 4.2509 (+0.09%), min_dist=0.00e+00, valid=False
N=50, scale=1.002: side 4.2471 -> 4.2547 (+0.18%), min_dist=0.00e+00, valid=False
N=50, scale=1.005: side 4.2471 -> 4.2662 (+0.45%), min_dist=0.00e+00, valid=False
N=50, scale=1.01: side 4.2471 -> 4.2854 (+0.90%), min_dist=0.00e+00, valid=False



N=100, scale=1.001: side 5.8782 -> 5.8836 (+0.09%), min_dist=1.50e-04, valid=True
N=100, scale=1.002: side 5.8782 -> 5.8891 (+0.19%), min_dist=3.00e-04, valid=True
N=100, scale=1.005: side 5.8782 -> 5.9054 (+0.46%), min_dist=7.50e-04, valid=True


N=100, scale=1.01: side 5.8782 -> 5.9327 (+0.93%), min_dist=1.50e-03, valid=True



N=200, scale=1.001: side 8.2164 -> 8.2242 (+0.09%), min_dist=4.17e-06, valid=True


N=200, scale=1.002: side 8.2164 -> 8.2320 (+0.19%), min_dist=5.60e-06, valid=True


N=200, scale=1.005: side 8.2164 -> 8.2554 (+0.47%), min_dist=8.94e-06, valid=True


N=200, scale=1.01: side 8.2164 -> 8.2944 (+0.95%), min_dist=1.43e-05, valid=True



In [5]:
# Find the minimum scale factor needed to make each N valid
def find_min_scale(trees, target_gap=1e-9, max_scale=1.1, *, strict=False):
    """Binary search for the smallest scale factor that achieves ``target_gap``.

    Args:
        trees: trees of one configuration (objects accepted by
            ``scale_configuration`` and ``is_valid_configuration``).
        target_gap: minimum pairwise distance required for validity.
        max_scale: upper end of the search interval ``[1.0, max_scale]``.
        strict: when True, raise instead of returning ``max_scale`` if even
            ``max_scale`` does not produce a valid configuration.

    Returns:
        ``1.0`` if ``trees`` is already valid; otherwise the upper end of the
        final bisection bracket. If the configuration is still invalid at
        ``max_scale`` and ``strict`` is False, ``max_scale`` itself is
        returned: in that case the result is NOT a valid scale, and callers
        must re-check validity before relying on it.

    Raises:
        ValueError: only when ``strict`` is True and the configuration is
            still invalid at ``max_scale``.

    Note:
        Bisection assumes that once a scale is valid every larger scale is
        valid too. Nothing in this function verifies that assumption.
    """
    def _valid_at(factor):
        # Rebuild the trees at the scaled centers and test the gap.
        placed = [
            ChristmasTree(x, y, deg)
            for x, y, deg in scale_configuration(trees, factor)
        ]
        return is_valid_configuration(placed, min_gap=target_gap)

    if is_valid_configuration(trees, min_gap=target_gap):
        return 1.0  # Already valid

    # Check the upper bound before searching. If it is invalid the loop below
    # could never lower `high`, so it would spend 50 O(n^2) validity passes
    # and then report max_scale as though it were a solution.
    if not _valid_at(max_scale):
        if strict:
            raise ValueError(
                f"no valid scale found in [1.0, {max_scale}] "
                f"for target_gap={target_gap}"
            )
        return max_scale

    low, high = 1.0, max_scale
    for _ in range(50):  # Binary search iterations
        mid = (low + high) / 2
        if _valid_at(mid):
            high = mid
        else:
            low = mid

    # `high` is always a scale that was observed to be valid.
    return high

# Compute one scale factor per configuration size and keep them keyed by N.
# NOTE(review): find_min_scale returns its max_scale (1.1 by default) when the
# search never sees a valid configuration, so a value of exactly 1.1 here most
# likely means "no valid scale found" rather than a real solution.
print("Finding minimum scale factors for all N...")
scale_factors = {}
for n in range(1, 201):
    trees = load_trees_for_n(df_touching, n)
    scale = find_min_scale(trees, target_gap=1e-9)
    scale_factors[n] = scale
    # Only every 50th N is echoed to keep the output short.
    if n % 50 == 0:
        print(f"N={n}: scale={scale:.6f}")

print(f"\nMax scale factor: {max(scale_factors.values()):.6f}")
print(f"Mean scale factor: {np.mean(list(scale_factors.values())):.6f}")

Finding minimum scale factors for all N...


N=50: scale=1.100000


N=100: scale=1.000000


N=150: scale=1.000000


N=200: scale=1.000000

Max scale factor: 1.100000
Mean scale factor: 1.052798


In [6]:
# Apply scaling to all N and calculate new score
# Rebuild every configuration with its own scale factor and collect, per N,
# the positions, bounding-box side and score contribution (side^2 / n).
results = []
for n in range(1, 201):
    trees = load_trees_for_n(df_touching, n)
    scale = scale_factors[n]
    
    if scale > 1.0:
        scaled_positions = scale_configuration(trees, scale)
        scaled_trees = [ChristmasTree(p[0], p[1], p[2]) for p in scaled_positions]
    else:
        # Scale 1.0: keep the loaded trees and positions exactly as they are.
        scaled_positions = [(t.center_x, t.center_y, t.angle) for t in trees]
        scaled_trees = trees
    
    # Contributions are computed without checking validity here.
    side = get_bounding_box_side(scaled_trees)
    contribution = (side ** 2) / n
    
    results.append({
        'n': n,
        'scale': scale,
        'positions': scaled_positions,
        'side': side,
        'contribution': contribution
    })

scaled_score = sum(r['contribution'] for r in results)
print(f"\nScaled score: {scaled_score:.6f}")
print(f"Original touching score: {original_score:.6f}")
# The two reference scores below are hard-coded literals, not computed here.
print(f"Valid ensemble score: 71.812779")
print(f"Target: 68.919154")
# A positive number means the scaled score is lower than the 71.812779 reference.
print(f"\nImprovement over valid ensemble: {71.812779 - scaled_score:.6f}")


Scaled score: 77.663456
Original touching score: 70.646824
Valid ensemble score: 71.812779
Target: 68.919154

Improvement over valid ensemble: -5.850677


In [7]:
# Verify all configurations are valid
# Rebuild each configuration from its stored positions and re-run the gap
# check, listing every N that fails and counting the failures.
print("Verifying validity...")
invalid_count = 0
for r in results:
    trees = [ChristmasTree(p[0], p[1], p[2]) for p in r['positions']]
    # Same 1e-9 gap that was passed to find_min_scale when choosing the scales.
    if not is_valid_configuration(trees, min_gap=1e-9):
        invalid_count += 1
        print(f"N={r['n']}: INVALID")

if invalid_count == 0:
    print("All configurations are valid!")
else:
    print(f"\n{invalid_count} invalid configurations")

Verifying validity...
N=10: INVALID
N=13: INVALID
N=17: INVALID
N=20: INVALID
N=21: INVALID
N=22: INVALID
N=23: INVALID
N=25: INVALID
N=26: INVALID
N=27: INVALID
N=28: INVALID
N=30: INVALID
N=32: INVALID
N=34: INVALID
N=42: INVALID
N=44: INVALID
N=45: INVALID
N=46: INVALID
N=49: INVALID


N=50: INVALID


N=53: INVALID
N=57: INVALID
N=58: INVALID
N=60: INVALID
N=65: INVALID


N=66: INVALID
N=67: INVALID


N=69: INVALID
N=70: INVALID
N=73: INVALID
N=74: INVALID
N=75: INVALID
N=76: INVALID


N=81: INVALID
N=82: INVALID
N=84: INVALID
N=86: INVALID


N=92: INVALID
N=93: INVALID
N=94: INVALID
N=95: INVALID


N=102: INVALID
N=104: INVALID
N=105: INVALID


N=111: INVALID
N=112: INVALID


N=116: INVALID


N=122: INVALID
N=123: INVALID
N=124: INVALID
N=125: INVALID
N=126: INVALID


N=133: INVALID


N=136: INVALID
N=137: INVALID
N=138: INVALID


N=145: INVALID


N=159: INVALID


N=169: INVALID
N=170: INVALID
N=171: INVALID
N=172: INVALID
N=173: INVALID


N=183: INVALID
N=184: INVALID
N=185: INVALID
N=186: INVALID


N=188: INVALID


N=192: INVALID
N=193: INVALID


N=197: INVALID
N=198: INVALID
N=199: INVALID



73 invalid configurations


In [None]:
# Build and save submission
import shutil

experiment_csv = '/home/code/experiments/004_scale_approach/submission.csv'
submission_csv = '/home/submission/submission.csv'
metrics_json = '/home/code/experiments/004_scale_approach/metrics.json'

# One row per tree; ids are "<NNN>_<index>" and every numeric field carries
# the 's' prefix that parse_value strips again on load.
all_rows = [
    {
        'id': f"{r['n']:03d}_{idx}",
        'x': f"s{x}",
        'y': f"s{y}",
        'deg': f"s{angle}"
    }
    for r in results
    for idx, (x, y, angle) in enumerate(r['positions'])
]

submission_df = pd.DataFrame(all_rows)
print(f"Submission has {len(submission_df)} rows")

# Save the experiment artefact regardless of validity so the run is recorded.
submission_df.to_csv(experiment_csv, index=False)
print("Saved to experiments/004_scale_approach/submission.csv")

# Re-check validity here instead of trusting that an earlier cell was run:
# the shared submission file must never be overwritten by configurations
# that fail the gap check.
invalid_ns = [
    r['n']
    for r in results
    if not is_valid_configuration(
        [ChristmasTree(x, y, angle) for x, y, angle in r['positions']],
        min_gap=1e-9,
    )
]

if invalid_ns:
    print(
        f"NOT copied to /home/submission/submission.csv: "
        f"{len(invalid_ns)} invalid configurations (N={invalid_ns})"
    )
else:
    shutil.copy(experiment_csv, submission_csv)
    print("Copied to /home/submission/submission.csv")

metrics = {'cv_score': scaled_score}
with open(metrics_json, 'w') as f:
    json.dump(metrics, f)
print(f"Metrics saved: {metrics}")