# Corner Extraction Approach

This notebook implements the chistyakov corner extraction approach:
- For each large N configuration, look at trees closest to each corner
- Check if subsets of trees from larger configs beat current smaller configs
- This leverages the optimized larger configurations to find better smaller ones

In [1]:
import pandas as pd
import numpy as np
import math
import time
import os
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union
from shapely.strtree import STRtree

getcontext().prec = 25
print("Starting corner extraction approach...")

Starting corner extraction approach...


In [2]:
# Tree shape constants
TRUNK_W = 0.15
TRUNK_H = 0.2
BASE_W = 0.7
MID_W = 0.4
TOP_W = 0.25
TIP_Y = 0.8
TIER_1_Y = 0.5
TIER_2_Y = 0.25
BASE_Y = 0.0
TRUNK_BOTTOM_Y = -TRUNK_H

def get_tree_poly(x, y, deg):
    """Create a Shapely polygon for a tree at given position and angle."""
    coords = [
        (0.0, TIP_Y), (TOP_W / 2.0, TIER_1_Y), (TOP_W / 4.0, TIER_1_Y),
        (MID_W / 2.0, TIER_2_Y), (MID_W / 4.0, TIER_2_Y), (BASE_W / 2.0, BASE_Y),
        (TRUNK_W / 2.0, BASE_Y), (TRUNK_W / 2.0, TRUNK_BOTTOM_Y),
        (-TRUNK_W / 2.0, TRUNK_BOTTOM_Y), (-TRUNK_W / 2.0, BASE_Y),
        (-BASE_W / 2.0, BASE_Y), (-MID_W / 4.0, TIER_2_Y), (-MID_W / 2.0, TIER_2_Y),
        (-TOP_W / 4.0, TIER_1_Y), (-TOP_W / 2.0, TIER_1_Y),
    ]
    poly = Polygon(coords)
    return affinity.translate(affinity.rotate(poly, deg, origin=(0, 0)), x, y)

def calculate_side_length(trees):
    """Calculate bounding square side length."""
    if not trees:
        return 0.0
    all_polys = [t['poly'] for t in trees]
    bounds = unary_union(all_polys).bounds
    width = bounds[2] - bounds[0]
    height = bounds[3] - bounds[1]
    return max(width, height)

def calculate_score(trees, n):
    """Calculate score = side^2 / n."""
    side = calculate_side_length(trees)
    return side * side / n

print("Helper functions defined.")

Helper functions defined.


In [3]:
# Load baseline
def load_baseline():
    baseline_path = '/home/code/external_data/saspav_latest/santa-2025.csv'
    df = pd.read_csv(baseline_path)
    for col in ['x', 'y', 'deg']:
        if df[col].dtype == object:
            df[col] = df[col].astype(str).str.replace('s', '').astype(float)
    
    baseline = {}
    for n in range(1, 201):
        prefix = f"{n:03d}_"
        group = df[df["id"].str.startswith(prefix)].sort_values("id")
        if len(group) == n:
            trees = []
            for _, row in group.iterrows():
                trees.append({
                    'x': row['x'],
                    'y': row['y'],
                    'deg': row['deg'],
                    'poly': get_tree_poly(row['x'], row['y'], row['deg'])
                })
            baseline[n] = trees
    return baseline

print("Loading baseline...")
baseline = load_baseline()
print(f"Loaded baseline with {len(baseline)} configurations")

# Calculate baseline scores
baseline_scores = {}
for n in range(1, 201):
    if n in baseline:
        baseline_scores[n] = calculate_score(baseline[n], n)

baseline_total = sum(baseline_scores.values())
print(f"Baseline total score: {baseline_total:.6f}")

Loading baseline...


Loaded baseline with 200 configurations


Baseline total score: 70.659958


In [4]:
# Corner extraction function
def extract_corner_subsets(layout, n_large):
    """Extract subsets from a large layout by looking at corners.
    
    Returns a dict mapping small_n -> best_trees for that n.
    """
    if not layout:
        return {}
    
    # Get bounds of the layout
    all_polys = [t['poly'] for t in layout]
    bounds = unary_union(all_polys).bounds
    
    results = {}
    
    # Check all 4 corners
    corners = [
        (bounds[0], bounds[1]),  # bottom-left
        (bounds[0], bounds[3]),  # top-left
        (bounds[2], bounds[1]),  # bottom-right
        (bounds[2], bounds[3]),  # top-right
    ]
    
    for corner_x, corner_y in corners:
        # Calculate max distance to corner for each tree
        tree_distances = []
        for tree in layout:
            poly_bounds = tree['poly'].bounds
            max_dist = max(
                abs(poly_bounds[0] - corner_x),
                abs(poly_bounds[2] - corner_x),
                abs(poly_bounds[1] - corner_y),
                abs(poly_bounds[3] - corner_y),
            )
            tree_distances.append((max_dist, tree))
        
        # Sort by distance (closest first)
        tree_distances.sort(key=lambda x: x[0])
        
        # Build subsets incrementally
        subset = []
        for dist, tree in tree_distances:
            subset.append(tree)
            small_n = len(subset)
            
            if small_n >= n_large:
                break
            
            # Calculate score for this subset
            subset_score = calculate_score(subset, small_n)
            
            # Keep best score for each small_n
            if small_n not in results or subset_score < results[small_n][0]:
                results[small_n] = (subset_score, [t.copy() for t in subset])
    
    return results

print("Corner extraction function defined.")

Corner extraction function defined.


In [5]:
# Run corner extraction for all large N
print("\nRunning corner extraction...")
t0 = time.time()

improvements = []
best_configs = {n: baseline[n] for n in range(1, 201)}
best_scores = {n: baseline_scores[n] for n in range(1, 201)}

# For each large N, extract subsets and check if they beat current smaller configs
for n_large in range(10, 201):
    if n_large not in baseline:
        continue
    
    layout = baseline[n_large]
    subsets = extract_corner_subsets(layout, n_large)
    
    for small_n, (subset_score, subset_trees) in subsets.items():
        if small_n in best_scores and subset_score < best_scores[small_n]:
            improvement = best_scores[small_n] - subset_score
            improvements.append((small_n, n_large, best_scores[small_n], subset_score, improvement))
            best_scores[small_n] = subset_score
            best_configs[small_n] = subset_trees
    
    if n_large % 50 == 0:
        print(f"  Processed N={n_large}, found {len(improvements)} improvements so far")

print(f"\nCorner extraction completed in {time.time() - t0:.1f}s")
print(f"Total improvements found: {len(improvements)}")


Running corner extraction...


  Processed N=50, found 0 improvements so far


  Processed N=100, found 0 improvements so far


  Processed N=150, found 1 improvements so far


  Processed N=200, found 1 improvements so far

Corner extraction completed in 470.6s
Total improvements found: 1


In [6]:
# Show improvements
if improvements:
    print("\nImprovements found:")
    for small_n, n_large, old_score, new_score, imp in sorted(improvements, key=lambda x: -x[4])[:20]:
        print(f"  N={small_n} from N={n_large}: {old_score:.6f} -> {new_score:.6f} (improvement: {imp:.6f})")

# Calculate new total score
new_total = sum(best_scores.values())
print(f"\nBaseline total score: {baseline_total:.6f}")
print(f"New total score: {new_total:.6f}")
print(f"Total improvement: {baseline_total - new_total:.6f}")


Improvements found:
  N=65 from N=101: 0.363793 -> 0.363328 (improvement: 0.000466)

Baseline total score: 70.659958
New total score: 70.659493
Total improvement: 0.000466


In [None]:
# Save submission if improved
if new_total < baseline_total:
    print("\nSaving improved submission...")
    os.makedirs('/home/submission', exist_ok=True)
    
    rows = []
    for n in range(1, 201):
        config = best_configs[n]
        for i, t in enumerate(config):
            rows.append({
                'id': f'{n:03d}_{i}',
                'x': f"s{t['x']}",
                'y': f"s{t['y']}",
                'deg': f"s{t['deg']}"
            })
    
    df = pd.DataFrame(rows)
    df.to_csv('/home/submission/submission.csv', index=False)
    print(f"Saved to /home/submission/submission.csv")
    print(f"Total rows: {len(df)}")
else:
    print("\nNo improvement found. Keeping baseline.")

print(f"\nFinal score: {new_total:.9f}")