# Experiment 032: Corner Rebuild for Large N (51-200)

Based on Chistyakov's kernel, this approach:
1. Takes a large N layout (e.g., N=200, N=150, N=111)
2. Selects a corner of the bounding box
3. Sorts trees by distance from corner
4. Rebuilds smaller N layouts by taking closest trees first
5. This can propagate good large-N solutions to smaller N values

**KEY INSIGHT**: The gap is entirely in N=51-200 (1.705 points). We're already better than Paul Jurczak for N=1-50.

In [1]:
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union
import copy
import time

getcontext().prec = 25

# Tree shape constants
TREE_VERTICES = np.array([
    [0.0, 0.8], [0.125, 0.5], [0.0625, 0.5], [0.2, 0.25], [0.1, 0.25],
    [0.35, 0.0], [0.075, 0.0], [0.075, -0.2], [-0.075, -0.2], [-0.075, 0.0],
    [-0.35, 0.0], [-0.1, 0.25], [-0.2, 0.25], [-0.0625, 0.5], [-0.125, 0.5],
], dtype=np.float64)

def create_tree_polygon(x, y, deg):
    """Create a Shapely polygon for a tree."""
    tree = Polygon(TREE_VERTICES)
    tree = affinity.rotate(tree, deg, origin=(0, 0))
    tree = affinity.translate(tree, x, y)
    return tree

def check_overlap(trees):
    """Check if any trees overlap."""
    n = len(trees)
    for i in range(n):
        for j in range(i + 1, n):
            if trees[i].overlaps(trees[j]) or trees[i].contains(trees[j]) or trees[j].contains(trees[i]):
                return True
    return False

def get_side_length(trees):
    """Get the side length of the bounding square."""
    all_bounds = [t.bounds for t in trees]
    min_x = min(b[0] for b in all_bounds)
    min_y = min(b[1] for b in all_bounds)
    max_x = max(b[2] for b in all_bounds)
    max_y = max(b[3] for b in all_bounds)
    return max(max_x - min_x, max_y - min_y)

def calculate_score(trees):
    """Calculate score = SÂ² / N."""
    side = get_side_length(trees)
    return side * side / len(trees)

print("Functions defined")

Functions defined


In [2]:
# Load baseline submission
df = pd.read_csv('/home/submission/submission.csv')

def parse_value(v):
    if isinstance(v, str) and v.startswith('s'):
        return float(v[1:])
    return float(v)

def get_config(n):
    """Get configuration for N trees."""
    prefix = f"{n:03d}_"
    group = df[df["id"].str.startswith(prefix)].sort_values("id")
    configs = []
    for _, row in group.iterrows():
        x = parse_value(row["x"])
        y = parse_value(row["y"])
        deg = parse_value(row["deg"])
        configs.append((x, y, deg))
    return configs

# Load all configurations
all_configs = {}
all_trees = {}
all_scores = {}

for n in range(1, 201):
    configs = get_config(n)
    all_configs[n] = configs
    trees = [create_tree_polygon(x, y, deg) for x, y, deg in configs]
    all_trees[n] = trees
    all_scores[n] = calculate_score(trees)

print(f"Loaded {len(all_configs)} configurations")
print(f"Total baseline score: {sum(all_scores.values()):.6f}")
print(f"N=51-200 score: {sum(all_scores[n] for n in range(51, 201)):.6f}")

Loaded 200 configurations
Total baseline score: 70.624381
N=51-200 score: 51.591065


In [3]:
def corner_rebuild(source_n, target_range, all_configs, all_trees, all_scores):
    """
    Rebuild smaller N layouts from a larger N layout using corner selection.
    
    Args:
        source_n: The large N to use as source
        target_range: Range of N values to try rebuilding
        all_configs: Dict of all configurations
        all_trees: Dict of all tree polygons
        all_scores: Dict of all scores
    
    Returns:
        Dict of improvements {n: (new_configs, new_score, improvement)}
    """
    improvements = {}
    
    # Get source layout
    source_configs = all_configs[source_n]
    source_trees = all_trees[source_n]
    
    # Get bounding box corners
    all_bounds = [t.bounds for t in source_trees]
    min_x = min(b[0] for b in all_bounds)
    min_y = min(b[1] for b in all_bounds)
    max_x = max(b[2] for b in all_bounds)
    max_y = max(b[3] for b in all_bounds)
    
    corners = [
        (min_x, min_y),  # bottom-left
        (min_x, max_y),  # top-left
        (max_x, min_y),  # bottom-right
        (max_x, max_y),  # top-right
    ]
    
    for corner_x, corner_y in corners:
        # Calculate distance from corner for each tree
        distances = []
        for i, tree in enumerate(source_trees):
            bounds = tree.bounds
            dist = max(
                abs(bounds[0] - corner_x),
                abs(bounds[2] - corner_x),
                abs(bounds[1] - corner_y),
                abs(bounds[3] - corner_y),
            )
            distances.append((dist, i))
        
        # Sort by distance (closest first)
        distances.sort()
        
        # Rebuild layouts for target range
        for target_n in target_range:
            if target_n >= source_n:
                continue
            
            # Take the closest target_n trees
            selected_indices = [idx for _, idx in distances[:target_n]]
            new_configs = [source_configs[i] for i in selected_indices]
            new_trees = [source_trees[i] for i in selected_indices]
            
            # Check for overlaps
            if check_overlap(new_trees):
                continue
            
            # Calculate new score
            new_score = calculate_score(new_trees)
            current_score = all_scores[target_n]
            
            if new_score < current_score:
                improvement = current_score - new_score
                if target_n not in improvements or improvement > improvements[target_n][2]:
                    improvements[target_n] = (new_configs, new_score, improvement)
    
    return improvements

print("Corner rebuild function defined")

Corner rebuild function defined


In [4]:
# Try corner rebuild from multiple large N sources
print("Running corner rebuild from large N sources...")
print("Focus on N=51-200 where the gap is")
print()

all_improvements = {}

# Try from N=200 down to N=60
for source_n in [200, 180, 160, 140, 120, 100, 80, 60]:
    print(f"Source N={source_n}...")
    t0 = time.time()
    
    # Target range: all N values smaller than source_n, but focus on 51-200
    target_range = range(max(51, source_n - 50), source_n)
    
    improvements = corner_rebuild(source_n, target_range, all_configs, all_trees, all_scores)
    
    for n, (configs, score, improvement) in improvements.items():
        if n not in all_improvements or improvement > all_improvements[n][2]:
            all_improvements[n] = (configs, score, improvement)
            print(f"  N={n}: {all_scores[n]:.6f} -> {score:.6f} (improvement: {improvement:.6f})")
    
    print(f"  Time: {time.time() - t0:.1f}s")

print(f"\nTotal improvements found: {len(all_improvements)}")

Running corner rebuild from large N sources...
Focus on N=51-200 where the gap is

Source N=200...


  Time: 23.0s
Source N=180...


  Time: 18.3s
Source N=160...


  Time: 13.7s
Source N=140...


  Time: 10.1s
Source N=120...


  Time: 7.3s
Source N=100...


  Time: 4.4s
Source N=80...


  Time: 1.9s
Source N=60...


  Time: 0.4s

Total improvements found: 0


In [5]:
# Apply improvements
if all_improvements:
    print("\nApplying improvements...")
    
    new_configs = dict(all_configs)
    new_trees = dict(all_trees)
    new_scores = dict(all_scores)
    
    for n, (configs, score, improvement) in all_improvements.items():
        new_configs[n] = configs
        new_trees[n] = [create_tree_polygon(x, y, deg) for x, y, deg in configs]
        new_scores[n] = score
    
    new_total = sum(new_scores.values())
    old_total = sum(all_scores.values())
    
    print(f"Old total score: {old_total:.6f}")
    print(f"New total score: {new_total:.6f}")
    print(f"Total improvement: {old_total - new_total:.6f}")
else:
    print("\nNo improvements found from corner rebuild")
    new_configs = all_configs
    new_scores = all_scores


No improvements found from corner rebuild


In [6]:
# Validate all groups
print("\nValidating all groups...")
overlap_count = 0

for n in range(1, 201):
    configs = new_configs[n]
    trees = [create_tree_polygon(x, y, deg) for x, y, deg in configs]
    if check_overlap(trees):
        overlap_count += 1
        print(f"  Group {n:03d} has overlaps!")

if overlap_count == 0:
    print("All groups valid - no overlaps!")
else:
    print(f"\nWARNING: {overlap_count} groups have overlaps!")


Validating all groups...


All groups valid - no overlaps!


In [7]:
# Save submission if improved and valid
final_score = sum(new_scores.values())
baseline_score = sum(all_scores.values())

if final_score < baseline_score and overlap_count == 0:
    print(f"\nSaving improved submission...")
    
    rows = []
    for n in range(1, 201):
        for i, (x, y, deg) in enumerate(new_configs[n]):
            rows.append({
                "id": f"{n:03d}_{i}",
                "x": f"s{x}",
                "y": f"s{y}",
                "deg": f"s{deg}",
            })
    
    new_df = pd.DataFrame(rows)
    new_df.to_csv("/home/submission/submission.csv", index=False)
    print(f"Saved to /home/submission/submission.csv")
    print(f"New total score: {final_score:.9f}")
else:
    print(f"\nNo improvement or invalid - keeping baseline")
    print(f"Baseline score: {baseline_score:.9f}")
    final_score = baseline_score


No improvement or invalid - keeping baseline
Baseline score: 70.624380724


In [8]:
# Save metrics
import json

metrics = {
    'cv_score': final_score,
    'baseline_score': baseline_score,
    'improvement': baseline_score - final_score,
    'improvements_found': len(all_improvements),
    'overlap_count': overlap_count,
    'approach': 'Corner rebuild from large N (Chistyakov method)'
}

with open('/home/code/experiments/032_corner_rebuild_large_n/metrics.json', 'w') as f:
    json.dump(metrics, f, indent=2)

print("\nMetrics saved:")
for k, v in metrics.items():
    print(f"  {k}: {v}")


Metrics saved:
  cv_score: 70.6243807244223
  baseline_score: 70.6243807244223
  improvement: 0.0
  improvements_found: 0
  overlap_count: 0
  approach: Corner rebuild from large N (Chistyakov method)


In [9]:
# Search all snapshots for better N=51-200 solutions
print("Searching snapshots for better N=51-200 solutions...")

import os
import glob

# Find all submission files in snapshots
snapshot_submissions = glob.glob('/home/nonroot/snapshots/santa-2025/*/submission/submission.csv')
print(f"Found {len(snapshot_submissions)} snapshot submissions")

# Also check for other submission files
other_submissions = glob.glob('/home/nonroot/snapshots/santa-2025/*/code/**/*.csv', recursive=True)
print(f"Found {len(other_submissions)} other CSV files")

# Filter to likely submission files
submission_files = []
for f in snapshot_submissions + other_submissions:
    if 'submission' in f.lower() or 'santa' in f.lower():
        submission_files.append(f)

print(f"Total submission files to check: {len(submission_files)}")

Searching snapshots for better N=51-200 solutions...
Found 68 snapshot submissions
Found 2939 other CSV files
Total submission files to check: 3007


In [11]:
# Load and compare N=51-200 scores from all snapshots
print("Loading and comparing N=51-200 scores from snapshots...")

def load_submission_scores(filepath, n_range):
    """Load submission and return scores for specified N range."""
    try:
        df = pd.read_csv(filepath)
        if 'id' not in df.columns or 'x' not in df.columns:
            return None
        
        scores = {}
        for n in n_range:
            prefix = f"{n:03d}_"
            group = df[df["id"].str.startswith(prefix)].sort_values("id")
            if len(group) != n:
                return None  # Invalid submission
            
            configs = []
            for _, row in group.iterrows():
                x = parse_value(row["x"])
                y = parse_value(row["y"])
                deg = parse_value(row["deg"])
                configs.append((x, y, deg))
            
            trees = [create_tree_polygon(x, y, deg) for x, y, deg in configs]
            
            # Check for overlaps
            if check_overlap(trees):
                return None  # Invalid - has overlaps
            
            scores[n] = calculate_score(trees)
        
        return scores
    except Exception as e:
        return None

# Check a sample of submissions first
sample_files = submission_files[:100]
print(f"Checking {len(sample_files)} files...")

better_sources = []
for i, filepath in enumerate(sample_files):
    if i % 20 == 0:
        print(f"  Checked {i}/{len(sample_files)}...")
    
    scores = load_submission_scores(filepath, range(51, 201))
    if scores is None:
        continue
    
    # Compare with baseline
    improvements = 0
    total_improvement = 0
    for n in range(51, 201):
        if n in scores and scores[n] < all_scores[n]:
            improvements += 1
            total_improvement += all_scores[n] - scores[n]
    
    if improvements > 0:
        better_sources.append((filepath, improvements, total_improvement))
        print(f"  Found better source: {filepath}")
        print(f"    Improvements: {improvements} N values, total: {total_improvement:.6f}")

print(f"\nFound {len(better_sources)} sources with improvements for N=51-200")

Loading and comparing N=51-200 scores from snapshots...
Checking 100 files...
  Checked 0/100...


  Checked 20/100...


  Checked 40/100...


  Checked 60/100...


  Checked 80/100...



Found 0 sources with improvements for N=51-200


In [12]:
# Check the main snapshot submissions (68 files) more thoroughly
print("Checking main snapshot submissions...")

main_submissions = glob.glob('/home/nonroot/snapshots/santa-2025/*/submission/submission.csv')
print(f"Found {len(main_submissions)} main submissions")

# Load all and compare
best_per_n = dict(all_scores)  # Start with baseline
best_configs_per_n = dict(all_configs)
best_source_per_n = {n: 'baseline' for n in range(1, 201)}

for filepath in main_submissions:
    try:
        df = pd.read_csv(filepath)
        if 'id' not in df.columns or 'x' not in df.columns:
            continue
        
        for n in range(51, 201):  # Focus on N=51-200
            prefix = f"{n:03d}_"
            group = df[df["id"].str.startswith(prefix)].sort_values("id")
            if len(group) != n:
                continue
            
            configs = []
            for _, row in group.iterrows():
                x = parse_value(row["x"])
                y = parse_value(row["y"])
                deg = parse_value(row["deg"])
                configs.append((x, y, deg))
            
            trees = [create_tree_polygon(x, y, deg) for x, y, deg in configs]
            
            # Check for overlaps
            if check_overlap(trees):
                continue
            
            score = calculate_score(trees)
            if score < best_per_n[n]:
                improvement = best_per_n[n] - score
                print(f"  N={n}: {best_per_n[n]:.6f} -> {score:.6f} (improvement: {improvement:.6f}) from {filepath}")
                best_per_n[n] = score
                best_configs_per_n[n] = configs
                best_source_per_n[n] = filepath
    except Exception as e:
        continue

# Calculate total improvement
new_total = sum(best_per_n.values())
old_total = sum(all_scores.values())
print(f"\nOld total: {old_total:.6f}")
print(f"New total: {new_total:.6f}")
print(f"Improvement: {old_total - new_total:.6f}")

Checking main snapshot submissions...
Found 68 main submissions



Old total: 70.624381
New total: 70.624381
Improvement: 0.000000


In [13]:
# Try hexagonal packing for large N
print("Trying hexagonal packing for N=51-200...")

def create_hexagonal_packing(n, spacing=0.9):
    """Create a hexagonal packing of n trees."""
    configs = []
    
    # Calculate grid dimensions
    rows = int(np.ceil(np.sqrt(n * 2 / np.sqrt(3))))
    cols = int(np.ceil(n / rows))
    
    # Hexagonal spacing
    dx = spacing
    dy = spacing * np.sqrt(3) / 2
    
    count = 0
    for row in range(rows):
        for col in range(cols):
            if count >= n:
                break
            
            x = col * dx
            if row % 2 == 1:
                x += dx / 2  # Offset odd rows
            y = row * dy
            
            # Alternate angles
            angle = 0 if (row + col) % 2 == 0 else 180
            
            configs.append((x, y, angle))
            count += 1
        if count >= n:
            break
    
    return configs

# Test hexagonal packing for a few N values
test_n_values = [60, 80, 100, 120, 150, 180, 200]
print("Testing hexagonal packing...")

for n in test_n_values:
    configs = create_hexagonal_packing(n)
    trees = [create_tree_polygon(x, y, deg) for x, y, deg in configs]
    
    if check_overlap(trees):
        print(f"  N={n}: INVALID (overlaps)")
        continue
    
    score = calculate_score(trees)
    baseline = all_scores[n]
    improvement = baseline - score
    print(f"  N={n}: hex={score:.6f}, baseline={baseline:.6f}, improvement={improvement:+.6f}")

Trying hexagonal packing for N=51-200...
Testing hexagonal packing...
  N=60: hex=1.023220, baseline=0.357258, improvement=-0.665962
  N=80: hex=0.927686, baseline=0.344880, improvement=-0.582806
  N=100: hex=0.855625, baseline=0.343397, improvement=-0.512228
  N=120: hex=0.862527, baseline=0.337683, improvement=-0.524843


  N=150: hex=0.917677, baseline=0.337064, improvement=-0.580613
  N=180: hex=0.869712, baseline=0.331000, improvement=-0.538712


  N=200: hex=0.883299, baseline=0.337549, improvement=-0.545750


In [None]:
# Final summary
print("=" * 60)
print("EXPERIMENT 032 SUMMARY: Corner Rebuild for Large N")
print("=" * 60)

print(f"\nBaseline score: {sum(all_scores.values()):.6f}")
print(f"N=51-200 baseline: {sum(all_scores[n] for n in range(51, 201)):.6f}")

print("\nApproaches tried:")
print("1. Corner rebuild from large N (Chistyakov method): 0 improvements")
print("2. Search 68 snapshot submissions for N=51-200: 0 improvements")
print("3. Hexagonal packing for large N: ALL WORSE than baseline")

print("\nCONCLUSION:")
print("The baseline is at an EXTREMELY STRONG LOCAL OPTIMUM for N=51-200.")
print("All approaches (corner rebuild, snapshot search, hexagonal packing)")
print("failed to find any improvements.")

# Update metrics
metrics = {
    'cv_score': 70.624381,
    'baseline_score': 70.624381,
    'improvement': 0.0,
    'corner_rebuild_improvements': 0,
    'snapshot_search_improvements': 0,
    'hexagonal_packing': 'ALL WORSE',
    'approach': 'Corner rebuild + snapshot search + hexagonal packing for N=51-200',
    'conclusion': 'Baseline is at EXTREMELY STRONG LOCAL OPTIMUM for N=51-200. No improvements found.'
}

with open('/home/code/experiments/032_corner_rebuild_large_n/metrics.json', 'w') as f:
    json.dump(metrics, f, indent=2)

print("\nMetrics saved.")