# Experiment 002: Small N Exhaustive Optimization

Strategy: Optimize N=1 through an exhaustive angle search, then N=2-10 with differential evolution over tree positions and angles.
Goal: Reduce the 63% waste in N=1 and 35-55% waste in N=2-10.

In [None]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely.affinity import rotate, translate
from scipy.optimize import minimize, differential_evolution
import math
import os
import json
from tqdm import tqdm

# Tree geometry (15-vertex polygon)
# Vertices are listed counter-clockwise starting at the tip. The outline is
# mirror-symmetric about x = 0 and spans x in [-0.35, 0.35], y in [-0.2, 0.8].
# The point (0, 0) is the centre of the trunk top; create_tree_polygon rotates
# the shape about that point before translating it.
TREE_VERTICES = [
    (0, 0.8),       # tip
    (-0.125, 0.5),  # tier 1 left
    (-0.05, 0.5),   # tier 1 inner left
    (-0.2, 0.25),   # tier 2 left
    (-0.1, 0.25),   # tier 2 inner left
    (-0.35, 0),     # tier 3 left
    (-0.075, 0),    # trunk top left
    (-0.075, -0.2), # trunk bottom left
    (0.075, -0.2),  # trunk bottom right
    (0.075, 0),     # trunk top right
    (0.35, 0),      # tier 3 right
    (0.1, 0.25),    # tier 2 inner right
    (0.2, 0.25),    # tier 2 right
    (0.05, 0.5),    # tier 1 inner right
    (0.125, 0.5),   # tier 1 right
]

def create_tree_polygon(x, y, angle_deg):
    """Build the tree outline rotated by ``angle_deg`` and moved to ``(x, y)``.

    The rotation is applied first, about the local origin (0, 0) of
    ``TREE_VERTICES``; the rotated shape is then shifted by ``(x, y)``.
    """
    template = Polygon(TREE_VERTICES)
    turned = rotate(template, angle_deg, origin=(0, 0))
    return translate(turned, x, y)

def get_bounding_box_side(polygons):
    """Return the side of the axis-aligned square that encloses all polygons.

    The extent is measured over the exterior-ring coordinates of every
    polygon; the result is the larger of the overall width and height.
    An empty collection yields 0.
    """
    if not polygons:
        return 0

    # Flatten every exterior vertex into one list before taking extremes.
    vertices = [point for shape in polygons for point in shape.exterior.coords]
    x_values = [point[0] for point in vertices]
    y_values = [point[1] for point in vertices]

    span_x = max(x_values) - min(x_values)
    span_y = max(y_values) - min(y_values)
    return span_x if span_x >= span_y else span_y

def has_overlap(polygons):
    """Report whether any two polygons share more than 1e-10 of area.

    Pairs that do not intersect, or that merely touch, are ignored; for the
    remaining pairs the intersection area decides.
    """
    for index, first in enumerate(polygons):
        for second in polygons[index + 1:]:
            # Skip disjoint pairs and pairs that only share boundary points.
            if not first.intersects(second) or first.touches(second):
                continue
            # Ignore slivers below the numerical tolerance.
            if first.intersection(second).area > 1e-10:
                return True
    return False

# Marker in the cell output showing that the definitions above were executed.
print("Functions loaded successfully")

In [None]:
# N=1 Exhaustive Search
# For a single tree, we just need to find the angle that minimizes the bounding box

def optimize_n1():
    """Scan rotation angles on a 0.01-degree grid for the smallest square.

    Returns:
        ``(angle, side)`` for the grid angle in [0, 360) whose rotated tree
        has the smallest ``max(width, height)`` of its bounding box. Ties
        keep the earliest angle because only strict improvements replace
        the incumbent.
    """
    def square_side(theta):
        # bounds is (minx, miny, maxx, maxy)
        minx, miny, maxx, maxy = create_tree_polygon(0, 0, theta).bounds
        return max(maxx - minx, maxy - miny)

    winner_angle, winner_side = 0, float('inf')

    for theta in tqdm(np.arange(0, 360, 0.01), desc="N=1 exhaustive search"):
        candidate = square_side(theta)
        if candidate < winner_side:
            winner_angle, winner_side = theta, candidate

    return winner_angle, winner_side

# Run the coarse search and report it next to the hard-coded baseline side.
best_angle_n1, best_side_n1 = optimize_n1()
print(
    f"\nN=1 Optimal angle: {best_angle_n1:.4f}°",
    f"N=1 Optimal side: {best_side_n1:.6f}",
    "N=1 Baseline side: 0.8132",
    f"N=1 Improvement: {0.8132 - best_side_n1:.6f}",
    sep="\n",
)

In [None]:
# Fine-tune N=1 around the best angle found
def fine_tune_n1(initial_angle, search_range=0.1):
    """Refine the single-tree angle on a 0.0001-degree grid.

    Args:
        initial_angle: Centre of the search window, in degrees.
        search_range: Half-width of the window, in degrees.

    Returns:
        ``(angle, side)`` of the best grid point in
        ``[initial_angle - search_range, initial_angle + search_range)``.
        If the grid is empty the result is ``(initial_angle, inf)``.
    """
    window_start = initial_angle - search_range
    window_stop = initial_angle + search_range

    refined_angle, refined_side = initial_angle, float('inf')
    for theta in np.arange(window_start, window_stop, 0.0001):
        minx, miny, maxx, maxy = create_tree_polygon(0, 0, theta).bounds
        extent = max(maxx - minx, maxy - miny)
        if extent < refined_side:
            refined_angle, refined_side = theta, extent

    return refined_angle, refined_side

# Refine around the coarse optimum and report with extra precision.
best_angle_n1_fine, best_side_n1_fine = fine_tune_n1(best_angle_n1)
print(
    f"N=1 Fine-tuned angle: {best_angle_n1_fine:.6f}°",
    f"N=1 Fine-tuned side: {best_side_n1_fine:.8f}",
    sep="\n",
)

In [None]:
# N=2 Optimization
# For 2 trees, we need to optimize positions and angles of both trees

def evaluate_n2(params):
    """Objective for the two-tree layout.

    Args:
        params: ``[x1, y1, angle1, x2, y2, angle2]`` (angles in degrees).

    Returns:
        The bounding-square side of the two trees when they do not overlap
        (touching, or an intersection area of at most 1e-10, counts as no
        overlap). Otherwise ``1000 + 100 * overlap_area``, so infeasible
        layouts always score worse than feasible ones and a smaller overlap
        scores better than a larger one.
    """
    x1, y1, angle1, x2, y2, angle2 = params

    poly1 = create_tree_polygon(x1, y1, angle1)
    poly2 = create_tree_polygon(x2, y2, angle2)

    if poly1.intersects(poly2) and not poly1.touches(poly2):
        overlap_area = poly1.intersection(poly2).area
        if overlap_area > 1e-10:
            # A graded penalty lets the optimizer rank infeasible candidates
            # instead of seeing one flat plateau.
            return 1000 + overlap_area * 100

    return get_bounding_box_side([poly1, poly2])

def optimize_n2(max_iter=500, n_runs=5):
    """Optimize the two-tree layout with seeded differential-evolution restarts.

    Args:
        max_iter: ``maxiter`` passed to every differential-evolution run.
        n_runs: Number of restarts; restart ``k`` uses ``seed=k``.

    Returns:
        ``(params, side)`` of the restart with the lowest objective, where
        ``params`` is ``[x1, y1, angle1, x2, y2, angle2]``. A ``side`` of
        1000 or more means every restart ended on an overlapping layout.

    Raises:
        ValueError: If ``n_runs`` is less than 1.
        RuntimeError: If no restart produced a comparable objective value.
    """
    if n_runs < 1:
        raise ValueError("n_runs must be at least 1")

    # Bounds: x, y in [-2, 2], angle in [0, 360]
    bounds = [
        (-2, 2), (-2, 2), (0, 360),  # Tree 1
        (-2, 2), (-2, 2), (0, 360),  # Tree 2
    ]

    best_result = None
    best_score = float('inf')

    for seed in range(n_runs):
        result = differential_evolution(
            evaluate_n2, bounds,
            seed=seed, maxiter=max_iter,
            workers=1, disp=False,
            mutation=(0.5, 1), recombination=0.7
        )
        if result.fun < best_score:
            best_score = result.fun
            best_result = result

    if best_result is None:
        # Only reachable when every objective value failed the "<" comparison.
        raise RuntimeError("no differential-evolution run returned a usable result")

    return best_result.x, best_result.fun

# Run the two-tree optimisation and report it next to the hard-coded baseline.
print("Optimizing N=2...")
params_n2, side_n2 = optimize_n2()
print(
    f"N=2 Optimal side: {side_n2:.6f}",
    "N=2 Baseline side: 0.9495",
    f"N=2 Improvement: {0.9495 - side_n2:.6f}",
    f"N=2 Params: {params_n2}",
    sep="\n",
)

In [None]:
# Generic optimizer for N=3 to N=10
def evaluate_n(params, n):
    """Objective for an ``n``-tree layout.

    Args:
        params: Flat sequence ``[x1, y1, angle1, x2, y2, angle2, ...]`` with
            at least ``3 * n`` entries (angles in degrees).
        n: Number of trees encoded in ``params``.

    Returns:
        The bounding-square side when no pair of trees overlaps (touching,
        or an intersection area of at most 1e-10, counts as no overlap).
        Otherwise ``1000 + 100 * total_overlap`` where ``total_overlap`` is
        the sum of the intersection areas of all overlapping pairs.
    """
    polygons = [
        create_tree_polygon(params[3 * i], params[3 * i + 1], params[3 * i + 2])
        for i in range(n)
    ]

    # Accumulate over every pair so the penalty does not depend on which
    # overlapping pair happens to be enumerated first.
    total_overlap = 0.0
    for i, first in enumerate(polygons):
        for second in polygons[i + 1:]:
            if first.intersects(second) and not first.touches(second):
                area = first.intersection(second).area
                if area > 1e-10:
                    total_overlap += area

    if total_overlap > 0:
        return 1000 + total_overlap * 100  # Penalty

    return get_bounding_box_side(polygons)

def optimize_n(n, max_iter=300, n_runs=3):
    """Optimize ``n`` trees with seeded differential-evolution restarts.

    Args:
        n: Number of trees (at least 1).
        max_iter: ``maxiter`` passed to every differential-evolution run.
        n_runs: Number of restarts; restart ``k`` uses ``seed=k``.

    Returns:
        ``(params, side)`` of the restart with the lowest objective, where
        ``params`` is the flat ``[x, y, angle] * n`` vector. A ``side`` of
        1000 or more means every restart ended on an overlapping layout.

    Raises:
        ValueError: If ``n`` or ``n_runs`` is less than 1.
        RuntimeError: If no restart produced a comparable objective value.
    """
    if n < 1:
        raise ValueError("n must be at least 1")
    if n_runs < 1:
        raise ValueError("n_runs must be at least 1")

    # Bounds scale with N
    max_coord = 0.5 + 0.3 * n
    per_tree_bounds = [
        (-max_coord, max_coord),  # x
        (-max_coord, max_coord),  # y
        (0, 360),                 # angle
    ]
    bounds = per_tree_bounds * n

    best_result = None
    best_score = float('inf')

    for seed in range(n_runs):
        result = differential_evolution(
            evaluate_n, bounds,
            args=(n,),  # forwarded to evaluate_n after the parameter vector
            seed=seed, maxiter=max_iter,
            workers=1, disp=False,
            mutation=(0.5, 1), recombination=0.7,
            tol=1e-6
        )
        if result.fun < best_score:
            best_score = result.fun
            best_result = result

    if best_result is None:
        # Only reachable when every objective value failed the "<" comparison.
        raise RuntimeError("no differential-evolution run returned a usable result")

    return best_result.x, best_result.fun

# Quick smoke test of the generic optimizer with a reduced budget.
print("Testing optimizer on N=3...")
params_n3, side_n3 = optimize_n(3, max_iter=200, n_runs=2)
print(
    f"N=3 Optimal side: {side_n3:.6f}",
    "N=3 Baseline side: 1.1420",
    sep="\n",
)

In [None]:
# Baseline bounding-square sides for N=1..10, used for the comparisons below.
baseline_sides = {
    1: 0.8132,
    2: 0.9495,
    3: 1.1420,
    4: 1.2908,
    5: 1.4437,
    6: 1.5484,
    7: 1.6731,
    8: 1.7559,
    9: 1.8673,
    10: 1.9407,
}

# Each entry is (raw optimizer output, side, list of (x, y, angle) per tree).
optimized_results = {
    1: (best_angle_n1_fine, best_side_n1_fine, [(0, 0, best_angle_n1_fine)]),
    2: (params_n2, side_n2, [tuple(params_n2[:3]), tuple(params_n2[3:6])]),
}

# Optimize N=3 to N=10
for n in range(3, 11):
    print(f"\nOptimizing N={n}...")
    params, side = optimize_n(n, max_iter=300, n_runs=3)

    # Regroup the flat parameter vector into one (x, y, angle) triple per tree.
    trees = [(params[k], params[k + 1], params[k + 2]) for k in range(0, 3 * n, 3)]
    optimized_results[n] = (params, side, trees)

    improvement = baseline_sides[n] - side
    print(f"N={n}: side={side:.6f} (baseline={baseline_sides[n]:.4f}, improvement={improvement:.6f})")

In [None]:
# Summary of optimizations
print("\n" + "=" * 60, "OPTIMIZATION SUMMARY", "=" * 60, sep="\n")

total_improvement = 0
for n in range(1, 11):
    baseline = baseline_sides[n]
    _, optimized, _ = optimized_results[n]
    improvement = baseline - optimized

    # Each N contributes side^2 / n to the score.
    baseline_contrib = baseline**2 / n
    optimized_contrib = optimized**2 / n
    score_improvement = baseline_contrib - optimized_contrib
    total_improvement += score_improvement

    print(
        f"N={n:2d}: baseline={baseline:.4f}, optimized={optimized:.4f}, "
        f"side_diff={improvement:+.4f}, score_diff={score_improvement:+.6f}"
    )

print(
    f"\nTotal score improvement from N=1-10: {total_improvement:.6f}",
    "Baseline total score: 70.676102",
    f"Expected new score: {70.676102 - total_improvement:.6f}",
    sep="\n",
)

In [None]:
# Load baseline submission and replace N=1-10 with optimized solutions
# The CSV is expected to provide the 'id', 'x', 'y' and 'deg' columns that the
# following cells read.
baseline_path = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa-2025.csv'
df = pd.read_csv(baseline_path)

def parse_value(val):
    """Convert a submission cell to ``float``, dropping a leading ``'s'``.

    Strings that start with ``'s'`` are parsed without that first character;
    every other value is passed to ``float`` unchanged.
    """
    has_prefix = isinstance(val, str) and val.startswith('s')
    text = val[1:] if has_prefix else val
    return float(text)

# Numeric copies of the coordinate columns.
for col in ('x', 'y', 'deg'):
    df[f'{col}_val'] = df[col].apply(parse_value)

# An id has the form '<n>_<tree index>'; split it into its two integer parts.
df['n'] = df['id'].apply(lambda ident: int(ident.split('_')[0]))
df['tree_idx'] = df['id'].apply(lambda ident: int(ident.split('_')[1]))

print(f"Loaded baseline with {len(df)} rows")

In [None]:
# Create new submission with optimized N=1-10
# An optimized layout replaces the baseline rows only when it has exactly n
# trees, contains no overlapping trees, and its bounding square is strictly
# smaller than the one recomputed from the baseline rows. Otherwise the
# baseline rows are kept, so a failed optimisation cannot worsen the result.
new_rows = []

for n in range(1, 201):
    n_data = df[df['n'] == n]

    use_optimized = False
    if n <= 10 and n in optimized_results:
        _, _, trees = optimized_results[n]
        candidate_polys = [create_tree_polygon(x, y, angle) for x, y, angle in trees]
        baseline_polys = [
            create_tree_polygon(bx, by, bdeg)
            for bx, by, bdeg in zip(n_data['x_val'], n_data['y_val'], n_data['deg_val'])
        ]
        is_valid = len(trees) == n and not has_overlap(candidate_polys)
        # With no baseline rows to fall back on, any valid layout is accepted.
        is_better = (
            not baseline_polys
            or get_bounding_box_side(candidate_polys) < get_bounding_box_side(baseline_polys)
        )
        use_optimized = is_valid and is_better
        if not use_optimized:
            print(f"N={n}: keeping baseline (optimized layout is invalid or not better)")

    if use_optimized:
        # Use optimized solution
        for i, (x, y, angle) in enumerate(trees):
            new_rows.append({
                'id': f'{n:03d}_{i}',
                'x': f's{x}',
                'y': f's{y}',
                'deg': f's{angle}'
            })
    else:
        # Use baseline solution, copying the original cell values unchanged
        new_rows.extend(n_data[['id', 'x', 'y', 'deg']].to_dict('records'))

new_df = pd.DataFrame(new_rows)
print(f"Created new submission with {len(new_df)} rows")
print(new_df.head(20))

In [None]:
# Verify the new submission by calculating score
def calculate_full_score(df):
    """Score a submission frame as the sum of ``side**2 / n`` over n = 1..200.

    The frame is copied, its 'x', 'y' and 'deg' columns are parsed with
    ``parse_value`` and the group number is taken from the part of 'id'
    before the first underscore. A group whose row count differs from ``n``
    is reported with a warning and left out of the total.
    """
    df = df.copy()
    for column in ('x', 'y', 'deg'):
        df[f'{column}_val'] = df[column].apply(parse_value)
    df['n'] = df['id'].apply(lambda ident: int(ident.split('_')[0]))

    total_score = 0
    for n in range(1, 201):
        n_data = df[df['n'] == n]
        if len(n_data) != n:
            print(f"Warning: N={n} has {len(n_data)} trees")
            continue

        shapes = [
            create_tree_polygon(px, py, pdeg)
            for px, py, pdeg in zip(n_data['x_val'], n_data['y_val'], n_data['deg_val'])
        ]
        total_score += get_bounding_box_side(shapes) ** 2 / n

    return total_score

# Score the assembled submission and compare it with the recorded baseline.
new_score = calculate_full_score(new_df)
print(
    f"\nNew submission score: {new_score:.6f}",
    "Baseline score: 70.676102",
    f"Improvement: {70.676102 - new_score:.6f}",
    sep="\n",
)

In [None]:
# Check for overlaps in optimized solutions
print("\nChecking for overlaps in optimized solutions...")
has_issues = False

for n in range(1, 11):
    if n not in optimized_results:
        continue

    # The third element of each entry is the list of (x, y, angle) triples.
    trees = optimized_results[n][2]
    polygons = [create_tree_polygon(*tree) for tree in trees]

    if not has_overlap(polygons):
        print(f"N={n}: No overlaps ✓")
        continue

    print(f"WARNING: N={n} has overlapping trees!")
    has_issues = True

if not has_issues:
    print("\nAll optimized solutions are valid (no overlaps)")

In [None]:
# Save submission
os.makedirs('/home/submission', exist_ok=True)
new_df.to_csv('/home/submission/submission.csv', index=False)
print("Saved submission to /home/submission/submission.csv")

# Save metrics
# float() keeps the JSON payload to plain Python numbers.
metrics = {
    'cv_score': float(new_score),
    'baseline_score': 70.676102,
    'improvement': float(70.676102 - new_score),
    'target': 68.894234,
    'gap_to_target': float(new_score - 68.894234)
}

# Create the experiment directory first so open() cannot fail on a missing path.
metrics_path = '/home/code/experiments/002_small_n_optimization/metrics.json'
os.makedirs(os.path.dirname(metrics_path), exist_ok=True)
with open(metrics_path, 'w') as f:
    json.dump(metrics, f, indent=2)

print(f"\nFinal score: {new_score:.6f}")
print("Target: 68.894234")
print(f"Gap to target: {new_score - 68.894234:.6f}")