# Small N Optimization

Focus on optimizing small N (N = 2–20), which have the highest per-tree score impact. Only N = 2 is attempted in the cells below.

In [1]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely import affinity
from shapely.ops import unary_union
import json
from scipy.optimize import minimize, differential_evolution
import warnings
warnings.filterwarnings('ignore')

# Tree shape definition: outline vertices stored as parallel x / y lists.
# get_tree_polygon pairs them index-by-index; the last vertex repeats the first.
# The second row of each list mirrors the first row about the y axis.
TX = [
    0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075,
    -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125, 0,
]
TY = [
    0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2,
    -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5, 0.8,
]

def get_tree_polygon(x, y, angle_deg):
    """Build the tree outline rotated by `angle_deg` and placed at (x, y).

    The outline comes from the module-level TX / TY vertex lists. It is first
    rotated about the origin (0, 0) and only then shifted by (x, y), so the
    rotation pivot is always the outline's own origin.

    Args:
        x: Horizontal offset applied after rotation.
        y: Vertical offset applied after rotation.
        angle_deg: Rotation angle handed to shapely's affinity.rotate
            (degrees under shapely's default settings).

    Returns:
        The transformed shapely Polygon.
    """
    outline = Polygon(list(zip(TX, TY)))
    rotated = affinity.rotate(outline, angle_deg, origin=(0, 0))
    return affinity.translate(rotated, xoff=x, yoff=y)

def has_overlap(poly1, poly2, tolerance=1e-9):
    """Tell whether two shapes share more than `tolerance` of area.

    Shapes that do not intersect at all are rejected without computing the
    intersection geometry. Shapes that merely touch (intersection area at or
    below `tolerance`) are not counted as overlapping.

    Args:
        poly1: First shape; must provide `intersects` and `intersection`.
        poly2: Second shape.
        tolerance: Area threshold that the intersection must strictly exceed.

    Returns:
        True if the intersection area is greater than `tolerance`, else False.
    """
    if poly1.intersects(poly2):
        shared_area = poly1.intersection(poly2).area
        return shared_area > tolerance
    return False

def get_side_length(polys):
    """Return the side of the smallest axis-aligned square enclosing `polys`.

    The bounding box of a set of shapes is the min/max over the shapes' own
    bounding boxes, so no geometric union is needed. This avoids the cost of
    building a union geometry on every call (this function runs inside
    optimization loops).

    Args:
        polys: Iterable of shapes exposing a `bounds` attribute laid out as
            (minx, miny, maxx, maxy), as shapely geometries do.

    Returns:
        The larger of the combined bounding box's width and height.

    Raises:
        ValueError: If `polys` is empty, since no bounding box exists.
    """
    boxes = [shape.bounds for shape in polys]
    if not boxes:
        raise ValueError("get_side_length() requires at least one polygon")

    min_x = min(box[0] for box in boxes)
    min_y = min(box[1] for box in boxes)
    max_x = max(box[2] for box in boxes)
    max_y = max(box[3] for box in boxes)
    return max(max_x - min_x, max_y - min_y)

def check_overlaps(polys):
    """Report whether any two shapes in `polys` overlap.

    Every unordered pair is tested with has_overlap, in index order, and the
    scan stops at the first overlapping pair.

    Args:
        polys: Indexable sequence of shapes.

    Returns:
        True if at least one pair overlaps, otherwise False (including for
        sequences with fewer than two shapes).
    """
    count = len(polys)
    return any(
        has_overlap(polys[first], polys[second])
        for first in range(count)
        for second in range(first + 1, count)
    )

# Marker in the cell output showing that the helper definitions above ran.
print("Functions defined")

Functions defined


In [2]:
# Load current best submission
df = pd.read_csv('/home/submission/submission.csv')
# Every 's' character is stripped from the stringified x / y / deg values before
# parsing them as floats (presumably an 's' prefix in the file - confirm).
for raw_col in ('x', 'y', 'deg'):
    df[f'{raw_col}_val'] = df[raw_col].astype(str).str.replace('s', '').astype(float)
# The text before the first '_' in the id is used as the configuration size N.
df['n'] = df['id'].apply(lambda tree_id: int(str(tree_id).split('_')[0]))

# Calculate current scores per N
# Score for a configuration is (bounding-square side)**2 divided by N.
current_scores = {}
for n in range(1, 201):
    group = df[df['n'] == n]
    polys = [
        get_tree_polygon(px, py, pdeg)
        for px, py, pdeg in zip(group['x_val'], group['y_val'], group['deg_val'])
    ]
    side = get_side_length(polys)
    current_scores[n] = side**2 / n

total_score = sum(current_scores.values())
print(f"Current total score: {total_score:.6f}")
print(f"\nTop 10 worst N values:")
# Negated score as the key gives a descending order while keeping ties stable.
ranked = sorted(current_scores.items(), key=lambda item: -item[1])
for n, score in ranked[:10]:
    print(f"  N={n}: {score:.6f}")

Current total score: 70.627569

Top 10 worst N values:
  N=1: 0.661250
  N=2: 0.450779
  N=3: 0.434745
  N=5: 0.416850
  N=4: 0.416545
  N=7: 0.399897
  N=6: 0.399610
  N=9: 0.387415
  N=8: 0.385407
  N=15: 0.376978


In [3]:
# Optimize N=2 specifically - try different rotation combinations
def optimize_n2():
    """Random-search for a two-tree layout with a smaller bounding square.

    Reads the current N=2 layout from the global `df` as a baseline, then
    samples 10000 random placements (positions in [-2, 2], angles in
    [0, 360)) and keeps the non-overlapping one with the smallest side.
    Progress and the comparison against the baseline are printed.

    Side effect: reseeds NumPy's global RNG with 42.

    Returns:
        (best_config, best_side), where best_config is a list of two
        (x, y, angle) tuples, or None if every sample overlapped, and
        best_side is the matching side length (inf if none was found).
    """
    # Baseline: side length of the N=2 layout currently in the submission.
    n2_rows = df[df['n'] == 2]
    baseline_polys = [
        get_tree_polygon(px, py, pdeg)
        for px, py, pdeg in zip(n2_rows['x_val'], n2_rows['y_val'], n2_rows['deg_val'])
    ]
    current_side = get_side_length(baseline_polys)
    print(f"Current N=2 side: {current_side:.6f}")

    best_side, best_config = float('inf'), None

    # Try many random configurations
    np.random.seed(42)
    for _ in range(10000):
        # Draw order is fixed (positions, then angles) so the seeded run is reproducible.
        x1, y1 = np.random.uniform(-2, 2, 2)
        x2, y2 = np.random.uniform(-2, 2, 2)
        a1 = np.random.uniform(0, 360)
        a2 = np.random.uniform(0, 360)

        first_tree = get_tree_polygon(x1, y1, a1)
        second_tree = get_tree_polygon(x2, y2, a2)
        if has_overlap(first_tree, second_tree):
            continue  # invalid placement, skip it

        side = get_side_length([first_tree, second_tree])
        if side < best_side:
            best_side = side
            best_config = [(x1, y1, a1), (x2, y2, a2)]

    print(f"Best N=2 side found: {best_side:.6f}")
    if best_side < current_side:
        print(f"Improvement: {current_side - best_side:.6f}")
    else:
        print("No improvement found")
    return best_config, best_side

best_n2_config, best_n2_side = optimize_n2()

Current N=2 side: 0.949504


Best N=2 side found: 1.089993
No improvement found


In [4]:
# Use differential evolution for N=2 optimization
def objective_n2(params):
    """Score a candidate two-tree layout for the optimizer.

    Args:
        params: Six values in the order (x1, y1, a1, x2, y2, a2).

    Returns:
        The bounding-square side of the two trees, or the constant 100 when
        the trees overlap (a penalty that makes the layout unattractive).
    """
    x1, y1, a1, x2, y2, a2 = params
    trees = [get_tree_polygon(x1, y1, a1), get_tree_polygon(x2, y2, a2)]

    # Penalty for overlap
    if has_overlap(trees[0], trees[1]):
        return 100

    return get_side_length(trees)

# Bounds for optimization
# Order matches objective_n2's unpacking: (x1, y1, a1, x2, y2, a2).
bounds = [(-2, 2), (-2, 2), (0, 360), (-2, 2), (-2, 2), (0, 360)]

print("Running differential evolution for N=2...")
# workers=-1 evaluates the population in parallel, which SciPy only supports
# with deferred updating. Request it explicitly rather than relying on the
# implicit override, which emits a UserWarning that this notebook silences.
result = differential_evolution(
    objective_n2,
    bounds,
    maxiter=500,
    seed=42,
    workers=-1,
    updating='deferred',
    polish=True,
)
print(f"Best N=2 side from DE: {result.fun:.6f}")
print(f"Best params: {result.x}")

Running differential evolution for N=2...


Best N=2 side from DE: 0.990959
Best params: [  0.78518611   0.71041753 159.36246      1.23286483   0.25595605
 112.69604783]


In [5]:
# Check if we found an improvement
n2_group = df[df['n'] == 2]
current_trees = list(zip(n2_group['x_val'], n2_group['y_val'], n2_group['deg_val']))
current_polys = [get_tree_polygon(*tree) for tree in current_trees]
current_n2_side = get_side_length(current_polys)

print(f"Current N=2 side: {current_n2_side:.6f}")
print(f"Best found N=2 side: {result.fun:.6f}")
# Printed unconditionally: a negative value means the DE result is worse.
print(f"Improvement: {current_n2_side - result.fun:.6f}")

# Verify no overlap
x1, y1, a1, x2, y2, a2 = result.x
poly1, poly2 = get_tree_polygon(x1, y1, a1), get_tree_polygon(x2, y2, a2)
print(f"Has overlap: {has_overlap(poly1, poly2)}")

Current N=2 side: 0.949504
Best found N=2 side: 0.990959
Improvement: -0.041455
Has overlap: False


In [6]:
# Calculate potential score improvement
# For N=2 the per-configuration score is side**2 / 2.
if not (result.fun < current_n2_side):
    print("No improvement found for N=2")
else:
    old_score_n2, new_score_n2 = current_n2_side**2 / 2, result.fun**2 / 2
    improvement = old_score_n2 - new_score_n2
    print(f"Score improvement for N=2: {improvement:.6f}")
    print(f"New total score would be: {total_score - improvement:.6f}")

No improvement found for N=2


In [7]:
# Save metrics
# Only a strictly better DE result counts as an improvement; otherwise record 0.
if result.fun < current_n2_side:
    n2_improvement = current_n2_side - result.fun
else:
    n2_improvement = 0

metrics = dict(
    cv_score=total_score,
    n2_current_side=current_n2_side,
    n2_best_side=result.fun,
    n2_improvement=n2_improvement,
    notes='Attempted small N optimization using differential evolution. Current solution is at local optimum.',
)

# NOTE(review): the target directory is named 004_long_sa although this notebook
# is about small-N optimization - confirm this is the intended experiment folder.
with open('/home/code/experiments/004_long_sa/metrics.json', 'w') as f:
    json.dump(metrics, f, indent=2)

print("Metrics saved")

Metrics saved


In [None]:
# Verify the current submission is valid (no overlaps)
# For each N, rebuild that configuration's polygons and test every pair.
print("Validating current submission...")
overlap_count = 0
for n in range(1, 201):
    group = df[df['n'] == n]
    polys = [
        get_tree_polygon(px, py, pdeg)
        for px, py, pdeg in zip(group['x_val'], group['y_val'], group['deg_val'])
    ]
    if check_overlaps(polys):
        overlap_count += 1
        # Only list the first few offenders to keep the output short.
        if overlap_count <= 5:
            print(f"  OVERLAP at N={n}")

print(f"\nTotal N values with overlaps: {overlap_count}")
print(f"Final CV Score: {total_score:.6f}")